<a href="https://www.kaggle.com/code/member09/leaf-disease-classification?scriptVersionId=189150243" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision
# resnet = torchvision.models.resnet18(pretrained=True)
# torch.save(resnet, '/kaggle/working/resnet18_v1.pth')

In [None]:
# Load the ResNet-18 backbone from the attached Kaggle input directory.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# NOTE(review): the file presumably stores a whole model object (see the
# commented-out torch.save call in the previous cell), so PyTorch releases that
# default to weights_only=True may need weights_only=False here — confirm.
resnet18_model = torch.load('/kaggle/input/resnet18/pytorch/v1/1/resnet18_v1.pth')
resnet18_model

In [None]:
# Read the given data

import os
import albumentations # for augumentations
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
from sklearn import metrics, model_selection
import cv2

%matplotlib inline

In [None]:
df_train_data = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
df_train_data

In [None]:
df_train_data.label.value_counts()

In [None]:
# Hold out 10% of the labelled rows for validation, keeping the class
# proportions of the full table in both parts (stratified on "label").
df_train, df_valid = model_selection.train_test_split(
    df_train_data,
    test_size=0.1,
    random_state=109,
    stratify=df_train_data["label"].values,
)

# Give both parts a fresh 0..n-1 index so positional lookups line up.
df_train = df_train.reset_index(drop=True)
df_valid = df_valid.reset_index(drop=True)

In [None]:
df_train.shape, df_valid.shape

In [None]:
image_path = "../input/cassava-leaf-disease-classification/train_images"
train_image_paths = [os.path.join(image_path, x) for x in df_train["image_id"].values]
valid_image_paths = [os.path.join(image_path, x) for x in df_valid["image_id"].values]

len(train_image_paths), len(valid_image_paths), train_image_paths[:3]

In [None]:
train_targets = df_train["label"].values
valid_targets = df_valid["label"].values

train_targets

In [None]:
class LeafImageDataset:
    """Map-style dataset that reads leaf images from disk with OpenCV.

    Each item is a dict with:
      * ``"image"``: float32 tensor in channel-first layout holding the RGB
        image (after the optional augmentation),
      * ``"target"``: tensor built from the matching entry of ``targets``.
    """

    def __init__(self, image_paths, targets, augumentations=None):
        """Store sample locations, labels and the optional transform.

        Args:
            image_paths: Sequence of image file paths.
            targets: Sequence of labels, index-aligned with ``image_paths``.
            augumentations: Optional callable invoked as
                ``augumentations(image=...)`` that returns a mapping with an
                ``"image"`` entry (the albumentations calling convention).
        """
        self.image_paths = image_paths
        self.targets = targets
        self.augumentations = augumentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, optionally augment, and tensorize the sample at ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded by OpenCV.
        """
        target = self.targets[idx]
        path = self.image_paths[idx]

        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image file: {path}")

        # OpenCV decodes to BGR; the rest of the pipeline works in RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.augumentations is not None:
            image = self.augumentations(image=image)["image"]

        # HWC -> CHW, the layout expected by torch convolution layers.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image),
            "target": torch.tensor(target),
        }

In [None]:
train_dataset = LeafImageDataset(train_image_paths, train_targets)
train_dataset[0]
valid_dataset = LeafImageDataset(valid_image_paths, valid_targets)

In [None]:
def plot_image(image_dict):
    """Print a sample's label and draw its image with matplotlib.

    Args:
        image_dict: Mapping with an ``"image"`` tensor and a ``"target"``
            tensor. The image is permuted with ``(1, 2, 0)`` and divided by
            255 before display, so it is presumably channel-first with
            values in the 0-255 range.
    """
    pixels, label = image_dict["image"], image_dict["target"]
    print(label.item())
    plt.figure(figsize=(5, 10))
    plt.imshow(pixels.permute(1, 2, 0) / 255)

plot_image(train_dataset[10])

In [None]:
# Training pipeline: random crop resized to 256x256 plus random
# transpose/flips so the model sees a different view of each leaf every epoch.
train_aug = albumentations.Compose(
[
    albumentations.RandomResizedCrop(256, 256),
    albumentations.Transpose(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5)
])


# Validation pipeline: deterministic on purpose. Random flips/transposes here
# would make the validation loss fluctuate between epochs for reasons unrelated
# to the model, which in turn destabilises early stopping.
valid_aug = albumentations.Compose(
[
    albumentations.CenterCrop(256, 256, p=1.0),
    # The centre crop is already 256x256; Resize is kept as a cheap guard so
    # the output size stays fixed if the crop size is ever changed.
    albumentations.Resize(256, 256),
])


train_dataset = LeafImageDataset(train_image_paths, train_targets, augumentations=train_aug)
valid_dataset = LeafImageDataset(valid_image_paths, valid_targets, augumentations=valid_aug)
train_dataset[10]


In [None]:
plot_image(train_dataset[10])

In [None]:
?torch.utils.data.DataLoader

In [None]:
# Dataloaders
# Reshuffle the training samples every epoch; without shuffle=True each epoch
# replays identical batches in identical order. Validation order does not
# affect the averaged loss, so it stays sequential.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=2
)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=64, shuffle=False, num_workers=2
)


In [None]:
for data in train_loader:
    print(data["image"], data["target"])
    break

In [None]:
# torchvision.models.resnet18(pretrained=False)

In [None]:
class LeafModel(nn.Module):
    """ResNet-18 backbone with a fresh linear head for leaf classification.

    ``forward`` returns ``(outputs, loss, metrics)``; ``loss`` and ``metrics``
    are only computed when targets are supplied.
    """

    def __init__(self, num_classes, pretrained=True):
        """Build the network.

        Args:
            num_classes: Number of output classes for the new head.
            pretrained: When true, reuse the module-level ``resnet18_model``
                loaded from disk; otherwise build a randomly initialised
                ``torchvision`` ResNet-18.
        """
        super().__init__()
        if pretrained:
            # NOTE: this is the module-level object itself, not a copy, so its
            # ``fc`` layer is replaced in place below.
            self.convnet = resnet18_model
        else:
            self.convnet = torchvision.models.resnet18()
        # Size the head from the backbone instead of hard-coding 512.
        in_features = self.convnet.fc.in_features
        self.convnet.fc = nn.Linear(in_features, num_classes)
        self.step_scheduler_after = "epoch"

    def loss(self, outputs, targets):
        """Return the cross-entropy loss, or ``None`` when there are no targets."""
        if targets is None:
            return None
        return nn.CrossEntropyLoss()(outputs, targets)

    def monitor_metrics(self, outputs, targets):
        """Return ``{"accuracy": ...}`` comparing argmax predictions to targets."""
        predictions = torch.argmax(outputs, dim=1).cpu().detach().numpy()
        targets = targets.cpu().detach().numpy()
        acc = metrics.accuracy_score(targets, predictions)
        return {
            "accuracy": acc
        }

    def fetch_optimizer(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        opt = torch.optim.Adam(self.parameters(), lr=1e-3)
        return opt

    def fetch_scheduler(self, optimizer):
        """Return a StepLR scheduler that multiplies the lr by 0.7 on every step."""
        sch = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
        return sch

    def forward(self, img, targets=None):
        """Run the backbone and, if targets are given, compute loss and metrics.

        Returns:
            Tuple ``(outputs, loss, metrics)``; ``loss`` is ``None`` and
            ``metrics`` is an empty dict when ``targets`` is ``None``.
        """
        outputs = self.convnet(img)
        loss = None
        # Named ``batch_metrics`` so it does not shadow the imported sklearn
        # ``metrics`` module inside this method.
        batch_metrics = {}
        if targets is not None:
            loss = self.loss(outputs, targets)
            batch_metrics = self.monitor_metrics(outputs, targets)
        return outputs, loss, batch_metrics
       
num_classes=df_train_data["label"].nunique()
model = LeafModel(num_classes=num_classes, pretrained=True)
model
        

In [None]:
model.train

In [None]:
# trial
img_ = train_dataset[10]["image"].unsqueeze(0)
y_ = train_dataset[10]["target"].unsqueeze(0)
model(img_, y_)

In [None]:
class EarlyStopper:
    """Track the lowest validation loss and signal when training should stop.

    A check counts against ``patience`` only when the loss exceeds the best
    value seen so far by more than ``min_delta``; any new best resets the count.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0
        self.min_validation_loss = float("inf")

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and return True once patience is used up."""
        # A strictly better loss becomes the new reference and clears the count.
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # Losses within the tolerance band neither reset nor advance the count.
        worsened = validation_loss > (self.min_validation_loss + self.min_delta)
        if not worsened:
            return False

        self.counter += 1
        if self.counter >= self.patience:
            return True
        return False

# Example instance: signals a stop once the validation loss has exceeded the
# best value seen by more than 10 on 3 checks since the last improvement.
# NOTE(review): min_delta=10 looks very large for a cross-entropy loss, and this
# instance is not referenced elsewhere in the visible notebook — confirm intent.
early_stopper = EarlyStopper(patience=3, min_delta=10)
# Usage sketch (train_one_epoch / validate_one_epoch are placeholders that are
# not defined in the visible notebook):
# for epoch in np.arange(n_epochs):
#     train_loss = train_one_epoch(model, train_loader)
#     validation_loss = validate_one_epoch(model, validation_loader)
#     if early_stopper.early_stop(validation_loss):             
#         break

In [None]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class EarlyStopping:
    """Callable early-stopping tracker driven by the validation loss.

    Call the instance once per epoch with the validation loss, then read
    ``early_stop``. A loss must be at most ``best_loss - min_delta`` to count
    as an improvement; otherwise the stall counter advances.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        """Update the tracker with the latest validation loss."""
        # The first observation only establishes the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        stalled = val_loss > self.best_loss - self.min_delta
        if not stalled:
            # Sufficient improvement: new baseline, stall count cleared.
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        


def _run_epoch(model, loader, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    The model is trained when ``optimizer`` is given; otherwise it is put in
    eval mode and gradients are disabled.
    """
    is_training = optimizer is not None
    model.train(is_training)

    total_loss = 0.0
    num_samples = 0
    with torch.set_grad_enabled(is_training):
        for batch in loader:
            image = batch["image"].to(device)
            target = batch["target"].to(device)

            if is_training:
                optimizer.zero_grad()
            _, loss, _ = model(image, target)
            if is_training:
                loss.backward()
                optimizer.step()

            # Weight by batch size so a short final batch does not skew the mean.
            batch_size = target.size(0)
            total_loss += loss.item() * batch_size
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError("loader yielded no samples; cannot compute a mean loss")
    return total_loss / num_samples


def train(model, train_dataset, valid_dataset, device, num_epochs=1, patience=3):
    """Fit ``model`` with per-epoch validation and early stopping.

    Args:
        model: Module exposing ``fetch_optimizer()``,
            ``fetch_scheduler(optimizer)``, ``step_scheduler_after`` and a
            forward pass ``model(image, target)`` returning
            ``(outputs, loss, metrics)``.
        train_dataset: Iterable of training batches (e.g. a DataLoader); each
            batch is a dict with ``"image"`` and ``"target"`` tensors.
        valid_dataset: Iterable of validation batches in the same format.
        device: Device the model and batches are moved to.
        num_epochs: Maximum number of epochs to run.
        patience: Number of epochs in which the validation loss is worse than
            the best seen before training stops early.

    Raises:
        ValueError: If either iterable yields no samples.
    """
    model.to(device)
    optimizer = model.fetch_optimizer()
    scheduler = model.fetch_scheduler(optimizer)
    early_stopping = EarlyStopping(patience=patience)

    for epoch in range(num_epochs):
        train_loss = _run_epoch(model, train_dataset, device, optimizer)
        valid_loss = _run_epoch(model, valid_dataset, device)

        if model.step_scheduler_after == "epoch":
            scheduler.step()

        print(
          f"Epoch : {epoch+1} / {num_epochs} ..",
          f"Train loss : {train_loss:.3f} ",
          f"Validation loss : {valid_loss:.3f} "
        )

        # Early stopping check
        early_stopping(valid_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

train(model, train_loader, valid_loader, device=device, num_epochs=10)

In [None]:
# The sample submission lists the test image ids; its "label" column is reused
# below as the dataset targets.
test_df_data = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")
image_path = "../input/cassava-leaf-disease-classification/test_images/"
test_image_paths = [
    os.path.join(image_path, x) for x in test_df_data["image_id"].values
]


# Inference transform: deterministic, so the same image always produces the
# same prediction. Random flips/transposes belong in training only.
test_aug = albumentations.Compose(
[
    albumentations.CenterCrop(256, 256, p=1.0),
    albumentations.Resize(256, 256),
])

test_targets = test_df_data["label"].values

# Use the dedicated test transform (it was previously defined but never used).
test_dataset = LeafImageDataset(test_image_paths, test_targets, augumentations=test_aug)

# Default DataLoader settings: one sample per batch, in file order.
test_loader = torch.utils.data.DataLoader(test_dataset)

In [None]:
test_df_data

In [None]:

def test(model, test_loader, device):
    """Run inference over ``test_loader`` and report loss and accuracy.

    Args:
        model: Module whose forward pass ``model(image, target)`` returns
            ``(outputs, loss, metrics)``.
        test_loader: Iterable of batches (dicts with ``"image"`` and
            ``"target"`` tensors); any batch size is supported.
        device: Device the model and batches are moved to.

    Returns:
        Tuple ``(final_pred, test_loss, accuracy)`` where ``final_pred`` is a
        1-D NumPy array of predicted class indices in loader order,
        ``test_loss`` is the sample-weighted mean loss and ``accuracy`` the
        fraction of predictions equal to the targets.
        NOTE(review): if the targets are placeholder labels, the loss and
        accuracy values carry no meaning — confirm against the data source.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.to(device)
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    batch_preds = []
    with torch.no_grad():
        for batch in test_loader:
            image = batch["image"].to(device)
            target = batch["target"].to(device)
            output, loss, _ = model(image, target)

            batch_size = target.size(0)
            test_loss += loss.item() * batch_size
            pred = torch.argmax(output, dim=1)
            # Collect per-batch arrays and join once at the end; this works for
            # any batch size and avoids re-copying the result on every step.
            batch_preds.append(pred.cpu().numpy())
            correct += (pred == target).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("test_loader yielded no samples")

    final_pred = np.concatenate(batch_preds)
    test_loss /= total
    accuracy = correct / total

    print(f"Test loss: {test_loss:.3f}, Test accuracy: {accuracy:.3f}")

    return final_pred, test_loss, accuracy

final_preds, test_loss, test_accuracy = test(model, test_loader, device=device)

In [None]:
final_preds

In [None]:
test_df_data["label"] = final_preds
test_df_data.to_csv("submission.csv", index=False)