## Table of contents:
- [1. Import libraries](#lib)
- [2. Downloading data](#data)
- [3. Model preparation](#model)
- [4. Training, testing](#train-test)
- [5. Results](#res)

<a id='lib'></a>
### 1. Import libraries

In [None]:
import os
import torch
import torchvision
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names. Use whichever spelling this
# installation provides, falling back to the default style if neither exists.
_WHITEGRID_STYLES = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(
    next((name for name in _WHITEGRID_STYLES if name in plt.style.available), 'default')
)

# Seed PyTorch's global RNG (used by random_split, shuffling and layer init below).
torch.manual_seed(0)

<a id='data'></a>
### 2. Downloading data

In [None]:
# Define paths (adjust these to wherever the dataset is stored)
dir_path_train = '/kaggle/input/tomatoleaf/tomato/train'
dir_path_test = '/kaggle/input/tomatoleaf/tomato/val'


# Training transform: resize, convert to tensor, then random-flip augmentation.
transformer = torchvision.transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip()
])

# Evaluation transform: same preprocessing but WITHOUT random augmentation, so
# validation and test metrics are deterministic and measured on unmodified images.
eval_transformer = torchvision.transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

# Load datasets. The training folder is opened twice: once with augmentation
# (for the training split) and once without (for the validation split).
datafolder_train = datasets.ImageFolder(root=dir_path_train, transform=transformer)
datafolder_val = datasets.ImageFolder(root=dir_path_train, transform=eval_transformer)
datafolder_test = datasets.ImageFolder(root=dir_path_test, transform=eval_transformer)

# Define proportions for the split. Everything that is not held out for
# validation is used for training (previously declared as 0.8 but never used).
val_proportion = 0.1
train_proportion = 1 - val_proportion

# Calculate sizes for each split
val_size = int(val_proportion * len(datafolder_train))
train_size = len(datafolder_train) - val_size

# Split the dataset. The validation indices produced by random_split are
# re-applied to the non-augmented view of the same folder.
train_set, _val_split = random_split(datafolder_train, [train_size, val_size])
val_set = torch.utils.data.Subset(datafolder_val, _val_split.indices)

# Create data loaders for train, validation, and test sets
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(datafolder_test, batch_size=32, shuffle=False)

# print sizes of each set
print('Train size:', len(train_set))
print('Validation size:', len(val_set))
print('test size:',len(datafolder_test))

In [None]:
# Dataset folder name -> human-readable class name.
_CLASS_DISPLAY_NAMES = {
    'Tomato___Bacterial_spot': 'Bacterial Spot',
    'Tomato___Early_blight': 'Early Blight',
    'Tomato___Late_blight': 'Late Blight',
    'Tomato___Leaf_Mold': 'Leaf Mold',
    'Tomato___Septoria_leaf_spot': 'Septoria Leaf Spot',
    'Tomato___Spider_mites Two-spotted_spider_mite': 'Spider Mites',
    'Tomato___Target_Spot': 'Target Spot',
    'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 'Tomato Yellow Leaf Curl Virus',
    'Tomato___Tomato_mosaic_virus': 'Tomato Mosaic Virus',
    'Tomato___healthy': 'Healthy',
}


def getClass(label):
    """Return the display name for a dataset folder name.

    Raises KeyError if `label` is not one of the ten known folder names.
    """
    return _CLASS_DISPLAY_NAMES[label]

# Pull one batch from the training loader to preview
images, labels = next(iter(train_loader))

# Draw the first nine samples on a 3x3 grid
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    # permute(1, 2, 0) moves the first axis last, the layout imshow expects
    ax.imshow(images[i].permute(1, 2, 0))
    ax.set_title(getClass(datafolder_train.classes[labels[i]]))
    ax.axis('off')

# Show the plot
plt.show()


In [None]:
# Integer label -> dataset folder name (same order as the sorted class folders).
class_mapping = {
    0: 'Tomato___Bacterial_spot',
    1: 'Tomato___Early_blight',
    2: 'Tomato___Late_blight',
    3: 'Tomato___Leaf_Mold',
    4: 'Tomato___Septoria_leaf_spot',
    5: 'Tomato___Spider_mites Two-spotted_spider_mite',
    6: 'Tomato___Target_Spot',
    7: 'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
    8: 'Tomato___Tomato_mosaic_virus',
    9: 'Tomato___healthy'
}

# One counter per class name. (This previously iterated over an undefined
# `class_labels`, which raised NameError.)
class_counts = {label: 0 for label in class_mapping.values()}


def _subset_labels(subset):
    """Return the integer labels of a `Subset` of an ImageFolder.

    Reads the labels from the underlying dataset's `targets` list, so no
    image has to be loaded or decoded just to count classes.
    """
    targets = subset.dataset.targets
    return [targets[i] for i in subset.indices]


train_labels = _subset_labels(train_set)
val_labels = _subset_labels(val_set)
test_labels = list(datafolder_test.targets)

all_labels = train_labels + val_labels + test_labels

unique_labels = set(all_labels)
print("Unique labels:", unique_labels)

for label in all_labels:
    class_counts[class_mapping[label]] += 1

# show class counts
print("\nClass Distribution:")
for label, count in class_counts.items():
    print(f"{label}: {count}")

In [None]:
# Dataset folder name -> short label used on the chart axis.
class_labels_shortened = {
    'Tomato___Bacterial_spot': 'Bacterial spot',
    'Tomato___Early_blight': 'Early blight',
    'Tomato___Late_blight': 'Late blight',
    'Tomato___Leaf_Mold': 'Leaf Mold',
    'Tomato___Septoria_leaf_spot': 'Septoria leaf spot',
    'Tomato___Spider_mites Two-spotted_spider_mite': 'Spider Mites',
    'Tomato___Target_Spot': 'Target Spot',
    'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 'Yellow Leaf Curl Virus',
    'Tomato___Tomato_mosaic_virus': 'Mosaic Virus',
    'Tomato___healthy': 'Healthy'
}

# Re-key the per-class counts by their short labels, keeping the same order.
class_counts_shortened = {}
for full_name, n_samples in class_counts.items():
    class_counts_shortened[class_labels_shortened[full_name]] = n_samples

# Horizontal bar chart, first class at the top
bar_names = list(class_counts_shortened)
bar_values = list(class_counts_shortened.values())

plt.figure(figsize=(10, 6))
plt.barh(bar_names, bar_values)
plt.title('The Class Distribution of Tomato Leaf Disease Dataset')
plt.xlabel('Frequency')
plt.ylabel('Class')
plt.gca().invert_yaxis()
plt.show()


<a id='model'></a>
### 3. Model preparation

In [None]:
# Number of output classes for the new classification head
numClasses = 10

# Start from a ResNet-50 with pretrained weights
model = torchvision.models.resnet50(pretrained=True)

# Freeze every existing parameter; only the replacement head below is trained
model.requires_grad_(False)

# Swap in a fresh fully connected head (new layers are trainable by default)
model.fc = torch.nn.Linear(
    in_features=model.fc.in_features,
    out_features=numClasses,
)

In [None]:
# Bare expression: as the last statement of a notebook cell this displays the
# model's layer-by-layer repr.
model

<a id='train-test'></a>
### 4. Training, testing

In [None]:
# Training hyper-parameters and objective
num_epochs = 10
loss_fn = torch.nn.CrossEntropyLoss()

# Use the GPU when one is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)

# Adam over the model's parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
# Training loop
def _run_epoch(model, loader, loss_fn, device, optimizer=None):
    """Run one full pass over `loader` and return (mean batch loss, accuracy).

    When `optimizer` is given the model is put in training mode and its
    weights are updated after every batch. When it is None the model is put
    in evaluation mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = loss_fn(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            predicted = outputs.argmax(dim=1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    return loss_sum / len(loader), n_correct / n_seen


# Per-epoch history, consumed by the plotting and printing cells below.
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in tqdm(range(num_epochs)):

    # Training pass
    train_loss, train_accuracy = _run_epoch(model, train_loader, loss_fn, device, optimizer)
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    # The two values were previously printed under each other's label.
    print(f"\nTrain accuracy {train_accuracy:.4f}, Train loss {train_loss:.4f}")

    # Validation pass
    val_loss, val_accuracy = _run_epoch(model, val_loader, loss_fn, device)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"\nValidation accuracy {val_accuracy:.4f}, Validation loss {val_loss:.4f}")

In [None]:
# Evaluate the trained model on the held-out test set.
model.eval()
all_predicted = []
all_labels = []

# Inference only: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)

        y_pred_test = model(images)
        predicted = y_pred_test.argmax(dim=1)

        all_predicted.extend(predicted.cpu().tolist())
        # Labels are only compared on the host, so they never need to move to the device.
        all_labels.extend(labels.tolist())

# Per-class precision / recall / F1, indexed by integer label.
print(classification_report(all_labels, all_predicted))

<a id='res'></a>
### 5. Results

In [None]:
# Plot accuracy and loss per epoch for the training and validation splits.
# Epochs are numbered from 1 and used as the x values, so every tick sits on
# a data point. (Ticks were previously placed at 0, 3, 6, ... while the data
# sat at 0..num_epochs-1, which stretched the axis and mislabelled epochs.)
epochs = list(range(1, len(train_accuracies) + 1))

fig, ax = plt.subplots(1, 2, figsize=(20, 6))
ax[0].plot(epochs, train_accuracies, '-*', label='Train')
# These curves come from the validation split, not the test set.
ax[0].plot(epochs, val_accuracies, '-*', label='Validation')
ax[0].set_title('Accuracy Score Plot')
ax[0].set_xticks(epochs)
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend()

ax[1].plot(epochs, train_losses, '-o', label='Train')
ax[1].plot(epochs, val_losses, '-o', label='Validation')
ax[1].set_title('Loss Plot')
ax[1].set_xticks(epochs)
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Loss Function Value')
# Trailing semicolon suppresses the notebook's repr of the returned Legend.
ax[1].legend();

In [None]:
# Training accuracy for each epoch, as recorded by the training loop
print(train_accuracies)


In [None]:
# Validation accuracy for each epoch, as recorded by the training loop
print(val_accuracies)

In [None]:
# Mean training loss per batch for each epoch, as recorded by the training loop
print(train_losses)

In [None]:
# Mean validation loss per batch for each epoch, as recorded by the training loop
print(val_losses)

In [None]:
# Save only the model's state_dict (not the whole module object) to a
# relative path; loading it back requires rebuilding the same architecture first.
torch.save(model.state_dict(), "Tomato_disease_model_lr_0.01-r.pth")