# Tomato Leaf Disease Detection ‚Äì (Cross-Domain)

**Training:** PlantVillage (Healthy + Diseased)
**Testing:** TomatoVillage (Diseased only)

In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/TP

Mounted at /content/drive
/content/drive/MyDrive/TP


In [2]:
!pip install timm albumentations torch grad-cam lime scikit-image onnx onnxruntime

Collecting grad-cam
  Downloading grad-cam-1.5.5.tar.gz (7.8 MB)
[?25l     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/7.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.3/7.8 MB[0m [31m39.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m7.8/7.8 MB[0m [31m129.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.8/7.8 MB[0m [31m93.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone

In [3]:
# COMPLETE PIPELINE: DATASET ‚Üí TRAIN ‚Üí EVAL ‚Üí REPORT ‚Üí COMPARE
# Models: DenseNet121, ResNet101, DenseNet201, EfficientNet-B4

import os
import torch
import torch.nn as nn
import torch.optim as optim
import timm
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score
import albumentations as A
from albumentations.pytorch import ToTensorV2

# ---------------- DEVICE ----------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# ---------------- PATHS ----------------
TRAIN_ROOT = "/content/drive/MyDrive/TP/PlantVillage"  # training dataset
TEST_ROOT = "/content/drive/MyDrive/TP/TomatoVillage"  # cross-domain test dataset
# ---------------- TRANSFORMS ----------------
transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225)
    ),
    ToTensorV2()
])

# ---------------- DATASET CLASS ----------------
class TomatoDataset(Dataset):
    def __init__(self, root, classes):
        self.samples = []
        for label, cls in enumerate(classes):
            cls_dir = os.path.join(root, cls)
            if not os.path.isdir(cls_dir):
                continue
            for f in os.listdir(cls_dir):
                if f.lower().endswith((".jpg", ".png", ".jpeg")):
                    self.samples.append((os.path.join(cls_dir, f), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = np.array(Image.open(path).convert("RGB"))
        img = transform(image=img)["image"]
        return img, label

# ---------------- CLASSES ----------------
classes = sorted([
    d for d in os.listdir(TRAIN_ROOT)
    if os.path.isdir(os.path.join(TRAIN_ROOT, d))
])
NUM_CLASSES = len(classes)

print("Classes:", classes)
print("NUM_CLASSES:", NUM_CLASSES)

train_dataset = TomatoDataset(TRAIN_ROOT, classes)
test_dataset  = TomatoDataset(TEST_ROOT, classes)

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))

# ---------------- TRAINING CONFIG ----------------
# ---------------- TRAINING CONFIG (FAST) ----------------
models_to_train = {
    "DenseNet121": "densenet121",
    "ResNet101": "resnet101",
    "DenseNet201": "densenet201",
    "EfficientNetB4": "efficientnet_b4"
}

criterion = nn.CrossEntropyLoss()
LR = 1e-4

epoch_plan = {
    "DenseNet121": 10,
    "ResNet101": 3,
    "DenseNet201": 3,
    "EfficientNetB4": 3
}

for model_name, timm_name in models_to_train.items():
    EPOCHS = epoch_plan[model_name]
    print(f"\nüöÄ Training {model_name} for {EPOCHS} epochs")

    model = timm.create_model(
        timm_name,
        pretrained=True,
        num_classes=NUM_CLASSES
    ).to(device)

    #  FREEZE BACKBONE (KEY SPEED BOOST)
    for param in model.parameters():
        param.requires_grad = False
    for param in model.get_classifier().parameters():
        param.requires_grad = True

    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=LR
    )

    # -------- TRAIN --------
    for epoch in range(EPOCHS):
        model.train()
        correct, total = 0, 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        print(f"{model_name} Epoch {epoch+1}/{EPOCHS} | "
              f"Train Acc: {100*correct/total:.2f}%")

    torch.save(model.state_dict(), f"{model_name.lower()}_teacher.pth")

    # -------- EVALUATE --------
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)

    print(f"\n {model_name} Classification Report:\n")
    print(
        classification_report(
            all_labels,
            all_preds,
            target_names=classes,
            zero_division=0
        )
    )

    summary_results.append({
        "Model": model_name,
        "Cross-Domain Accuracy (%)": round(acc * 100, 2)
    })

# FINAL COMPARISON TABLE

df_summary = pd.DataFrame(summary_results)

print("\nFINAL MODEL COMPARISON\n")
display(df_summary)

Device: cuda
Classes: ['Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomato___Target_Spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___Tomato_mosaic_virus', 'Tomato___healthy']
NUM_CLASSES: 10
Train samples: 14543
Test samples: 0


NameError: name 'models_to_train' is not defined

In [4]:
# COMPLETE CROSS-DOMAIN PIPELINE

import os, re, torch, timm, numpy as np, pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import classification_report, accuracy_score
import albumentations as A
from albumentations.pytorch import ToTensorV2

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

TRAIN_ROOT = "/content/drive/MyDrive/TP/PlantVillage"
TEST_ROOT  = "/content/drive/MyDrive/TP/TomatoVillage"

def normalize_name(name):
    name = name.lower()
    name = re.sub(r"tomato[_ ]*", "", name)
    name = re.sub(r"[^a-z_ ]", "", name)
    name = name.replace(" ", "_")
    return name.strip("_")

raw_classes = sorted([
    d for d in os.listdir(TRAIN_ROOT)
    if os.path.isdir(os.path.join(TRAIN_ROOT, d))
])

plant_label_map = {}
for idx, folder in enumerate(raw_classes):
    norm = normalize_name(folder)
    plant_label_map[norm] = idx

print("Normalized PlantVillage labels:")
for k, v in plant_label_map.items():
    print(k, "->", v)

NUM_CLASSES = len(plant_label_map)
classes = raw_classes

transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)),
    ToTensorV2()
])

class CrossDomainDataset(Dataset):
    def __init__(self, root, label_map):
        self.samples = []
        for folder in os.listdir(root):
            norm = normalize_name(folder)
            if norm not in label_map:
                continue
            label = label_map[norm]
            folder_path = os.path.join(root, folder)
            for f in os.listdir(folder_path):
                if f.lower().endswith((".jpg",".png",".jpeg")):
                    self.samples.append((os.path.join(folder_path,f), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = np.array(Image.open(path).convert("RGB"))
        img = transform(image=img)["image"]
        return img, label

train_dataset = CrossDomainDataset(TRAIN_ROOT, plant_label_map)
test_dataset  = CrossDomainDataset(TEST_ROOT, plant_label_map)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))

models_to_train = {
    "DenseNet121": "densenet121",
    "ResNet101": "resnet101",
    "DenseNet201": "densenet201",
    "EfficientNetB4": "efficientnet_b4"
}

epoch_plan = {
    "DenseNet121": 10,
    "ResNet101": 3,
    "DenseNet201": 3,
    "EfficientNetB4": 3
}

criterion = nn.CrossEntropyLoss()
LR = 1e-4
summary = []

for name, timm_name in models_to_train.items():
    EPOCHS = epoch_plan[name]
    print(f"\nTraining {name} for {EPOCHS} epochs")

    model = timm.create_model(timm_name, pretrained=True, num_classes=NUM_CLASSES).to(device)

    for p in model.parameters():
        p.requires_grad = False
    for p in model.get_classifier().parameters():
        p.requires_grad = True

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)

    for epoch in range(EPOCHS):
        model.train()
        for x,y in train_loader:
            x,y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    torch.save(model.state_dict(), f"{name.lower()}_teacher.pth")

    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x,y in test_loader:
            x,y = x.to(device), y.to(device)
            preds = torch.argmax(model(x), dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    print(f"\n{name} Classification Report:\n")
    print(classification_report(all_labels, all_preds, zero_division=0))

    summary.append({
        "Model": name,
        "Cross-Domain Accuracy (%)": round(acc*100, 2)
    })

df = pd.DataFrame(summary)
print("\nFINAL CROSS-DOMAIN COMPARISON\n")
display(df)


Device: cuda
Normalized PlantVillage labels:
bacterial_spot -> 0
early_blight -> 1
late_blight -> 2
leaf_mold -> 3
septoria_leaf_spot -> 4
spider_mites_twospotted_spider_mite -> 5
target_spot -> 6
yellow_leaf_curl_virus -> 7
mosaic_virus -> 8
healthy -> 9
Train samples: 14543
Test samples: 1616

Training DenseNet121 for 10 epochs


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/32.3M [00:00<?, ?B/s]


DenseNet121 Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.30      0.03      0.05       496
           2       0.59      0.86      0.70       904
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           9       0.10      0.01      0.02       216

    accuracy                           0.49      1616
   macro avg       0.14      0.13      0.11      1616
weighted avg       0.44      0.49      0.41      1616


Training ResNet101 for 3 epochs


model.safetensors:   0%|          | 0.00/179M [00:00<?, ?B/s]


ResNet101 Classification Report:

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       496
           2       0.97      0.10      0.19       904
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           9       0.00      0.00      0.00       216

    accuracy                           0.06      1616
   macro avg       0.16      0.02      0.03      1616
weighted avg       0.54      0.06      0.11      1616


Training DenseNet201 for 3 epochs


model.safetensors:   0%|          | 0.00/81.1M [00:00<?, ?B/s]


DenseNet201 Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.38      0.04      0.08       496
           2       0.61      0.87      0.72       904
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           9       0.00      0.00      0.00       216

    accuracy                           0.50      1616
   macro avg       0.14      0.13      0.11      1616
weighted avg       0.46      0.50      0.42      1616


Training EfficientNetB4 for 3 epochs


model.safetensors:   0%|          | 0.00/77.9M [00:00<?, ?B/s]


EfficientNetB4 Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.27      0.03      0.05       496
           2       0.62      0.65      0.63       904
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.38      0.01      0.03       216

    accuracy                           0.37      1616
   macro avg       0.13      0.07      0.07      1616
weighted avg       0.48      0.37      0.37      1616


FINAL CROSS-DOMAIN COMPARISON



Unnamed: 0,Model,Cross-Domain Accuracy (%)
0,DenseNet121,48.95
1,ResNet101,5.82
2,DenseNet201,50.19
3,EfficientNetB4,37.13
