In [1]:
import os
import numpy as np
import pandas
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score

# Import all models
from resnet import ResNet18, ResidualBlock
from mobilenet import MobileNetV1, MobileNetV2
from vggnet import VGG16
from data import ImgDataset

In [2]:
# Seed NumPy and PyTorch up front so every later cell starts from a fixed RNG state.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

In [3]:
# Dataset locations, all derived from the parent of the current working directory.
# They must match the folders that 'preprocess.py' wrote the data into.
BASE_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(BASE_DIR, "Dataset", "plantvillage dataset")

# Label CSVs live in one sub-folder; images are split into one folder per subset.
CSV_PATH = os.path.join(DATA_DIR, "dataframes")
TRAIN_IMG, VAL_IMG, TEST_IMG = (
    os.path.join(DATA_DIR, subset) for subset in ("train", "val", "test")
)

In [4]:
# Build the path of the label CSV for each split (train / val / test).
train_csv_path, val_csv_path, test_csv_path = (
    os.path.join(CSV_PATH, csv_name)
    for csv_name in ("train_labels.csv", "val_labels.csv", "test_labels.csv")
)

# Echo the resolved paths so a wrong working directory is obvious before loading.
print(f"Path to training CSV: {train_csv_path}")
print(f"Path to validation CSV: {val_csv_path}")
print(f"Path to test CSV: {test_csv_path}")

# Load the three label tables, in train -> val -> test order.
train_csv, val_csv, test_csv = map(
    pandas.read_csv, (train_csv_path, val_csv_path, test_csv_path)
)

In [None]:
# Per-channel (mean, std) handed to T.Normalize; these values match the commonly
# used ImageNet channel statistics.
stats = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

# Steps shared by every pipeline: random flips first, tensor conversion + normalization last.
_flips = [T.RandomHorizontalFlip(), T.RandomVerticalFlip()]
_to_normalized_tensor = [T.ToTensor(), T.Normalize(*stats)]

# Mild: flips only.
mild_transform = T.Compose([*_flips, *_to_normalized_tensor])

# Moderate: adds rotation up to 90 degrees, a gentle resized crop and colour jitter.
moderate_transform = T.Compose([
    *_flips,
    T.RandomRotation(90),
    T.RandomResizedCrop(256, scale=(0.8, 1.0)),
    T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.02),
    *_to_normalized_tensor,
])

# Aggressive: full rotation range, much smaller crops, stronger jitter and
# a Gaussian blur applied to half of the samples.
aggressive_transform = T.Compose([
    *_flips,
    T.RandomRotation(180),
    T.RandomResizedCrop(256, scale=(0.3, 1.0)),
    T.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.03),
    T.RandomApply([T.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0))], p=0.5),
    *_to_normalized_tensor,
])

In [40]:
# Evaluation data must be processed deterministically: the training pipelines
# contain random flips, which would make validation/test metrics (and therefore
# early stopping) vary from run to run. Only convert to a tensor and apply the
# same normalization that the training pipelines use.
eval_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(*stats),
])

val_ds = ImgDataset(val_csv, VAL_IMG, transform=eval_transform)
test_ds = ImgDataset(test_csv, TEST_IMG, transform=eval_transform)

# Split the training rows by position: first 60% mild, next 30% moderate,
# last 10% aggressive augmentation.
# NOTE(review): the split is positional, so if train_labels.csv is ordered by
# class, augmentation strength ends up correlated with the class -- confirm the
# CSV rows are shuffled.
num_train = len(train_csv)
mild_end = int(0.6 * num_train)
moderate_end = int(0.9 * num_train)

train_ds_mild = ImgDataset(train_csv.iloc[:mild_end], TRAIN_IMG, transform=mild_transform)
train_ds_moderate = ImgDataset(train_csv.iloc[mild_end:moderate_end], TRAIN_IMG, transform=moderate_transform)
train_ds_aggressive = ImgDataset(train_csv.iloc[moderate_end:], TRAIN_IMG, transform=aggressive_transform)

# One training set made of the three differently augmented partitions.
train_ds = torch.utils.data.ConcatDataset([train_ds_mild, train_ds_moderate, train_ds_aggressive])

# NOTE(review): the class list is read from the mild partition only; this assumes
# ImgDataset exposes the same label set/order for every partition -- confirm.
num_classes = len(train_ds_mild.classes)
print(f"Number of classes: {num_classes}")
print(train_ds_mild.classes)

In [45]:
# Hyper-parameters shared by every model in the comparison.
batch_size = 32
lr = 2e-4
weight_decay = 1e-3

# Checkpoints are written here; create the folder if it is missing.
MODEL_DIR = "../models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Only the training loader shuffles; validation and test keep their order.
_loader_options = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_options)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_options)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_options)

# ---------------------------------------------------------
# Per-model configuration: each entry carries the instantiated network,
# its epoch budget and its early-stopping patience.
# ---------------------------------------------------------
model_configs = [
    dict(
        name="ResNet18",
        model=ResNet18(
            block=ResidualBlock,
            blocks_per_layer=[2, 2, 2, 2],
            n_channels=3,
            n_classes=num_classes,
        ),
        epochs=15,
        patience=5,
    ),
    dict(
        name="MobileNetV1",
        model=MobileNetV1(n_channels=3, n_classes=num_classes),
        epochs=20,  # lighter models get a longer budget to converge
        patience=7,
    ),
    dict(
        name="MobileNetV2",
        model=MobileNetV2(n_channels=3, n_classes=num_classes),
        epochs=20,
        patience=7,
    ),
    dict(
        name="VGG16",
        model=VGG16(n_channels=3, n_classes=num_classes),
        epochs=15,
        patience=5,
    ),
]

In [6]:
# ---------------------------------------------------------
# Helper Function: Train and Evaluate with Early Stopping & F1
# ---------------------------------------------------------
def _run_epoch(model, loader, criterion, desc, optimizer=None):
    """Run one full pass over ``loader``.

    When ``optimizer`` is given the model is put in train mode and updated after
    every batch; otherwise the model is put in eval mode and gradients are disabled.

    Returns:
        (mean_loss, accuracy_percent, labels, predictions). ``mean_loss`` is the
        per-sample mean. ``labels`` and ``predictions`` are plain Python lists and
        are only filled for evaluation passes (empty lists when training).

    Raises:
        ValueError: if the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum, correct, total = 0.0, 0, 0
    all_labels, all_preds = [], []

    with torch.set_grad_enabled(is_training):
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # The criterion returns the batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            n_samples = labels.size(0)
            loss_sum += loss.item() * n_samples
            predicted = outputs.argmax(dim=1)
            total += n_samples
            correct += (predicted == labels).sum().item()

            if not is_training:
                all_preds.extend(predicted.cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

    if total == 0:
        raise ValueError(f"{desc}: the data loader produced no samples")

    return loss_sum / total, correct / total * 100, all_labels, all_preds


def train_and_evaluate(config):
    """Train one model with early stopping and return its best validation accuracy.

    Args:
        config: dict with the keys ``"name"`` (used in logs and in the checkpoint
            file name), ``"model"`` (the network to train), ``"epochs"`` (maximum
            number of epochs) and ``"patience"`` (number of consecutive epochs
            without a validation-accuracy improvement before stopping).

    Returns:
        The best validation accuracy reached, in percent.

    Side effects:
        Reads the notebook-level ``device``, ``lr``, ``weight_decay``,
        ``train_loader``, ``val_loader`` and ``MODEL_DIR``. Every time validation
        accuracy improves, the model's ``state_dict`` is written to
        ``<MODEL_DIR>/<name>_best.pth``. The model is moved back to the CPU before
        returning so that successive models do not pile up in GPU memory.
    """
    name = config["name"]
    model = config["model"].to(device)
    num_epochs = config["epochs"]
    patience = config["patience"]

    print(f"\n{'='*40}\nTraining {name}\n{'='*40}")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val_acc = 0.0
    epochs_no_improve = 0

    try:
        for epoch in range(num_epochs):
            # --- TRAINING ---
            avg_train_loss, train_acc, _, _ = _run_epoch(
                model,
                train_loader,
                criterion,
                desc=f"Epoch {epoch+1}/{num_epochs} [Train]",
                optimizer=optimizer,
            )

            # --- VALIDATION (Accuracy + F1 Score) ---
            avg_val_loss, val_acc, all_labels, all_preds = _run_epoch(
                model,
                val_loader,
                criterion,
                desc=f"Epoch {epoch+1}/{num_epochs} [Val]",
            )
            # zero_division=0 scores a class that was never predicted as 0
            # without emitting a warning on every epoch.
            val_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

            print(f"Results: Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
            print(f"         Train Acc:  {train_acc:.2f}%   | Val Acc:  {val_acc:.2f}%")
            print(f"                                     | Val F1:   {val_f1:.4f}")

            # --- EARLY STOPPING CHECK (driven by validation accuracy) ---
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                epochs_no_improve = 0
                save_path = os.path.join(MODEL_DIR, f"{name}_best.pth")
                torch.save(model.state_dict(), save_path)
                print(f"--> Best model saved to {save_path}")
            else:
                epochs_no_improve += 1
                print(f"--> No improvement for {epochs_no_improve}/{patience} epochs.")
                if epochs_no_improve >= patience:
                    print(f"--> Early stopping triggered for {name}!")
                    break
    finally:
        # .to(device) moved the caller's model in place; move it back so the next
        # model to be trained does not have to share GPU memory with this one.
        model.cpu()

    return best_val_acc

In [7]:
# ---------------------------------------------------------
# Main Execution Loop
# ---------------------------------------------------------
# Train every configured model in turn, recording its best validation accuracy
# under the model's name. Filled incrementally so partial results survive an
# interrupted run.
results = {}

for config in model_configs:
    results[config["name"]] = train_and_evaluate(config)

# Summary: one line per model.
print("\nFinal Best Validation Accuracies:")
for name, acc in results.items():
    print(f"{name}: {acc:.2f}%")

In [8]:
# ---------------------------------------------------------
# 4. MODEL SIZE & PARAMETER COUNT
# ---------------------------------------------------------
def get_model_size(model):
    """Return the memory taken by ``model``'s parameters and buffers, in MiB.

    Every tensor contributes ``nelement() * element_size()`` bytes; the total
    over parameters and buffers is divided by 1024**2.
    """
    def _nbytes(tensors):
        # Total byte count of an iterable of tensors.
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = _nbytes(model.parameters()) + _nbytes(model.buffers())
    return total_bytes / 1024**2

# Table header followed by a separator rule.
print(f"{'Model':<15} | {'Params (M)':<12} | {'Size (MB)':<10}")
print("-"*45)

# One row per configured model: parameter count in millions and size in MB.
for config in model_configs:
    # .cpu() moves the module in place and returns it; the counts below are
    # taken with the model on the CPU.
    model = config["model"].cpu()

    num_params = sum(map(torch.Tensor.numel, model.parameters())) / 1e6
    size_mb = get_model_size(model)

    print(f"{config['name']:<15} | {num_params:<12.2f} | {size_mb:<10.2f}")

# 3. Should I prioritize Accuracy or F1 Score?

**Short Answer:** For the PlantVillage dataset (which has class imbalance), you should prioritize **F1 Score**.

**Detailed Explanation:**
1.  **Accuracy** simply measures the percentage of correct predictions. If your dataset is imbalanced (e.g., 90% "Healthy" and 10% "Rot"), a model that blindly guesses "Healthy" for everything will get 90% Accuracy but is completely useless for disease detection.
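2.  **F1 Score** is the harmonic mean of Precision and Recall, computed per class and then averaged across classes. It penalizes the model if it fails to detect the minority classes (diseases) or generates too many false alarms. Note that the `weighted` average used in this notebook weights each class by its number of samples, so large classes still dominate the score; a `macro` average treats every class equally and is the stricter choice when the classes are imbalanced.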
3.  **Context:** In agriculture, False Negatives (missing a disease) are dangerous. F1 Score gives you a much more honest picture of how well the model is actually identifying the specific diseases.