# Model Evaluation Notebook

This notebook helps you evaluate trained models with detailed metrics.

## Features:
1. Load trained model and test dataset
2. Calculate detailed metrics (accuracy, precision, recall, F1)
3. Generate confusion matrix
4. Visualize predictions vs ground truth


In [None]:
import sys
from pathlib import Path
import torch
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, classification_report,
    accuracy_score, precision_recall_fscore_support
)

# Make the project importable from the notebook.
# The imports below use the `src.` package prefix, so the directory that
# CONTAINS `src` (taken here as the parent of the working directory) has to be
# on sys.path; listing only `<parent>/src` cannot resolve `src.models`.
# `<parent>/src` is still added so anything relying on the previous setup
# keeps working. Entries already present are skipped, so re-running this cell
# does not pile up duplicates.
_project_root = Path().resolve().parent
for _entry in (_project_root / "src", _project_root):
    if str(_entry) not in sys.path:
        sys.path.insert(0, str(_entry))

from src.models.classifier_mobilenet import MobileNetV3Classifier
from src.data.dataset import CropDataset
from src.inference.predict_image import predict_disease
from src.utils.helpers import get_device


## 1. Load Model and Test Data


In [None]:
# Configuration
# Checkpoint to evaluate, given relative to the notebook's working directory.
MODEL_PATH = "../outputs/models/checkpoints/best_model.pth"

# Human-readable label names. Update with your class names.
# NOTE(review): presumably the list order must match the label indices used
# during training -- confirm against the training pipeline.
CLASS_NAMES = [
    "healthy", "leaf_blight", "rust", "powdery_mildew",
    "bacterial_spot", "early_blight", "late_blight",
    "mosaic_virus", "anthracnose", "other",
]

# Derived from CLASS_NAMES so the class count cannot drift out of sync when
# the list above is edited.
NUM_CLASSES = len(CLASS_NAMES)

# Device returned by the project's helper; it is used both as the
# map_location for the checkpoint and as the target of model.to().
device = get_device()

# Build the classifier without pretrained weights, then restore the checkpoint.
model = MobileNetV3Classifier(pretrained=False, num_classes=NUM_CLASSES)

# SECURITY NOTE: torch.load unpickles the file, so only open checkpoints from
# a trusted source (consider weights_only=True if your PyTorch supports it and
# the checkpoint holds plain tensors).
checkpoint = torch.load(MODEL_PATH, map_location=device)

# A checkpoint may wrap the weights under 'model_state_dict' or be the
# state dict itself; accept both layouts.
state_dict = (
    checkpoint['model_state_dict']
    if 'model_state_dict' in checkpoint
    else checkpoint
)
model.load_state_dict(state_dict)

# Move to the evaluation device and switch to eval mode.
model = model.to(device)
model.eval()

# Test split: images and annotation CSV, with augmentation turned off.
test_dataset_options = {
    "data_dir": "../data/raw/test",
    "annotations_file": "../data/splits/test_annotations.csv",
    "image_size": 224,
    "augment": False,
}
test_dataset = CropDataset(**test_dataset_options)

# Batches of 32 in dataset order (no shuffling).
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

print(f"Test samples: {len(test_dataset)}")
