In [None]:
"""
COMP 6915-W25 Final Project
Vehicle Make and Model Recognition using Fine-Tuned AlexNet
Team: Victor Onwosi, Esther Ukpe, Jager Cooper

This notebook implements a VMMR system using FastAI and AlexNet.
It includes: dataset setup, transfer learning, evaluation, and visualization.

Expected Folder Structure:
- dataset/train/
- dataset/valid/
- visualizations/
- models/
"""
# --------------------------------------
# ✅ SETUP: Install dependencies (fastai, scikit-learn)
# --------------------------------------
%pip install -U fastai
%pip install scikit-learn

In [None]:
# --------------------------------------
# ✅ SETUP: Imports and local paths
# --------------------------------------
from fastai.vision.all import *
from pathlib import Path
import matplotlib.pyplot as plt
from torchvision.models import alexnet
from sklearn.metrics import classification_report

# Project-relative locations: the input dataset plus the two output folders.
base_dir = Path(".")
data_dir = base_dir.joinpath("dataset")
models_dir = base_dir.joinpath("models")
visual_dir = base_dir.joinpath("visualizations")

# Make sure both output folders exist before anything is written into them.
for output_dir in (models_dir, visual_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

# ===============================
# ✅ STEP 1: Load Data
# ===============================
# Batch-level augmentations (flip, small rotation/zoom, lighting, warp).
# NOTE(review): flip_vert=True also permits vertical flips; upside-down
# vehicles are unlikely in real photos — confirm this is intended.
augmentations = aug_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=5.0,
    max_zoom=1.1,
    max_lighting=0.2,
    max_warp=0.2,
    p_affine=0.75,
    p_lighting=0.75,
)

# Augmentations run first, then normalization with imagenet_stats.
batch_pipeline = [*augmentations, Normalize.from_stats(*imagenet_stats)]

# Images are read from dataset/train and dataset/valid, resized to 224,
# and served in batches of 32.
dls = ImageDataLoaders.from_folder(
    data_dir,
    train='train',
    valid='valid',
    item_tfms=Resize(224),
    batch_tfms=batch_pipeline,
    bs=32,
)

# ===============================
# 📷 STEP 2: Show & Save Sample Batch
# ===============================
# Render up to nine items from a batch and write the figure to disk.
sample_batch_path = visual_dir / 'sample_batch.png'
dls.show_batch(max_n=9, figsize=(8,6))
plt.savefig(sample_batch_path)
plt.close()

# ===============================
# 🧠 STEP 3: Create Learner (AlexNet)
# ===============================
# AlexNet with pretrained weights, tracking accuracy as the only metric.
learn = vision_learner(
    dls,
    models.alexnet,
    pretrained=True,
    metrics=accuracy,
)

# ===============================
# 🎯 STEP 4: Fine-tune Model (up to 50 epochs; early stopping may end sooner)
# Save best model based on accuracy
# ===============================

# Run the learning-rate finder and use its "valley" suggestion as the base LR.
lr_finder = learn.lr_find()
lr_val = lr_finder.valley
print(f"✅ Suggested Learning Rate (valley): {lr_val:.2e}")

# Re-plot the LR sweep with the valley marked.
# plot_lr_find() opens its own figure, so creating one with plt.figure()
# beforehand only leaves an empty, never-closed figure behind and the requested
# size is not applied. Resize the figure that plot_lr_find() created instead.
learn.recorder.plot_lr_find()
lr_fig = plt.gcf()
lr_fig.set_size_inches(8, 5)
plt.axvline(lr_val, color='red', linestyle='--', label=f'Valley: {lr_val:.2e}')
plt.legend()
plt.title('Learning Rate Finder with Valley Marked')
plt.grid(True)
plt.savefig(visual_dir / 'lr_find_plot_labeled.png', dpi=300, bbox_inches='tight')
plt.close(lr_fig)


# Both callbacks monitor validation accuracy: SaveModelCallback writes its
# checkpoint under the name 'best_model'; EarlyStoppingCallback uses patience=8.
learn.fine_tune(
    50,
    base_lr=lr_val,
    cbs=[
        SaveModelCallback(monitor='accuracy', fname='best_model'),
        EarlyStoppingCallback(monitor='accuracy', patience=8)
    ]
)

# Plot validation accuracy over epochs (training accuracy is not recorded).
# The per-epoch rows come from learn.recorder.values.
metrics_df = pd.DataFrame(learn.recorder.values, columns=['train_loss', 'valid_loss', 'accuracy'])

acc_fig, acc_ax = plt.subplots(figsize=(8, 5))
acc_ax.plot(metrics_df['accuracy'], label='Validation Accuracy', marker='o')
acc_ax.set_title('Validation Accuracy Over Epochs')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.grid(True)
acc_ax.legend()
acc_fig.savefig(visual_dir / 'val_accuracy_over_epochs.png', dpi=300, bbox_inches='tight')
plt.close(acc_fig)

# ===============================
# 💾 STEP 5: Save Final and Best Model
# ===============================
# Learner.export() joins a relative file name onto learn.path, which here is
# the dataset folder (the DataLoaders were built from data_dir). A relative
# 'models/…' path would therefore be written under dataset/models/ instead of
# the top-level models/ folder that the messages below report. Passing an
# absolute path makes the export land exactly where the message says.
# NOTE(review): SaveModelCallback may already have restored the best weights at
# the end of training, in which case "final" and "best" are the same model —
# confirm against the fastai callback documentation.
final_export_path = (models_dir / 'final_alexnet.pkl').resolve()
learn.export(final_export_path)
print(f"✅ Final model exported to: {models_dir / 'final_alexnet.pkl'}")

# Restore the checkpoint saved as 'best_model' before the second export and
# before the evaluation steps that follow.
learn.load('best_model')
best_export_path = (models_dir / 'best_alexnet.pkl').resolve()
learn.export(best_export_path)
print(f"✅ Best model exported to: {models_dir / 'best_alexnet.pkl'}")

# ===============================
# 📊 STEP 6: Save Training Curve and Metrics CSV
# ===============================
# Loss curve as drawn by the recorder itself.
train_loss_path = visual_dir / 'train_loss.png'
learn.recorder.plot_loss()
plt.savefig(train_loss_path)
plt.close()

# Per-epoch metrics table (train_loss, valid_loss, accuracy), used for both
# the comparison plot and the CSV below.
metrics_df = pd.DataFrame(learn.recorder.values, columns=['train_loss', 'valid_loss', 'accuracy'])

# Train vs. validation loss on one set of axes.
loss_fig, loss_ax = plt.subplots(figsize=(8, 5))
for column_name, legend_label in (('train_loss', 'Train Loss'), ('valid_loss', 'Validation Loss')):
    loss_ax.plot(metrics_df[column_name], label=legend_label, marker='o')
loss_ax.set_title('Loss Over Epochs')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True)
loss_ax.legend()
loss_fig.savefig(visual_dir / 'train_vs_val_loss.png', dpi=300, bbox_inches='tight')
plt.close(loss_fig)

# Persist the same table as CSV (no index column).
metrics_csv_path = visual_dir / 'metrics_over_epochs.csv'
metrics_df.to_csv(metrics_csv_path, index=False)
print(f"✅ Metrics CSV saved to: {visual_dir / 'metrics_over_epochs.csv'}")


# ===============================
# 🔍 STEP 7: Evaluate & Visualize
# ===============================
# validate() returns the loss followed by the metrics; index 1 is accuracy.
acc = learn.validate()[1]
print(f"✅ Final Accuracy (best model): {acc:.4f}")

interp = ClassificationInterpretation.from_learner(learn)

# The confusion matrix is not guarded: a failure here stops the run.
interp.plot_confusion_matrix(figsize=(10,10))
plt.savefig(visual_dir / 'confusion_matrix.png')
plt.close()

# The remaining figures are best effort: each entry is drawn, saved and closed
# inside its own try block, and a failure is only reported.
hi_res_options = {'dpi': 300, 'bbox_inches': 'tight'}
best_effort_plots = [
    # 9 highest-loss items in a 3-row grid, saved with default savefig options
    ('top_losses.png', lambda: interp.plot_top_losses(9, nrows=3), {}),
    # 16 highest-loss items on a larger canvas
    ('top_losses_examples.png', lambda: interp.plot_top_losses(16, figsize=(12,12)), hi_res_options),
    # a batch of up to 9 items with the model's predictions
    ('sample_predictions.png', lambda: learn.show_results(max_n=9, figsize=(10,8)), hi_res_options),
]
for plot_name, draw_plot, save_options in best_effort_plots:
    try:
        draw_plot()
        plt.savefig(visual_dir / plot_name, **save_options)
        plt.close()
    except Exception as err:
        print(f"⚠️ Failed to save {plot_name}: {err}")

preds, targs = learn.get_preds()

# Per-sample correctness, computed once and then sliced per class.
predicted_labels = preds.argmax(dim=1)
is_correct = (predicted_labels == targs).float()

# Class-wise accuracy; a class with no samples in targs is reported as 0.0
# rather than dividing by zero.
acc_per_class = []
for class_idx, class_name in enumerate(dls.vocab):
    in_class = targs == class_idx
    if in_class.any():
        class_acc = is_correct[in_class].mean().item()
    else:
        class_acc = 0.0
    acc_per_class.append((class_name, class_acc))

# One row per class, written without an index column.
acc_df = pd.DataFrame(acc_per_class, columns=['Class', 'Accuracy'])
acc_df.to_csv(visual_dir / 'classwise_accuracy.csv', index=False)

# Per-class precision/recall/F1 as a table, saved to CSV.
# `labels` pins the report to every class index in the vocabulary. Without it,
# scikit-learn derives the label set from the data and raises a ValueError when
# a class is missing from both the targets and the predictions, because the
# label count no longer matches `target_names`.
# `zero_division=0` keeps the score for such classes at 0 without a warning.
report_dict = classification_report(
    targs.numpy(),
    preds.argmax(dim=1).numpy(),
    labels=list(range(len(dls.vocab))),
    target_names=list(dls.vocab),
    output_dict=True,
    zero_division=0,
)

# Transpose so that each class (and each summary entry) becomes a row.
report_df = pd.DataFrame(report_dict).transpose()
report_df.to_csv(visual_dir / 'classification_report.csv')

# Confusion matrix from the interpretation object, passed to pandas as returned.
cm = interp.confusion_matrix()

# Label both axes with the class vocabulary and write the table to CSV.
class_labels = dls.vocab
df_cm = pd.DataFrame(cm, index=class_labels, columns=class_labels)
df_cm.to_csv(visual_dir / 'confusion_matrix_data.csv')

# 20 highest-loss items in a 5-row grid (best effort).
# NOTE(review): the file name says "per_class", but the call does not group by
# class — confirm whether a per-class breakdown was intended.
top_losses_path = visual_dir / 'top_losses_per_class.png'
try:
    interp.plot_top_losses(20, nrows=5, figsize=(15,10))
    plt.tight_layout()
    plt.savefig(top_losses_path, dpi=300, bbox_inches='tight')
    plt.close()
except Exception as err:
    print(f"⚠️ Failed to save top_losses_per_class.png: {err}")

# Normalized confusion matrix on a larger canvas (best effort).
normalized_cm_path = visual_dir / 'confusion_matrix_normalized.png'
try:
    interp.plot_confusion_matrix(normalize=True, figsize=(15,15))
    plt.savefig(normalized_cm_path, dpi=300, bbox_inches='tight')
    plt.close()
except Exception as err:
    print(f"⚠️ Failed to save confusion_matrix_normalized.png: {err}")

# Print the text classification report.
# classification_report is already imported at the top of this cell, so the
# redundant re-import is dropped.
# `labels` pins the report to every class index in the vocabulary. Without it,
# scikit-learn raises a ValueError when a class is missing from both the
# targets and the predictions, because the label count no longer matches
# `target_names`. `zero_division=0` reports 0 for such classes without warning.
print(classification_report(
    targs.numpy(),
    preds.argmax(dim=1).numpy(),
    labels=list(range(len(dls.vocab))),
    target_names=list(dls.vocab),
    zero_division=0,
))

print("✅ All steps completed. Outputs saved to 'visualizations/' and 'models/'")