In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import models, datasets, transforms
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch import amp
import numpy as np
import random

In [None]:
# =============================
# Seeding for Reproducibility
# =============================
def seed_everything(seed=42):
    """Seed every random number source this notebook relies on.

    Sets ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy, and PyTorch
    (CPU and the current CUDA device), and switches cuDNN to deterministic
    mode with autotuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All of these take the seed as their only argument.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    # Ask cuDNN for deterministic kernels and turn off its benchmark autotuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Single seed shared by the whole notebook.
SEED = 42
seed_everything(seed=SEED)

In [None]:
# =============================
# Configuration
# =============================
# --- Filesystem locations (paths under /kaggle) ---
data_dir = "/kaggle/input/sports-102/Sports102_V2"
output_dir = "/kaggle/working/vitb16_sports102_outputs"
os.makedirs(output_dir, exist_ok=True)  # created up front so later saves cannot fail on a missing folder

# --- Hardware: use the GPU when one is visible, otherwise fall back to the CPU ---
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Input geometry ---
img_size = 224  # images are resized to img_size x img_size by the transforms

# --- Optimisation hyperparameters ---
batch_size = 32
num_epochs = 50
learning_rate = 2e-4

# --- Logging ---
log_interval = 10  # intended mini-batch logging period; NOTE(review): not referenced by the visible cells

In [None]:
# =============================
# Data Loading and Transforms
# =============================
# Training pipeline: resize, random horizontal flip (augmentation), tensor
# conversion and normalisation with the mean/std values below.
train_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: identical to training but without any random augmentation.
test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

train_dir = os.path.join(data_dir, "train")

# Two views over the same image folder: one augmented (for training) and one
# deterministic (for validation). A Subset shares its parent's transform, so
# splitting a single augmented dataset would apply RandomHorizontalFlip to the
# validation images as well and make validation metrics noisy.
full_train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
full_val_dataset = datasets.ImageFolder(train_dir, transform=test_transform)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# A dedicated, explicitly seeded generator keeps the split identical even when
# this cell is re-run or other code has consumed the global RNG in between.
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_split = random_split(
    full_train_dataset, [train_size, val_size], generator=split_generator
)
# Reuse the held-out indices, but read them through the non-augmented view.
val_dataset = torch.utils.data.Subset(full_val_dataset, val_split.indices)

test_dataset = datasets.ImageFolder(os.path.join(data_dir, "test"), transform=test_transform)

# --- CHANGES FOR DATALOADER ---
def seed_worker(worker_id):
    """Seed NumPy and Python's ``random`` from torch's current initial seed.

    Meant to be passed as ``worker_init_fn`` to a ``DataLoader``.

    Args:
        worker_id: Worker index supplied by the DataLoader (unused).
    """
    # Reduce torch's 64-bit seed to the 32-bit range NumPy accepts.
    derived_seed = torch.initial_seed() % (1 << 32)
    random.seed(derived_seed)
    np.random.seed(derived_seed)

# Generator that drives the shuffling order of the training loader; seeded
# with the notebook-wide SEED.
g = torch.Generator().manual_seed(SEED)

# Settings common to all three loaders.
loader_kwargs = dict(batch_size=batch_size, num_workers=4)

# Training batches are shuffled; each worker process is seeded via seed_worker.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    worker_init_fn=seed_worker,
    generator=g,
    **loader_kwargs,
)

# Validation and test batches are served in dataset order.
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Class count comes from the sub-folders ImageFolder found under the train directory.
num_classes = len(full_train_dataset.classes)
print(f"Number of classes: {num_classes}")

In [None]:
# =============================
# Model Setup
# =============================
# Backbone: torchvision ViT-B/16 initialised from the IMAGENET1K_V1 weights.
pretrained_weights = models.ViT_B_16_Weights.IMAGENET1K_V1
model = models.vit_b_16(weights=pretrained_weights)

# Swap the classification head for a fresh linear layer sized to this
# dataset's classes; its input width is read from the layer being replaced.
num_features = model.heads.head.in_features
new_head = nn.Linear(num_features, num_classes)
model.heads.head = new_head

# Place every parameter on the selected device before building the optimizer.
model = model.to(device)

# Loss, optimizer, mixed-precision gradient scaler and LR schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
scaler = torch.amp.GradScaler()
# mode="max": the scheduler is stepped with validation accuracy, and halves
# the learning rate after 4 epochs without improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="max",
    patience=4,
    factor=0.5,
)
print(f"Initial learning rate: {scheduler.get_last_lr()}")

In [None]:
# =============================
# Resume Checkpoint
# =============================
# Defaults for a fresh run; overwritten below when a checkpoint is found.
start_epoch = 0
best_val_acc = 0.0
checkpoint_path = os.path.join(output_dir, "checkpoint.pth")
best_model_path = os.path.join(output_dir, "best_model.pth")

if os.path.exists(checkpoint_path):
    print("Resuming from checkpoint...")
    # map_location lets a checkpoint written on a GPU be restored on whatever
    # device this session actually has (e.g. a CPU-only kernel).
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    # Older checkpoints carry no scaler state; restore it only when present so
    # the loss scale does not restart from its initial value after a resume.
    if "scaler_state_dict" in checkpoint:
        scaler.load_state_dict(checkpoint["scaler_state_dict"])
    best_val_acc = checkpoint["best_val_acc"]
    # The stored epoch is the last one that completed, so continue with the next.
    start_epoch = checkpoint["epoch"] + 1
    print(f"Resumed from epoch {start_epoch} with best val acc {best_val_acc:.4f}")


In [None]:
# =============================
# Training Loop
# =============================
# Per-epoch loss history for this session.
train_losses = []
val_losses = []
# best_val_acc is deliberately NOT reset here: the resume cell initialises it
# (0.0 for a fresh run, or the value restored from the checkpoint). Resetting
# it would let a resumed run overwrite best_model.pth with a worse model.

# Mixed precision is only meaningful on CUDA; on CPU autocast is switched off
# explicitly instead of requesting a "cuda" context that is unavailable.
use_amp = device.type == "cuda"

for epoch in range(start_epoch, num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        with amp.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale at this point;
        # unscale them first so max_norm=1.0 applies to the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss)

    # ---- Validation pass ----
    model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    with torch.no_grad(), amp.autocast(device_type=device.type, enabled=use_amp):
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()

    val_acc = 100 * val_correct / val_total
    val_loss /= len(val_loader)
    val_losses.append(val_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

    # The scheduler was built with mode="max", so it is fed the accuracy.
    scheduler.step(val_acc)

    # Update the best score BEFORE writing the checkpoint so the checkpoint
    # records this epoch's result rather than the previous best.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)
        print("✅ Saved best model")

    # Checkpointing (every epoch)
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "scaler_state_dict": scaler.state_dict(),
        "best_val_acc": best_val_acc,
    }, checkpoint_path)

In [None]:
# =============================
# Evaluation (Train, Val, Test)
# =============================
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

def evaluate(loader, name):
    """Run the global ``model`` over ``loader`` and print a classification report.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        name: Split name used in the printed heading (e.g. "Train").

    Returns:
        A tuple ``(report, all_labels, all_preds)`` where ``report`` is the
        dict form of scikit-learn's classification report (always containing
        an ``"accuracy"`` entry as a fraction), and the two lists hold the
        ground-truth and predicted class indices in iteration order.
    """
    class_names = full_train_dataset.classes
    # Pin the label set to every known class index. Without this, a split that
    # happens to miss a class makes classification_report raise because the
    # number of observed labels no longer matches target_names.
    report_kwargs = dict(
        labels=list(range(len(class_names))),
        target_names=class_names,
        zero_division=0,  # classes absent from a split score 0 instead of warning
    )

    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad(), torch.amp.autocast("cuda"):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    report = classification_report(all_labels, all_preds, output_dict=True, **report_kwargs)
    # Depending on the scikit-learn version, an explicit label list that is a
    # strict superset of the observed labels may yield "micro avg" instead of
    # "accuracy"; callers index report["accuracy"], so guarantee the key.
    if "accuracy" not in report:
        report["accuracy"] = float(np.mean(np.asarray(all_labels) == np.asarray(all_preds)))

    print(f"\n{name} Classification Report:")
    print(classification_report(all_labels, all_preds, **report_kwargs))
    return report, all_labels, all_preds

def plot_confusion_matrix(y_true, y_pred, class_names, normalize=False, figsize=(30, 30), fontsize=6, save_path=None):
    """Draw a confusion-matrix heatmap and optionally save it as an image.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same length as ``y_true``.
        class_names: Tick labels for both axes.
        normalize: When True, each row is divided by its total so cells show
            per-true-class fractions; rows with no true samples stay at 0.
        figsize: Matplotlib figure size in inches.
        fontsize: Base font size for tick labels (axis labels and title are
            slightly larger).
        save_path: Optional output file; parent directories are created.
    """
    # When the labels are integer indices into class_names, pin the matrix to
    # one row/column per class so the tick labels line up even if some classes
    # never occur. Any other label type keeps scikit-learn's default behaviour.
    observed = np.concatenate([np.ravel(y_true), np.ravel(y_pred)])
    labels_are_class_indices = (
        observed.size > 0
        and np.issubdtype(observed.dtype, np.integer)
        and observed.min() >= 0
        and observed.max() < len(class_names)
    )
    cm = confusion_matrix(
        y_true,
        y_pred,
        labels=np.arange(len(class_names)) if labels_are_class_indices else None,
    )

    if normalize:
        # Guarded division: a row with zero true samples would otherwise
        # produce NaNs (0 / 0) and a runtime warning.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    plt.figure(figsize=figsize)
    sns.heatmap(cm, annot=True, fmt=".2f" if normalize else "d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names, cbar=True)

    plt.ylabel('True label', fontsize=fontsize + 2)
    plt.xlabel('Predicted label', fontsize=fontsize + 2)
    plt.title('Confusion Matrix', fontsize=fontsize + 4)
    plt.xticks(rotation=90, fontsize=fontsize)
    plt.yticks(rotation=0, fontsize=fontsize)
    plt.tight_layout()

    if save_path:
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✅ Confusion matrix saved to: {save_path}")

    plt.show()

print("\nLoading best model for final evaluation...")
# map_location makes weights saved on a GPU loadable on the current device,
# including a CPU-only session.
model.load_state_dict(torch.load(best_model_path, map_location=device))

# Only the test-set labels/predictions are kept, for the confusion matrix.
train_report, _, _ = evaluate(train_loader, "Train")
val_report, _, _ = evaluate(val_loader, "Val")
test_report, y_true, y_pred = evaluate(test_loader, "Test")

# Print all accuracies together
print("\n📊 Final Accuracies:")
print(f"Train Accuracy: {train_report['accuracy']*100:.2f}%")
print(f"Validation Accuracy: {val_report['accuracy']*100:.2f}%")
print(f"✅ Test Accuracy: {test_report['accuracy']*100:.2f}%")

# Confusion Matrix for Test Set
save_path = os.path.join(output_dir, "confusion_matrix.png")

print("\n🔹 Generating and saving confusion matrix for test set...")
plot_confusion_matrix(
    y_true=y_true,
    y_pred=y_pred,
    class_names=full_train_dataset.classes,
    normalize=False,  # Change to True if normalized matrix desired
    figsize=(30, 30),
    fontsize=6,
    save_path=save_path
)

print("✅ Training complete. Confusion matrix saved.")
