In [1]:
import cv2

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score, accuracy_score
import cv2

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ----------------------------
# Preprocessing pipelines, one per experiment variant
# ----------------------------
# Subtracts 0.5 and divides by 0.5 on the single image channel.
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))

# Resize and tensor conversion only.
base_transform = transforms.Compose(
    [transforms.Resize((28, 28)), transforms.ToTensor()]
)

# Base pipeline followed by normalization.
normalized_transform = transforms.Compose(
    [transforms.Resize((28, 28)), transforms.ToTensor(), normalize]
)

# Random flip / crop / brightness jitter applied before tensor conversion
# and normalization.
# NOTE(review): a horizontal flip mirrors the image; confirm this is the
# intended augmentation for the dataset this notebook trains on.
augment_transform = transforms.Compose(
    [
        transforms.Resize((28, 28)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(28, scale=(0.8, 1.0)),
        transforms.ColorJitter(brightness=0.2),
        transforms.ToTensor(),
        normalize,
    ]
)

# Variant name -> training pipeline. "Regularized" deliberately shares the
# augmentation pipeline; the training loop additionally switches on dropout
# and weight decay for that variant.
transform_variants = dict(
    Base=base_transform,
    Normalized=normalized_transform,
    Augmented=augment_transform,
    Regularized=augment_transform,
)



In [3]:
# ----------------------------
# Simple CNN class with configurable dropout (dropout only used in classifier)
# ----------------------------
class SimpleCNN(nn.Module):
    """Small three-convolution CNN for single-channel images.

    The convolutional stack (1 -> 32 -> 64 -> 128 channels) is followed by an
    adaptive average pool to 7x7, so the classifier input size does not depend
    on the spatial size of the input image.

    Args:
        num_classes: Number of output logits.
        dropout_prob: Dropout probability applied after the first fully
            connected layer. Values ``<= 0`` install ``nn.Identity`` instead,
            i.e. no dropout at all.
    """

    def __init__(self, num_classes=10, dropout_prob=0.0):
        super().__init__()

        # Feature extractor: 3x3 kernels with padding 1 keep the spatial size.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fixed 7x7 output so fc1's input width is not tied to the image size.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Classifier head; dropout is a no-op module unless a probability is given.
        self.dropout_prob = dropout_prob
        if dropout_prob > 0:
            self.dropout = nn.Dropout(dropout_prob)
        else:
            self.dropout = nn.Identity()
        self.fc1 = nn.Linear(in_features=128 * 7 * 7, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x`` (N x 1 x H x W)."""
        feats = F.relu(self.conv1(x))          # 32 channels, no pooling
        feats = F.relu(self.conv2(feats))      # 64 channels
        feats = self.pool(feats)               # spatial size halved
        feats = F.relu(self.conv3(feats))      # 128 channels
        feats = self.pool(feats)               # spatial size halved again
        feats = self.adaptive_pool(feats)      # 128 x 7 x 7

        flat = feats.view(feats.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = self.dropout(hidden)          # Identity when dropout_prob <= 0
        logits = self.fc2(hidden)
        return logits



In [4]:
# ----------------------------
# Training & evaluation helpers
# ----------------------------
def train_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable of ``(images, labels)`` tensor batches.
        criterion: Loss callable taking ``(outputs, labels)``. It is assumed to
            return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        tuple: ``(avg_loss, acc)`` where ``avg_loss`` is the mean loss per
        sample over the whole epoch and ``acc`` the accuracy in percent.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level global, so the helper keeps working after cell reordering.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    loss_sum = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        if model_device is not None:
            images, labels = images.to(model_device), labels.to(model_device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        n_batch = labels.size(0)
        # Weight each batch-mean loss by its size: averaging batch means
        # directly over-weights a smaller final batch.
        loss_sum += loss.item() * n_batch
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += n_batch

    avg_loss = loss_sum / total
    acc = 100.0 * correct / total
    return avg_loss, acc

def evaluate(model, loader, criterion=None):
    """Run ``model`` over ``loader`` without gradients and collect predictions.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable of ``(images, labels)`` tensor batches.
        criterion: Optional loss callable taking ``(outputs, labels)``. It is
            assumed to return the mean loss over the batch. When ``None`` no
            loss is computed.

    Returns:
        tuple: ``(acc, loss, preds, labels)`` where ``acc`` is the accuracy in
        percent, ``loss`` the mean loss per sample (``None`` without a
        criterion), and ``preds`` / ``labels`` are NumPy arrays covering every
        sample in iteration order.

    Raises:
        ZeroDivisionError: If a criterion is given and ``loader`` yields no
            samples.
    """
    model.eval()

    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    pred_chunks, label_chunks = [], []
    loss_sum = 0.0
    with torch.no_grad():
        for images, labels in loader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)
            pred_chunks.append(outputs.argmax(dim=1).cpu().numpy())
            label_chunks.append(labels.cpu().numpy())
            if criterion is not None:
                # Weight by batch size so the result is a true per-sample mean
                # even when the last batch is smaller.
                loss_sum += criterion(outputs, labels).item() * labels.size(0)

    # One concatenation per epoch instead of extending a Python list
    # element by element.
    preds_all = np.concatenate(pred_chunks) if pred_chunks else np.array([])
    labels_all = np.concatenate(label_chunks) if label_chunks else np.array([])

    loss = (loss_sum / len(labels_all)) if (criterion is not None) else None
    acc = 100.0 * (preds_all == labels_all).mean()
    return acc, loss, preds_all, labels_all



In [5]:
# ----------------------------
# Grad-CAM (uses last conv layer conv3)
# ----------------------------
class GradCAM:
    """Grad-CAM heat-map generator for one layer of a model.

    Hooks on ``target_layer`` record its output during the forward pass and
    the gradient of the chosen class score with respect to that output during
    the backward pass. ``generate`` combines both into a heat map.

    Args:
        model: Network to explain. Call ``model.eval()`` beforehand if it
            contains layers that behave differently in training mode.
        target_layer: Sub-module of ``model`` whose output is visualised.

    Call ``remove_hooks()`` when done so the hooks do not stay attached to the
    layer.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        self._hook_handles = []  # kept so the hooks can be detached again
        self._register_hooks()

    def _register_hooks(self):
        """Attach the forward/backward hooks that capture activations and gradients."""
        def forward_hook(module, inputs, output):
            self.activations = output.detach()

        def backward_hook(module, grad_input, grad_output):
            self.gradients = grad_output[0].detach()

        self._hook_handles.append(
            self.target_layer.register_forward_hook(forward_hook)
        )
        # register_backward_hook is deprecated; the "full" variant is its
        # documented replacement.
        self._hook_handles.append(
            self.target_layer.register_full_backward_hook(backward_hook)
        )

    def remove_hooks(self):
        """Detach the hooks from ``target_layer``; safe to call more than once."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles.clear()

    def generate(self, input_tensor, target_class=None):
        """Compute the heat map for a single-image batch.

        Args:
            input_tensor: Model input with a batch dimension of size 1.
            target_class: Class index to explain. Defaults to the class the
                model predicts for ``input_tensor``.

        Returns:
            tuple: ``(cam, target_class)`` where ``cam`` is a 2-D NumPy array
            at the target layer's spatial resolution, min-max scaled to
            ``[0, 1]``.

        Raises:
            RuntimeError: If the hooks captured nothing, e.g. after
                ``remove_hooks()`` or when the layer was not part of the
                forward pass.
        """
        self.model.zero_grad()
        # Drop results of a previous call so a hook that fails to fire cannot
        # silently reuse stale tensors.
        self.gradients = None
        self.activations = None

        out = self.model(input_tensor)
        if target_class is None:
            target_class = out.argmax(dim=1).item()
        one_hot = torch.zeros_like(out)
        one_hot[0, target_class] = 1
        # A single backward pass is enough; the graph need not be retained.
        out.backward(gradient=one_hot)

        if self.gradients is None or self.activations is None:
            raise RuntimeError(
                "Grad-CAM hooks did not capture activations/gradients; "
                "is target_layer used in the forward pass and are hooks attached?"
            )

        grads = self.gradients        # BxCxhxw
        acts = self.activations       # BxCxhxw
        weights = grads.mean(dim=(2, 3), keepdim=True)  # BxCx1x1
        cam = F.relu((weights * acts).sum(dim=1))       # Bxhxw
        # Drop only the batch axis so 1-pixel spatial dims survive.
        cam = cam.squeeze(0).cpu().numpy()
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam, target_class



In [6]:
# ----------------------------
# Main loop: training all variations properly (dropout + weight_decay only for Regularized)
# ----------------------------
results = {}
num_epochs = 10
batch_size = 128
seed = 42
criterion = nn.CrossEntropyLoss()

for name, transform in transform_variants.items():
    print(f"\n=== Running variant: {name} ===")

    # Same seed for every variant: identical weight initialisation and shuffle
    # order, so differences between variants come from the variant itself.
    torch.manual_seed(seed)

    # Random augmentation belongs to training only. Evaluate (and visualise)
    # on deterministic inputs that match the variant's normalisation.
    eval_transform = base_transform if transform is base_transform else normalized_transform

    train_ds = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
    test_ds  = datasets.MNIST(root="./data", train=False, download=True, transform=eval_transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=1000, shuffle=False)

    # regularize logic: enable dropout in model and weight decay in optimizer only for 'Regularized'
    is_regularize = (name == "Regularized")
    dropout_prob = 0.5 if is_regularize else 0.0

    model = SimpleCNN(num_classes=10, dropout_prob=dropout_prob).to(device)

    weight_decay = 1e-4 if is_regularize else 0.0
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=weight_decay)

    # History containers
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    # Train. Note: the "val" metrics are computed on the test split; there is
    # no separate validation split in this notebook.
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
        val_acc, val_loss, _, _ = evaluate(model, test_loader, criterion)
        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f} Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} Val Acc: {val_acc:.2f}%")

    # Final evaluation metrics (detailed)
    test_acc, _, preds, labels = evaluate(model, test_loader)
    prec = precision_score(labels, preds, average='macro')
    rec  = recall_score(labels, preds, average='macro')
    f1   = f1_score(labels, preds, average='macro')
    print(f"Final Test Acc: {test_acc:.2f}%  Prec: {prec:.4f} Rec: {rec:.4f} F1: {f1:.4f}")
    print("Classification report:\n", classification_report(labels, preds, digits=4))

    # Confusion matrix
    cm = confusion_matrix(labels, preds)
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f"{name} - Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

    # Grad-CAM: visualize one example per class (2x5 grid)
    model.eval()  # explicit: keep dropout disabled while explaining predictions
    gradcam = GradCAM(model, model.conv3)
    test_targets = np.asarray(test_ds.targets)  # built once, not once per class
    fig, axes = plt.subplots(2, 5, figsize=(12,5))
    axes_flat = axes.flatten()
    for i in range(10):
        idx = np.where(test_targets == i)[0][0]  # first test sample of class i
        img, _ = test_ds[idx]
        inp = img.unsqueeze(0).to(device)
        inp.requires_grad_()
        cam, cls = gradcam.generate(inp)
        # cv2.resize takes (width, height); upsample the map to the image size.
        cam_resized = cv2.resize(cam, (img.shape[2], img.shape[1]))
        ax = axes_flat[i]
        ax.imshow(img.squeeze().cpu().numpy(), cmap='gray')
        ax.imshow(cam_resized, cmap='jet', alpha=0.5)
        ax.set_title(f"Label {i}, Pred {cls}")
        ax.axis("off")
    plt.suptitle(f"Grad-CAM - {name}")
    plt.tight_layout()
    plt.show()

    # store results
    results[name] = {"history": history, "test_acc": test_acc, "prec": prec, "rec": rec, "f1": f1, "cm": cm}




=== Running variant: Base ===


KeyboardInterrupt: 

In [None]:
# Bar chart comparing the final test accuracy reached by each variant
names = [*results]
accs = [results[variant]["test_acc"] for variant in names]

plt.figure(figsize=(8, 4))
plt.bar(names, accs, color="skyblue")
plt.ylabel("Accuracy (%)")
plt.title("Final Test Accuracy per Variation")
plt.show()
