In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.cuda.amp import GradScaler, autocast

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CIFAR-10 data loading
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Use 5% of training data (2,500 images)
subset_indices = np.random.choice(len(trainset), size=int(0.05 * len(trainset)), replace=False)
trainloader = DataLoader(trainset, batch_size=64, shuffle=False, sampler=SubsetRandomSampler(subset_indices))
testloader = DataLoader(testset, batch_size=64, shuffle=False)



In [3]:
# AlexNet
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [4]:
# VGG (and variants)
class VGG(nn.Module):
    def __init__(self, num_classes=10, batch_norm=False):
        super(VGG, self).__init__()
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [5]:
# VGG-8
class VGG8(nn.Module):
    def __init__(self, num_classes=10, batch_norm=False):
        super(VGG8, self).__init__()
        cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 'M']
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 14 * 14, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [6]:
# Training function
def train_model(model, trainloader, testloader, epochs=1):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scaler = GradScaler()
    train_losses, val_losses, train_accs, val_accs, times = [], [], [], [], []

    for epoch in range(epochs):
        start_time = time.time()
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        train_losses.append(running_loss / len(trainloader))
        train_accs.append(100 * correct / total)

        model.eval()
        val_loss, correct, total = 0.0, 0, 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_losses.append(val_loss / len(testloader))
        val_accs.append(100 * correct / total)
        times.append(time.time() - start_time)
        print(f'Epoch {epoch+1}, Train Loss: {train_losses[-1]:.3f}, Train Acc: {train_accs[-1]:.2f}%, Val Loss: {val_losses[-1]:.3f}, Val Acc: {val_accs[-1]:.2f}%')

    return train_losses, val_losses, train_accs, val_accs, times

In [9]:
# Test accuracy
def test_accuracy(model, testloader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Visualize feature maps
def visualize_feature_maps(model, layer_idx, input_image, filename):
    model.eval()
    x = input_image.unsqueeze(0).to(device)
    for idx, layer in enumerate(model.features):
        x = layer(x)
        if idx == layer_idx:
            break
    feature_maps = x.squeeze(0).detach().cpu().numpy()
    num_maps = min(feature_maps.shape[0], 8)
    plt.figure(figsize=(15, 5))
    for i in range(num_maps):
        plt.subplot(1, num_maps, i+1)
        plt.imshow(feature_maps[i], cmap='viridis')
        plt.axis('off')
    plt.savefig(filename)
    plt.close()

# Plot curves
def plot_curves(train_losses, val_losses, train_accs, val_accs, title, filename):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title(f'{title} Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Acc')
    plt.plot(val_accs, label='Val Acc')
    plt.title(f'{title} Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.savefig(filename)
    plt.close()

In [8]:
# Main execution
models = {
    'AlexNet': AlexNet().to(device),
    'VGG': VGG(batch_norm=False).to(device),
    'VGG_BN': VGG(batch_norm=True).to(device),
    'VGG8': VGG8(batch_norm=False).to(device)
}
results = {}

for name, model in models.items():
    print(f'\nTraining {name}...')
    train_losses, val_losses, train_accs, val_accs, times = train_model(model, trainloader, testloader, epochs=1)
    test_acc = test_accuracy(model, testloader)
    results[name] = {'train_losses': train_losses, 'val_losses': val_losses, 'train_accs': train_accs, 'val_accs': val_accs, 'times': times, 'test_acc': test_acc}
    plot_curves(train_losses, val_losses, train_accs, val_accs, name, f'{name}_curves.png')
    input_image = trainset[0][0]
    visualize_feature_maps(model, layer_idx=0, input_image=input_image, filename=f'{name}_feature_maps.png')

print("\nPerformance Comparison:")
print(f"{'Model':<10} {'Test Acc (%)':<12} {'Time/Epoch (s)':<15} {'Overfitting':<12}")
for name, res in results.items():
    avg_time = np.mean(res['times'])
    overfitting = res['train_accs'][-1] - res['val_accs'][-1]
    print(f"{name:<10} {res['test_acc']:<12.2f} {avg_time:<15.2f} {overfitting:<12.2f}")



Training AlexNet...


  scaler = GradScaler()
  with autocast():
  with autocast():


Epoch 1, Train Loss: 2.303, Train Acc: 9.84%, Val Loss: 2.303, Val Acc: 10.00%

Training VGG...
Epoch 1, Train Loss: 2.303, Train Acc: 8.68%, Val Loss: 2.303, Val Acc: 10.00%

Training VGG_BN...
Epoch 1, Train Loss: 2.386, Train Acc: 15.84%, Val Loss: 6.876, Val Acc: 17.35%

Training VGG8...
Epoch 1, Train Loss: 2.302, Train Acc: 12.32%, Val Loss: 2.297, Val Acc: 13.64%

Performance Comparison:
Model      Test Acc (%) Time/Epoch (s)  Overfitting 
AlexNet    10.00        27.81           -0.16       
VGG        10.00        58.97           -1.32       
VGG_BN     17.34        66.38           -1.51       
VGG8       13.64        42.94           -1.32       
