In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pandas as pd
import time
import os
from tqdm.auto import tqdm


## Experiment Configuration and Details

### Variables:
- **Datasets**: MNIST, FashionMNIST
- **Models**: ResNet-18, ResNet-50
- **Batch Size**: 16
- **Optimizers**: SGD, Adam
- **Learning Rates**: 0.001, 0.0001
- **Epochs**: 3, 5
- **Pin Memory**: False, True

### Constants:
- **USE_AMP**: True (Automatic Mixed Precision enabled)
- **Train-Val-Test Split**: 70%-10%-20%
- **Image Size**: 64x64 (resized from 28x28)

## FLOPs Calculation

The `count_model_flops()` function calculates actual FLOPs (Floating Point Operations) by:
1. **Profiling the model** with a 64×64×3 input tensor
2. **Counting operations** in:
   - Conv2D layers: kernel_size × channels × output_elements
   - Linear layers: in_features × out_features  
   - BatchNorm layers: 2 × number_of_elements

This gives us the **real computational cost** for each model configuration, not just estimates.


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
os.makedirs('results', exist_ok=True)
os.makedirs('models', exist_ok=True)


In [3]:
def load_and_split_dataset(dataset_name, batch_size, pin_memory=False):
    transform_mnist = transforms.Compose([
        transforms.Grayscale(3),
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    
    if dataset_name == 'MNIST':
        full_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform_mnist)
        test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform_mnist)
    else:
        full_dataset = datasets.FashionMNIST('./data', train=True, download=True, transform=transform_mnist)
        test_dataset = datasets.FashionMNIST('./data', train=False, download=True, transform=transform_mnist)
    
    train_size = int(0.7 * len(full_dataset))
    val_size = int(0.1 * len(full_dataset))
    remaining = len(full_dataset) - train_size - val_size
    
    train_dataset, val_dataset, _ = random_split(
        full_dataset, 
        [train_size, val_size, remaining],
        generator=torch.Generator().manual_seed(42)
    )
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=pin_memory)
    
    return train_loader, val_loader, test_loader


In [4]:
def get_model(model_name, num_classes=10):
    if model_name == 'ResNet-18':
        model = models.resnet18(weights=None)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif model_name == 'ResNet-50':
        model = models.resnet50(weights=None)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model


In [5]:
def train_epoch(model, loader, criterion, optimizer, device, scaler, use_amp=True):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(loader, desc='Training', leave=False)
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        if use_amp and device.type == 'cuda':
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        pbar.set_postfix({'loss': f'{running_loss/len(loader):.4f}', 'acc': f'{100.*correct/total:.2f}%'})
    
    return running_loss / len(loader), 100. * correct / total


In [6]:
def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(loader), 100. * correct / total


In [7]:
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=3, use_amp=True):
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    best_val_acc = 0.0
    best_model_state = None
    scaler = torch.cuda.amp.GradScaler()
    
    for epoch in tqdm(range(epochs), desc='Epochs'):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, scaler, use_amp)
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        
        tqdm.write(f'Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state = model.state_dict().copy()
    
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    return model, train_losses, train_accs, val_losses, val_accs


In [8]:
def plot_training_curves(train_losses, train_accs, val_losses, val_accs, save_path):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    
    ax1.plot(train_losses, label='Train Loss')
    ax1.plot(val_losses, label='Val Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)
    
    ax2.plot(train_accs, label='Train Accuracy')
    ax2.plot(val_accs, label='Val Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy (%)')
    ax2.legend()
    ax2.grid(True)
    
    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.close()


In [9]:
results_q1a = []
epoch_values = [3, 5]
pin_memory_values = [False, True]
USE_AMP = True

configs = [
    {'batch_size': 16, 'optimizer': 'SGD', 'lr': 0.001},
    {'batch_size': 16, 'optimizer': 'SGD', 'lr': 0.0001},
    {'batch_size': 16, 'optimizer': 'Adam', 'lr': 0.001},
    {'batch_size': 16, 'optimizer': 'Adam', 'lr': 0.0001},
]


In [None]:
total_experiments = len(['MNIST', 'FashionMNIST']) * len(configs) * len(['ResNet-18', 'ResNet-50']) * len(epoch_values) * len(pin_memory_values)
exp_counter = 0

for dataset_name in ['MNIST', 'FashionMNIST']:
    for config in configs:
        batch_size = config['batch_size']
        opt_name = config['optimizer']
        lr = config['lr']
        
        for pin_mem in pin_memory_values:
            train_loader, val_loader, test_loader = load_and_split_dataset(dataset_name, batch_size, pin_mem)
            
            for model_name in ['ResNet-18', 'ResNet-50']:
                for epochs in epoch_values:
                    exp_counter += 1
                    exp_name = f"{dataset_name}_{model_name}_bs{batch_size}_{opt_name}_lr{lr}_ep{epochs}_pm{pin_mem}"
                    print(f"\n{'='*80}")
                    print(f"Experiment {exp_counter}/{total_experiments}: {exp_name}")
                    print(f"{'='*80}")
                    
                    model = get_model(model_name, num_classes=10).to(device)
                    criterion = nn.CrossEntropyLoss()
                    
                    if opt_name == 'SGD':
                        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                    else:
                        optimizer = optim.Adam(model.parameters(), lr=lr)
                    
                    model, train_losses, train_accs, val_losses, val_accs = train_model(
                        model, train_loader, val_loader, optimizer, criterion, device, epochs, USE_AMP
                    )
                    
                    _, test_acc = validate(model, test_loader, criterion, device)
                    
                    torch.save(model.state_dict(), f'models/{exp_name}.pth')
                    plot_training_curves(train_losses, train_accs, val_losses, val_accs, 
                                       f'results/{exp_name}.png')
                    
                    print(f"✓ Completed - Test Accuracy: {test_acc:.2f}%")
                    
                    results_q1a.append({
                        'Dataset': dataset_name,
                        'Batch Size': batch_size,
                        'Optimizer': opt_name,
                        'Learning Rate': lr,
                        'Epochs': epochs,
                        'Pin Memory': pin_mem,
                        'USE_AMP': USE_AMP,
                        'Model': model_name,
                        'Test Accuracy': f"{test_acc:.2f}"
                    })


100%|██████████| 9.91M/9.91M [00:00<00:00, 40.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.20MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 10.8MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.5MB/s]



Experiment 1/64: MNIST_ResNet-18_bs16_SGD_lr0.001_ep3_pmFalse


  scaler = torch.cuda.amp.GradScaler()


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast():


Epoch 1/3 - Train Loss: 0.1678, Train Acc: 95.05%, Val Loss: 0.0489, Val Acc: 98.50%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0438, Train Acc: 98.68%, Val Loss: 0.0345, Val Acc: 99.12%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0267, Train Acc: 99.21%, Val Loss: 0.0274, Val Acc: 99.18%
✓ Completed - Test Accuracy: 99.16%

Experiment 2/64: MNIST_ResNet-18_bs16_SGD_lr0.001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.1651, Train Acc: 95.22%, Val Loss: 0.0513, Val Acc: 98.53%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0431, Train Acc: 98.63%, Val Loss: 0.0377, Val Acc: 98.87%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0262, Train Acc: 99.22%, Val Loss: 0.0328, Val Acc: 99.10%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0147, Train Acc: 99.62%, Val Loss: 0.0264, Val Acc: 99.25%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0087, Train Acc: 99.79%, Val Loss: 0.0281, Val Acc: 99.27%
✓ Completed - Test Accuracy: 99.16%

Experiment 3/64: MNIST_ResNet-50_bs16_SGD_lr0.001_ep3_pmFalse


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.3112, Train Acc: 90.18%, Val Loss: 0.1169, Val Acc: 96.47%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0784, Train Acc: 97.69%, Val Loss: 0.0469, Val Acc: 98.60%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0417, Train Acc: 98.70%, Val Loss: 0.0506, Val Acc: 98.43%
✓ Completed - Test Accuracy: 98.64%

Experiment 4/64: MNIST_ResNet-50_bs16_SGD_lr0.001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.3242, Train Acc: 89.81%, Val Loss: 0.0972, Val Acc: 96.93%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0888, Train Acc: 97.40%, Val Loss: 0.0779, Val Acc: 97.78%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0467, Train Acc: 98.60%, Val Loss: 0.0457, Val Acc: 98.85%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0295, Train Acc: 99.05%, Val Loss: 0.0470, Val Acc: 98.70%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0191, Train Acc: 99.38%, Val Loss: 0.0461, Val Acc: 98.85%
✓ Completed - Test Accuracy: 98.86%

Experiment 5/64: MNIST_ResNet-18_bs16_SGD_lr0.001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.1584, Train Acc: 95.46%, Val Loss: 0.0416, Val Acc: 98.62%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0445, Train Acc: 98.66%, Val Loss: 0.0447, Val Acc: 98.63%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0253, Train Acc: 99.20%, Val Loss: 0.0316, Val Acc: 99.05%
✓ Completed - Test Accuracy: 99.04%

Experiment 6/64: MNIST_ResNet-18_bs16_SGD_lr0.001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.1600, Train Acc: 95.47%, Val Loss: 0.0450, Val Acc: 98.75%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0451, Train Acc: 98.61%, Val Loss: 0.0351, Val Acc: 98.90%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0249, Train Acc: 99.25%, Val Loss: 0.0315, Val Acc: 99.05%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0173, Train Acc: 99.47%, Val Loss: 0.0338, Val Acc: 99.03%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0116, Train Acc: 99.68%, Val Loss: 0.0289, Val Acc: 99.22%
✓ Completed - Test Accuracy: 99.30%

Experiment 7/64: MNIST_ResNet-50_bs16_SGD_lr0.001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.3331, Train Acc: 89.54%, Val Loss: 0.0945, Val Acc: 96.90%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0815, Train Acc: 97.50%, Val Loss: 0.0510, Val Acc: 98.48%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0470, Train Acc: 98.62%, Val Loss: 0.0491, Val Acc: 98.68%
✓ Completed - Test Accuracy: 98.55%

Experiment 8/64: MNIST_ResNet-50_bs16_SGD_lr0.001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.3159, Train Acc: 89.93%, Val Loss: 0.1007, Val Acc: 97.32%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0793, Train Acc: 97.62%, Val Loss: 0.0502, Val Acc: 98.53%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0491, Train Acc: 98.50%, Val Loss: 0.0464, Val Acc: 98.55%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0244, Train Acc: 99.25%, Val Loss: 0.0436, Val Acc: 98.87%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0187, Train Acc: 99.41%, Val Loss: 0.0497, Val Acc: 98.57%
✓ Completed - Test Accuracy: 98.47%

Experiment 9/64: MNIST_ResNet-18_bs16_SGD_lr0.0001_ep3_pmFalse


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.5274, Train Acc: 88.17%, Val Loss: 0.1406, Val Acc: 96.48%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.1370, Train Acc: 96.45%, Val Loss: 0.0852, Val Acc: 97.72%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0877, Train Acc: 97.63%, Val Loss: 0.0640, Val Acc: 98.17%
✓ Completed - Test Accuracy: 98.15%

Experiment 10/64: MNIST_ResNet-18_bs16_SGD_lr0.0001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.5519, Train Acc: 87.67%, Val Loss: 0.1510, Val Acc: 96.05%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.1386, Train Acc: 96.39%, Val Loss: 0.0880, Val Acc: 97.78%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0899, Train Acc: 97.52%, Val Loss: 0.0663, Val Acc: 98.33%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0680, Train Acc: 98.16%, Val Loss: 0.0576, Val Acc: 98.35%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0525, Train Acc: 98.61%, Val Loss: 0.0482, Val Acc: 98.65%
✓ Completed - Test Accuracy: 98.51%

Experiment 11/64: MNIST_ResNet-50_bs16_SGD_lr0.0001_ep3_pmFalse


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 1.1255, Train Acc: 63.78%, Val Loss: 0.2931, Val Acc: 91.28%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.2519, Train Acc: 92.57%, Val Loss: 0.1370, Val Acc: 95.58%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.1458, Train Acc: 95.62%, Val Loss: 0.0975, Val Acc: 96.90%
✓ Completed - Test Accuracy: 97.12%

Experiment 12/64: MNIST_ResNet-50_bs16_SGD_lr0.0001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 1.1272, Train Acc: 64.10%, Val Loss: 0.3028, Val Acc: 91.13%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.2530, Train Acc: 92.60%, Val Loss: 0.1520, Val Acc: 95.20%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.1487, Train Acc: 95.64%, Val Loss: 0.0993, Val Acc: 96.93%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.1048, Train Acc: 96.90%, Val Loss: 0.0763, Val Acc: 97.75%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0768, Train Acc: 97.76%, Val Loss: 0.0700, Val Acc: 97.75%
✓ Completed - Test Accuracy: 97.80%

Experiment 13/64: MNIST_ResNet-18_bs16_SGD_lr0.0001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.5638, Train Acc: 87.19%, Val Loss: 0.1429, Val Acc: 96.60%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.1347, Train Acc: 96.42%, Val Loss: 0.0831, Val Acc: 97.77%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0890, Train Acc: 97.62%, Val Loss: 0.0640, Val Acc: 98.30%
✓ Completed - Test Accuracy: 98.33%

Experiment 14/64: MNIST_ResNet-18_bs16_SGD_lr0.0001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.5474, Train Acc: 87.55%, Val Loss: 0.1481, Val Acc: 96.10%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.1382, Train Acc: 96.37%, Val Loss: 0.0861, Val Acc: 97.43%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0909, Train Acc: 97.54%, Val Loss: 0.0657, Val Acc: 98.05%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0676, Train Acc: 98.19%, Val Loss: 0.0540, Val Acc: 98.37%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0547, Train Acc: 98.54%, Val Loss: 0.0455, Val Acc: 98.67%
✓ Completed - Test Accuracy: 98.71%

Experiment 15/64: MNIST_ResNet-50_bs16_SGD_lr0.0001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 1.1364, Train Acc: 63.80%, Val Loss: 0.3116, Val Acc: 91.23%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.2834, Train Acc: 91.62%, Val Loss: 0.1539, Val Acc: 95.37%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.1693, Train Acc: 94.93%, Val Loss: 0.1151, Val Acc: 96.52%
✓ Completed - Test Accuracy: 96.59%

Experiment 16/64: MNIST_ResNet-50_bs16_SGD_lr0.0001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 1.2201, Train Acc: 60.37%, Val Loss: 0.3262, Val Acc: 90.45%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.2655, Train Acc: 92.28%, Val Loss: 0.1493, Val Acc: 95.23%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.1541, Train Acc: 95.32%, Val Loss: 0.1042, Val Acc: 96.62%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.1092, Train Acc: 96.71%, Val Loss: 0.0847, Val Acc: 97.23%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0776, Train Acc: 97.70%, Val Loss: 0.0713, Val Acc: 97.75%
✓ Completed - Test Accuracy: 97.96%

Experiment 17/64: MNIST_ResNet-18_bs16_Adam_lr0.001_ep3_pmFalse


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.1446, Train Acc: 95.67%, Val Loss: 0.0583, Val Acc: 98.43%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0647, Train Acc: 98.05%, Val Loss: 0.0509, Val Acc: 98.67%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0450, Train Acc: 98.72%, Val Loss: 0.0396, Val Acc: 98.97%
✓ Completed - Test Accuracy: 98.90%

Experiment 18/64: MNIST_ResNet-18_bs16_Adam_lr0.001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.1445, Train Acc: 95.81%, Val Loss: 0.0538, Val Acc: 98.33%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0660, Train Acc: 98.11%, Val Loss: 0.0600, Val Acc: 98.32%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0476, Train Acc: 98.55%, Val Loss: 0.0431, Val Acc: 98.78%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0368, Train Acc: 98.92%, Val Loss: 0.0547, Val Acc: 98.48%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0315, Train Acc: 99.15%, Val Loss: 0.0337, Val Acc: 99.07%
✓ Completed - Test Accuracy: 99.04%

Experiment 19/64: MNIST_ResNet-50_bs16_Adam_lr0.001_ep3_pmFalse


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.2331, Train Acc: 93.51%, Val Loss: 0.1133, Val Acc: 97.25%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.1210, Train Acc: 96.89%, Val Loss: 0.0608, Val Acc: 98.15%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0747, Train Acc: 97.86%, Val Loss: 0.0645, Val Acc: 98.08%
✓ Completed - Test Accuracy: 98.40%

Experiment 20/64: MNIST_ResNet-50_bs16_Adam_lr0.001_ep5_pmFalse


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.2414, Train Acc: 93.21%, Val Loss: 0.0767, Val Acc: 97.77%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.1166, Train Acc: 96.81%, Val Loss: 0.0668, Val Acc: 98.05%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0831, Train Acc: 97.72%, Val Loss: 0.0736, Val Acc: 98.02%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0705, Train Acc: 98.05%, Val Loss: 0.0557, Val Acc: 98.38%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0502, Train Acc: 98.55%, Val Loss: 0.0579, Val Acc: 98.37%
✓ Completed - Test Accuracy: 98.23%

Experiment 21/64: MNIST_ResNet-18_bs16_Adam_lr0.001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.1452, Train Acc: 95.73%, Val Loss: 0.1097, Val Acc: 96.75%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.0668, Train Acc: 97.97%, Val Loss: 0.0646, Val Acc: 98.03%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0508, Train Acc: 98.45%, Val Loss: 0.0308, Val Acc: 99.20%
✓ Completed - Test Accuracy: 99.13%

Experiment 22/64: MNIST_ResNet-18_bs16_Adam_lr0.001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.1426, Train Acc: 95.71%, Val Loss: 0.0647, Val Acc: 98.07%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.0661, Train Acc: 98.15%, Val Loss: 0.0569, Val Acc: 98.35%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0514, Train Acc: 98.47%, Val Loss: 0.0460, Val Acc: 98.63%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 4/5 - Train Loss: 0.0365, Train Acc: 98.95%, Val Loss: 0.0385, Val Acc: 98.88%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 5/5 - Train Loss: 0.0318, Train Acc: 99.03%, Val Loss: 0.0270, Val Acc: 99.32%
✓ Completed - Test Accuracy: 99.23%

Experiment 23/64: MNIST_ResNet-50_bs16_Adam_lr0.001_ep3_pmTrue


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/3 - Train Loss: 0.2332, Train Acc: 93.45%, Val Loss: 0.1811, Val Acc: 95.08%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/3 - Train Loss: 0.1227, Train Acc: 96.79%, Val Loss: 0.0915, Val Acc: 97.55%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/3 - Train Loss: 0.0950, Train Acc: 97.50%, Val Loss: 0.0565, Val Acc: 98.37%
✓ Completed - Test Accuracy: 98.56%

Experiment 24/64: MNIST_ResNet-50_bs16_Adam_lr0.001_ep5_pmTrue


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 1/5 - Train Loss: 0.2385, Train Acc: 93.47%, Val Loss: 0.1741, Val Acc: 95.42%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 2/5 - Train Loss: 0.1103, Train Acc: 96.99%, Val Loss: 0.1361, Val Acc: 95.67%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

Epoch 3/5 - Train Loss: 0.0809, Train Acc: 97.81%, Val Loss: 0.0718, Val Acc: 97.93%


Training:   0%|          | 0/2625 [00:00<?, ?it/s]

In [None]:
df_q1a = pd.DataFrame(results_q1a)
df_q1a.to_csv('results/q1a_results.csv', index=False)
df_q1a


In [None]:
def load_dataset_for_svm(dataset_name):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])
    
    if dataset_name == 'MNIST':
        train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
        test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)
    else:
        train_dataset = datasets.FashionMNIST('./data', train=True, download=True, transform=transform)
        test_dataset = datasets.FashionMNIST('./data', train=False, download=True, transform=transform)
    
    X_train = train_dataset.data.numpy().reshape(len(train_dataset), -1) / 255.0
    y_train = train_dataset.targets.numpy()
    X_test = test_dataset.data.numpy().reshape(len(test_dataset), -1) / 255.0
    y_test = test_dataset.targets.numpy()
    
    sample_size = min(10000, len(X_train))
    indices = np.random.choice(len(X_train), sample_size, replace=False)
    X_train = X_train[indices]
    y_train = y_train[indices]
    
    return X_train, y_train, X_test, y_test


In [None]:
results_q1b = []

print(f"\n{'='*80}")
print("Q1(b) - SVM Training")
print(f"{'='*80}")

for dataset_name in ['MNIST', 'FashionMNIST']:
    X_train, y_train, X_test, y_test = load_dataset_for_svm(dataset_name)
    
    for kernel in ['poly', 'rbf']:
        print(f"\nTraining SVM on {dataset_name} with {kernel} kernel...")
        start_time = time.time()
        
        svm = SVC(kernel=kernel, gamma='scale', max_iter=1000)
        svm.fit(X_train, y_train)
        
        train_time = (time.time() - start_time) * 1000
        
        y_pred = svm.predict(X_test)
        test_acc = accuracy_score(y_test, y_pred) * 100
        
        print(f"✓ Completed - Test Accuracy: {test_acc:.2f}%, Training Time: {train_time:.2f}ms")
        
        results_q1b.append({
            'Dataset': dataset_name,
            'Kernel': kernel,
            'Test Accuracy': f"{test_acc:.2f}",
            'Training Time (ms)': f"{train_time:.2f}"
        })


In [None]:
df_q1b = pd.DataFrame(results_q1b)
df_q1b.to_csv('results/q1b_results.csv', index=False)
df_q1b


In [None]:
def count_model_flops(model, input_size=64, device='cpu'):
    model_copy = model.to('cpu')
    model_copy.eval()
    
    input_tensor = torch.randn(1, 3, input_size, input_size)
    
    total_flops = 0
    
    def count_conv2d(m, x, y):
        nonlocal total_flops
        cin = m.in_channels
        kernel_ops = m.kernel_size[0] * m.kernel_size[1] * (cin // m.groups)
        output_elements = y.numel()
        total_flops += kernel_ops * output_elements
    
    def count_linear(m, x, y):
        nonlocal total_flops
        total_flops += m.in_features * m.out_features
    
    def count_bn(m, x, y):
        nonlocal total_flops
        total_flops += 2 * x[0].numel()
    
    hooks = []
    for name, module in model_copy.named_modules():
        if isinstance(module, nn.Conv2d):
            hooks.append(module.register_forward_hook(count_conv2d))
        elif isinstance(module, nn.Linear):
            hooks.append(module.register_forward_hook(count_linear))
        elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            hooks.append(module.register_forward_hook(count_bn))
    
    with torch.no_grad():
        model_copy(input_tensor)
    
    for hook in hooks:
        hook.remove()
    
    model.to(device)
    return total_flops


In [None]:
sample_resnet18 = get_model('ResNet-18', num_classes=10)
sample_resnet50 = get_model('ResNet-50', num_classes=10)

flops_r18 = count_model_flops(sample_resnet18, input_size=64)
flops_r50 = count_model_flops(sample_resnet50, input_size=64)

print(f"Actual FLOPs for 64x64 input:")
print(f"ResNet-18: {flops_r18:,} FLOPs ({flops_r18:.2e})")
print(f"ResNet-50: {flops_r50:,} FLOPs ({flops_r50:.2e})")

del sample_resnet18, sample_resnet50


In [None]:
results_q2 = []
dataset_name = 'FashionMNIST'
batch_size = 16
epoch_values_q2 = [3, 5]
pin_memory_values_q2 = [False, True]
USE_AMP_Q2 = True

configs_q2 = [
    {'optimizer': 'SGD', 'lr': 0.001},
    {'optimizer': 'Adam', 'lr': 0.001},
]


In [2]:
print(f"\n{'='*80}")
print("Q2 - CPU vs GPU Performance")
print(f"{'='*80}")

for compute in ['CPU', 'GPU']:
    device_q2 = torch.device('cpu' if compute == 'CPU' else 'cuda')
    
    if compute == 'GPU' and not torch.cuda.is_available():
        print(f"\n⚠ GPU not available, skipping GPU experiments")
        continue
    
    print(f"\n--- Running on {compute} ---")
    
    for pin_mem in pin_memory_values_q2:
        train_loader, val_loader, test_loader = load_and_split_dataset(dataset_name, batch_size, pin_mem)
        
        for config in configs_q2:
            opt_name = config['optimizer']
            lr = config['lr']
            
            for model_name in ['ResNet-18', 'ResNet-50']:
                for epochs in epoch_values_q2:
                    exp_name = f"{compute}_{model_name}_{opt_name}_lr{lr}_ep{epochs}_pm{pin_mem}"
                    print(f"\n{exp_name}")
                    
                    model = get_model(model_name, num_classes=10).to(device_q2)
                    criterion = nn.CrossEntropyLoss()
                    
                    if opt_name == 'SGD':
                        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                    else:
                        optimizer = optim.Adam(model.parameters(), lr=lr)
                    
                    start_time = time.time()
                    model, _, _, _, _ = train_model(
                        model, train_loader, val_loader, optimizer, criterion, device_q2, epochs, USE_AMP_Q2
                    )
                    train_time = (time.time() - start_time) * 1000
                    
                    _, test_acc = validate(model, test_loader, criterion, device_q2)
                    
                    flops = count_model_flops(model, input_size=64, device=device_q2)
                    
                    print(f"✓ Test Accuracy: {test_acc:.2f}%, Training Time: {train_time:.2f}ms, FLOPs: {flops:.2e}")
                    
                    results_q2.append({
                        'Compute': compute,
                        'Batch Size': batch_size,
                        'Optimizer': opt_name,
                        'Learning Rate': lr,
                        'Epochs': epochs,
                        'Pin Memory': pin_mem,
                        'USE_AMP': USE_AMP_Q2,
                        'Model': model_name,
                        'Test Accuracy': f"{test_acc:.2f}",
                        'Training Time (ms)': f"{train_time:.2f}",
                        'FLOPs': f"{flops:.2e}"
                    })





Q2 - CPU vs GPU Performance

--- Running on CPU ---

CPU_ResNet-18_SGD_lr0.001_ep3_pmFalse
✓ Test Accuracy: 86.56%, Training Time: 3269880.52ms, FLOPs: 5.16e+08

CPU_ResNet-18_SGD_lr0.001_ep5_pmFalse
✓ Test Accuracy: 88.32%, Training Time: 5449800.87ms, FLOPs: 5.16e+08

CPU_ResNet-18_Adam_lr0.001_ep3_pmFalse
✓ Test Accuracy: 85.46%, Training Time: 3090956.71ms, FLOPs: 5.16e+08

CPU_ResNet-18_Adam_lr0.001_ep5_pmFalse
✓ Test Accuracy: 87.15%, Training Time: 5151594.52ms, FLOPs: 5.16e+08

CPU_ResNet-50_SGD_lr0.001_ep3_pmFalse
✓ Test Accuracy: 76.81%, Training Time: 10360176.85ms, FLOPs: 1.17e+09

CPU_ResNet-50_SGD_lr0.001_ep5_pmFalse
✓ Test Accuracy: 79.23%, Training Time: 17266961.42ms, FLOPs: 1.17e+09

CPU_ResNet-50_Adam_lr0.001_ep3_pmFalse
✓ Test Accuracy: 81.40%, Training Time: 10534333.43ms, FLOPs: 1.17e+09

CPU_ResNet-50_Adam_lr0.001_ep5_pmFalse
✓ Test Accuracy: 83.67%, Training Time: 17557222.38ms, FLOPs: 1.17e+09

CPU_ResNet-18_SGD_lr0.001_ep3_pmTrue
✓ Test Accuracy: 86.89%, Trai