In [1]:
# Mohammadmilad Sayyad
# Problem 1.a

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

import time
import numpy as np


In [2]:
# Select device (GPU if available, otherwise fall back to CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling start from the same state on every "Restart Kernel -> Run All".
# NOTE: some CUDA kernels are non-deterministic, so GPU runs may still differ
# slightly from one execution to the next.
random_seed = 42
torch.manual_seed(random_seed)

# Hyperparameters (you can tune batch_size or lr if needed).
# NOTE: the Problem 1.a training cell further down re-assigns num_epochs and
# learning_rate, so the values set there are the ones that run actually uses.
num_epochs = 300          # <<< For testing, set to 5 or 10 first
batch_size = 128
learning_rate = 0.01

# CIFAR-10 normalization values (standard): one entry per RGB channel
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)

# Data transforms: basic preprocessing (no heavy augmentation for baseline).
# Train and test use the same pipeline: convert to tensor, then normalize
# each channel with the mean/std above.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


Using device: cuda


In [3]:
# Build both CIFAR-10 splits under ./data (download=True lets torchvision
# fetch the archive). The training split is constructed first, then the
# test split, each with its own transform pipeline.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(
        root='./data', train=is_train, download=True, transform=split_transform
    )
    for is_train, split_transform in (
        (True, transform_train),
        (False, transform_test),
    )
)

# Mini-batch iterators: only the training split is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Quick sanity check on the split sizes
print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


Train samples: 50000
Test samples: 10000


In [4]:
class CNN_Baseline(nn.Module):
    """Small two-block convolutional classifier for 3x32x32 images.

    Layout: conv(3->32) -> ReLU -> 2x2 max-pool -> conv(32->64) -> ReLU ->
    2x2 max-pool -> flatten -> linear(4096->256) -> ReLU ->
    linear(256->num_classes).

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Convolutional part (3x3 kernels with padding=1 keep the spatial size)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # One pooling module is reused after each convolution; it has no weights.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # After two 2x2 pools on 32x32:
        # 32x32 -> 16x16 -> 8x8
        # Channels: 64
        # Flatten size = 64 * 8 * 8 = 4096
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits.

        Args:
            x: Tensor of shape (N, 3, 32, 32); fc1 is sized for this input.

        Returns:
            Tensor of shape (N, num_classes) with unnormalised scores.
        """
        x = self.pool(F.relu(self.conv1(x)))  # -> (N, 32, 16, 16)
        x = self.pool(F.relu(self.conv2(x)))  # -> (N, 64, 8, 8)
        # flatten (rather than .view) also works when the activations are not
        # contiguous, e.g. for channels_last inputs, where .view would raise.
        x = torch.flatten(x, 1)               # -> (N, 4096)
        x = F.relu(self.fc1(x))               # -> (N, 256)
        x = self.fc2(x)                       # -> (N, 10)
        return x

# Build the baseline network, move it onto the selected device,
# and print its layer summary
model = CNN_Baseline(num_classes=10)
model = model.to(device)
print(model)


CNN_Baseline(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


In [5]:
def train_one_epoch(model, optimizer, criterion, dataloader, device):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Loss callable used as ``criterion(outputs, labels)``. It is
            expected to return the batch mean, because each batch loss is
            re-weighted by the batch size before averaging.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Loss averaged over the samples actually processed.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    num_samples = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is averaged correctly.
        n_batch = images.size(0)
        running_loss += loss.item() * n_batch
        num_samples += n_batch

    if num_samples == 0:
        raise ValueError("dataloader yielded no samples")

    # Divide by the samples seen, not len(dataloader.dataset): the two differ
    # with drop_last=True or a subset sampler, and plain iterables of batches
    # have no .dataset attribute at all.
    epoch_loss = running_loss / num_samples
    return epoch_loss


def evaluate(model, dataloader, device, criterion=None):
    """Compute the mean loss and top-1 accuracy of ``model`` on ``dataloader``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Optional loss callable used as ``criterion(outputs, labels)``
            and expected to return the batch mean. Defaults to
            ``nn.CrossEntropyLoss()``.

    Returns:
        tuple: ``(avg_loss, accuracy)`` where ``avg_loss`` is averaged over the
        samples actually processed and ``accuracy`` is a percentage (0-100).

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            # Weight by batch size so a smaller final batch is averaged correctly.
            running_loss += loss.item() * images.size(0)

            # Predicted class = index of the largest logit in each row.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples")

    # Loss and accuracy share the same denominator: the samples actually seen.
    avg_loss = running_loss / total
    accuracy = 100.0 * correct / total
    return avg_loss, accuracy


In [8]:
# ===== Problem 1.a – Training (50 epochs) =====

# Utility function placed here so it's always defined
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is False are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

# (Re)create the model so this run is fresh
model = CNN_Baseline(num_classes=10).to(device)

# Loss and optimizer (SGD with momentum)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# How many epochs for this experiment.
# The log messages below are derived from this value, so changing it here
# keeps the printed text in sync with what actually ran.
num_epochs = 50

# Model size (number of parameters)
num_params = count_parameters(model)
print(f"Number of trainable parameters (model size): {num_params}")

# Per-epoch history; index i holds the values after epoch i + 1
train_losses = []
test_losses = []
test_accuracies = []

start_time = time.time()
print(f"Starting training for Problem 1.a (Baseline CNN) with {num_epochs} epochs...")

for epoch in range(num_epochs):
    epoch_train_loss = train_one_epoch(model, optimizer, criterion, train_loader, device)
    train_losses.append(epoch_train_loss)

    # The test split is evaluated after every epoch so the curves can be tracked.
    # The elapsed time measured below therefore includes these evaluation passes.
    test_loss, test_acc = evaluate(model, test_loader, device)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {epoch_train_loss:.4f} | "
          f"Test Loss: {test_loss:.4f} | "
          f"Test Acc: {test_acc:.2f}%")

end_time = time.time()
training_time_seconds = end_time - start_time

# Summary uses the values recorded at the last epoch
print(f"\n=== Final Results for Problem 1.a (Baseline CNN, {num_epochs} epochs) ===")
print(f"Total training time: {training_time_seconds:.2f} seconds")
print(f"Final training loss (epoch {num_epochs}): {train_losses[-1]:.4f}")
print(f"Final test loss (epoch {num_epochs}): {test_losses[-1]:.4f}")
print(f"Final test accuracy (epoch {num_epochs}): {test_accuracies[-1]:.2f}%")
print(f"Model size (number of parameters): {num_params}")


Number of trainable parameters (model size): 1070794
Starting training for Problem 1.a (Baseline CNN) with 50 epochs...
Epoch [1/50] Train Loss: 1.5300 | Test Loss: 1.2430 | Test Acc: 55.36%
Epoch [2/50] Train Loss: 1.0903 | Test Loss: 0.9936 | Test Acc: 65.06%
Epoch [3/50] Train Loss: 0.8903 | Test Loss: 0.9422 | Test Acc: 66.90%
Epoch [4/50] Train Loss: 0.7553 | Test Loss: 0.8518 | Test Acc: 70.54%
Epoch [5/50] Train Loss: 0.6384 | Test Loss: 0.8936 | Test Acc: 69.95%
Epoch [6/50] Train Loss: 0.5145 | Test Loss: 0.8371 | Test Acc: 72.48%
Epoch [7/50] Train Loss: 0.4014 | Test Loss: 0.8509 | Test Acc: 72.96%
Epoch [8/50] Train Loss: 0.2994 | Test Loss: 0.9226 | Test Acc: 73.45%
Epoch [9/50] Train Loss: 0.2118 | Test Loss: 1.0373 | Test Acc: 72.29%
Epoch [10/50] Train Loss: 0.1379 | Test Loss: 1.1548 | Test Acc: 72.39%
Epoch [11/50] Train Loss: 0.0979 | Test Loss: 1.2979 | Test Acc: 72.18%
Epoch [12/50] Train Loss: 0.0673 | Test Loss: 1.4139 | Test Acc: 71.81%
Epoch [13/50] Train Loss: