In [1]:
# Mohammadmilad Sayyad
# Problem 1.b

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import time


In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Seed PyTorch's random number generators so that weight initialisation and
# DataLoader shuffling are repeatable across "Restart Kernel -> Run All".
# NOTE(review): some CUDA/cuDNN kernels may still be non-deterministic, so GPU
# runs can differ slightly even with a fixed seed.
seed = 42
torch.manual_seed(seed)

# Optimisation hyperparameters.
batch_size = 128
learning_rate = 0.01
num_epochs = 100   # <<< PROBLEM 1.b requirement

# Per-channel (R, G, B) statistics passed to Normalize below.
# Presumably the CIFAR-10 training-set mean/std -- TODO confirm the source.
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2023, 0.1994, 0.2010)

# Training pipeline: convert to tensor, then normalise per channel.
# No data augmentation is applied.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Test pipeline: identical preprocessing to the training pipeline.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


Using device: cuda


In [3]:
# CIFAR-10 training split, stored under ./data (download=True asks torchvision
# to fetch it when needed), preprocessed with the training transform.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)

# CIFAR-10 test split, preprocessed with the test transform.
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Mini-batch iterators: the training loader reshuffles every epoch,
# the test loader keeps a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)


100%|██████████| 170M/170M [00:14<00:00, 11.9MB/s]


In [4]:
class CNN_Deeper(nn.Module):
    """Three-stage convolutional classifier followed by a two-layer MLP head.

    Every stage applies a 3x3 convolution (padding 1), a ReLU and a 2x2
    max-pool, so the channel count grows 3 -> 32 -> 64 -> 128 while each
    spatial dimension is halved three times. The first fully connected layer
    is sized for 128 * 4 * 4 features, i.e. for 32x32 input images.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional feature extractor (spatial size preserved by padding=1).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)

        # One stateless pooling module shared by all three stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Three poolings shrink 32 -> 16 -> 8 -> 4, leaving 128 maps of 4x4.
        flat_features = 128 * 4 * 4
        self.fc1 = nn.Linear(flat_features, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores (logits)."""
        # conv -> ReLU -> pool, once per stage.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        # Collapse (C, H, W) into one feature vector per sample.
        features = torch.flatten(x, start_dim=1)
        hidden = F.relu(self.fc1(features))
        return self.fc2(hidden)

# Build the network, then move its parameters onto the selected device.
model = CNN_Deeper()
model = model.to(device)
print(model)


CNN_Deeper(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


In [5]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted; frozen ones are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Count the trainable parameters of the instantiated model and report the figure.
num_params = count_parameters(model)
print("Model size (trainable parameters):", num_params)


Model size (trainable parameters): 620362


In [6]:
def train_one_epoch(model, optimizer, criterion, dataloader, device):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimiser that updates ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The per-sample average loss over the samples actually processed.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a smaller final batch does not
        # skew the epoch average (assumes the criterion returns a batch mean).
        n_batch = images.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch

    # Normalise by the samples actually seen rather than len(dataloader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    return running_loss / samples_seen


def evaluate(model, dataloader, device, criterion=None):
    """Compute the mean loss and accuracy of ``model`` over ``dataloader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Optional loss callable invoked as ``criterion(outputs, labels)``.
            Defaults to ``nn.CrossEntropyLoss()``.

    Returns:
        A ``(mean_loss, accuracy_percent)`` tuple, both averaged over the
        samples actually processed.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    correct = 0
    total = 0
    running_loss = 0.0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight by batch size (assumes the criterion returns a batch mean).
            n_batch = labels.size(0)
            running_loss += loss.item() * n_batch

            # Predicted class = index of the largest logit.
            predicted = outputs.argmax(dim=1)
            total += n_batch
            correct += (predicted == labels).sum().item()

    # Loss and accuracy share the same denominator: the samples actually seen,
    # which can differ from len(dataloader.dataset) (drop_last, subset samplers).
    return running_loss / total, 100 * correct / total


In [7]:
# Loss and optimiser: cross-entropy on the logits, SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# Per-epoch history, one entry appended per epoch.
train_losses = []
test_losses = []
test_accuracies = []

# perf_counter() is a monotonic clock intended for measuring elapsed time,
# so the duration is unaffected by system clock adjustments.
start_time = time.perf_counter()
# The epoch count comes from num_epochs so the banner cannot drift from the config.
print(f"Training CNN_Deeper (Problem 1.b) for {num_epochs} epochs...")

for epoch in range(num_epochs):
    # One optimisation pass over the training set, then a full test-set evaluation.
    train_loss = train_one_epoch(model, optimizer, criterion, train_loader, device)
    test_loss, test_acc = evaluate(model, test_loader, device)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f} | "
          f"Test Loss: {test_loss:.4f} | "
          f"Test Acc: {test_acc:.2f}%")

end_time = time.perf_counter()
# Covers both training and the per-epoch test evaluation.
training_time = end_time - start_time

# Summary of the last epoch's metrics (not the best epoch).
print(f"\n=== Final Results for Problem 1.b (Deeper CNN, {num_epochs} epochs) ===")
print(f"Training Time: {training_time:.2f} seconds")
print(f"Final Training Loss: {train_losses[-1]:.4f}")
print(f"Final Test Loss: {test_losses[-1]:.4f}")
print(f"Final Test Accuracy: {test_accuracies[-1]:.2f}%")
print(f"Total Parameters: {num_params}")


Training CNN_Deeper (Problem 1.b) for 100 epochs...
Epoch [1/100] Train Loss: 1.7106 | Test Loss: 1.2963 | Test Acc: 53.59%
Epoch [2/100] Train Loss: 1.1841 | Test Loss: 1.0549 | Test Acc: 62.79%
Epoch [3/100] Train Loss: 0.9531 | Test Loss: 0.9571 | Test Acc: 65.28%
Epoch [4/100] Train Loss: 0.7966 | Test Loss: 0.8150 | Test Acc: 72.26%
Epoch [5/100] Train Loss: 0.6881 | Test Loss: 0.7928 | Test Acc: 73.06%
Epoch [6/100] Train Loss: 0.5889 | Test Loss: 0.7334 | Test Acc: 74.21%
Epoch [7/100] Train Loss: 0.4986 | Test Loss: 0.7466 | Test Acc: 75.08%
Epoch [8/100] Train Loss: 0.4090 | Test Loss: 0.8115 | Test Acc: 74.80%
Epoch [9/100] Train Loss: 0.3418 | Test Loss: 0.7876 | Test Acc: 75.74%
Epoch [10/100] Train Loss: 0.2686 | Test Loss: 0.8651 | Test Acc: 75.58%
Epoch [11/100] Train Loss: 0.2039 | Test Loss: 0.9233 | Test Acc: 75.53%
Epoch [12/100] Train Loss: 0.1629 | Test Loss: 0.9304 | Test Acc: 75.98%
Epoch [13/100] Train Loss: 0.1293 | Test Loss: 1.0733 | Test Acc: 75.42%
Epoch [1