In [5]:
# Mohammadmilad Sayyad
# Problem 2.a


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

import time



In [6]:
# Runtime configuration: compute device, RNG seed, hyperparameters and the
# preprocessing pipelines applied to the train / test images.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Seed PyTorch's random number generators so weight initialization and the
# shuffling order of the training loader repeat from run to run.
# NOTE(review): GPU kernels may still be non-deterministic; see the PyTorch
# reproducibility notes if bit-exact reruns are required.
seed = 42
torch.manual_seed(seed)

# Hyperparameters
batch_size   = 128
learning_rate = 0.01
num_epochs    = 50    # shortened schedule: 50 epochs instead of 300

# CIFAR-10 normalization (per-channel mean / std passed to transforms.Normalize)
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2023, 0.1994, 0.2010)

# Train and test currently share the same pipeline (tensor conversion followed
# by normalization); they are kept as separate objects so either can be
# changed independently.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


Using device: cuda


In [7]:
# CIFAR-10 train / test splits stored under ./data (download=True asks
# torchvision to fetch the archive), each with its own preprocessing pipeline.
train_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=transform_train,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=transform_test,
    download=True,
)

# Options shared by both loaders; only the training loader shuffles.
loader_options = {"batch_size": batch_size, "num_workers": 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

print("Train samples:", len(train_dataset))
print("Test samples:", len(test_dataset))


Train samples: 50000
Test samples: 10000


In [8]:
class ResidualBlock(nn.Module):
    """Two bias-free 3x3 convolutions wrapped by a skip connection.

    The first convolution applies ``stride``; the second always uses stride 1.
    When the block changes the channel count or uses a stride other than 1,
    the skip path is a bias-free 1x1 convolution with the same stride so the
    two branches can be added; otherwise the input is added unchanged and
    ``self.shortcut`` is ``None``. The block contains no normalization layers.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the projection, if any).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Settings common to both 3x3 convolutions of the main branch.
        conv3x3 = {"kernel_size": 3, "padding": 1, "bias": False}
        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)

        # A plain identity skip only works when the shapes of input and output agree.
        if stride == 1 and in_channels == out_channels:
            self.shortcut = None
        else:
            self.shortcut = nn.Conv2d(
                in_channels, out_channels, kernel_size=1, stride=stride, bias=False
            )

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + skip(x))``."""
        residual = self.conv2(F.relu(self.conv1(x)))

        if self.shortcut is None:
            skip = x
        else:
            skip = self.shortcut(x)

        return F.relu(residual + skip)


class ResNet10(nn.Module):
    """CIFAR-style residual network built from ten ``ResidualBlock`` modules.

    Layout: a 3x3 stem convolution (3 -> 16 channels, no pooling), three stages
    of residual blocks with 16, 32 and 64 channels (3, 3 and 4 blocks), global
    average pooling, and a linear classifier. The second and third stages start
    with a stride-2 block, halving the spatial resolution each time.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Channel count of the feature map entering the next block; advanced
        # by _make_layer as stages are created.
        self.in_channels = 16

        # First conv (no pooling, CIFAR style)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)

        # 10 blocks total: 3 + 3 + 4 (resolutions noted for 32x32 inputs)
        self.layer1 = self._make_layer(16, num_blocks=3, stride=1)  # 32x32
        self.layer2 = self._make_layer(32, num_blocks=3, stride=2)  # 16x16
        self.layer3 = self._make_layer(64, num_blocks=4, stride=2)  # 8x8

        # Classifier applied to the globally average-pooled features
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride):
        """Build one stage: a leading block with ``stride``, then stride-1 blocks.

        Updates ``self.in_channels`` to ``out_channels`` so the next stage is
        wired to this stage's output width.
        """
        # Only the leading block may change resolution / channel count.
        blocks = [ResidualBlock(self.in_channels, out_channels, stride=stride)]
        self.in_channels = out_channels

        for _ in range(num_blocks - 1):
            blocks.append(ResidualBlock(out_channels, out_channels, stride=1))

        return nn.Sequential(*blocks)

    def forward(self, x):
        """Map a batch of images to class logits of shape (N, num_classes)."""
        features = F.relu(self.conv1(x))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        # Global average pool to 1x1, then drop the spatial dims -> (N, 64)
        pooled = F.adaptive_avg_pool2d(features, (1, 1))
        pooled = pooled.view(pooled.size(0), -1)
        return self.fc(pooled)


# Build the network, move its parameters to the selected device, and print
# the layer layout.
model = ResNet10(num_classes=10)
model = model.to(device)
print(model)


ResNet10(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (2): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
  )
  (layer2): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias

In [9]:
def count_parameters(model):
    """Return the total element count over the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is off are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Count the trainable parameters of the instantiated model and report the
# figure; num_params is printed again in the final results summary.
num_params = count_parameters(model)
print("ResNet-10 trainable parameters:", num_params)


ResNet-10 trainable parameters: 344634


In [10]:
def train_one_epoch(model, optimizer, criterion, dataloader, device):
    """Run one optimization pass over ``dataloader`` and return the mean loss.

    Args:
        model: Network to train; switched to training mode for the pass.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss callable ``criterion(outputs, labels)``. The average
            assumes it returns the mean loss over the batch.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Per-sample loss averaged over the samples actually processed,
        or ``nan`` if the loader yielded no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size so a smaller last batch is
        # averaged correctly.
        batch_len = images.size(0)
        running_loss += loss.item() * batch_len
        samples_seen += batch_len

    # Divide by the samples actually visited rather than len(dataloader.dataset):
    # the two differ with drop_last=True or a subset sampler, and a plain
    # iterable of batches has no .dataset attribute at all.
    if samples_seen == 0:
        return float("nan")
    return running_loss / samples_seen


def evaluate(model, dataloader, device, criterion=None):
    """Compute the mean loss and top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: Network to evaluate; switched to eval mode and left in it.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Optional loss callable ``criterion(outputs, labels)``
            returning the batch mean. Defaults to ``nn.CrossEntropyLoss()``.

    Returns:
        tuple[float, float]: ``(avg_loss, accuracy)``, where ``accuracy`` is a
        percentage in ``[0, 100]``. Both are ``nan`` if the loader yielded no
        samples.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_len = labels.size(0)
            running_loss += loss.item() * batch_len
            total += batch_len

            # Predicted class = index of the largest logit along the class axis.
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()

    # Loss and accuracy share one denominator, the samples actually seen, so
    # they stay consistent with drop_last / subset samplers. An empty loader
    # yields nan instead of raising ZeroDivisionError.
    if total == 0:
        return float("nan"), float("nan")

    avg_loss = running_loss / total
    accuracy = 100.0 * correct / total
    return avg_loss, accuracy


In [11]:
# Train with SGD + momentum, evaluating on the test split after every epoch
# and recording the per-epoch curves for the final summary.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

train_losses = []
test_losses = []
test_accuracies = []

# The measured wall-clock time covers both the training passes and the
# per-epoch test evaluations.
start_time = time.time()
# Report the configured epoch count instead of a hard-coded "50" so the
# banner stays correct when num_epochs is changed.
print(f"Training ResNet-10 (Problem 2.a) for {num_epochs} epochs...")

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, criterion, train_loader, device)
    test_loss, test_acc = evaluate(model, test_loader, device)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f} | "
          f"Test Loss: {test_loss:.4f} | "
          f"Test Acc: {test_acc:.2f}%")

end_time = time.time()
training_time = end_time - start_time

print(f"\n=== Final Results for Problem 2.a (ResNet-10, {num_epochs} epochs) ===")
print(f"Training time: {training_time:.2f} seconds")
print(f"Final training loss (epoch {num_epochs}): {train_losses[-1]:.4f}")
print(f"Final test loss   (epoch {num_epochs}): {test_losses[-1]:.4f}")
print(f"Final test accuracy (epoch {num_epochs}): {test_accuracies[-1]:.2f}%")
print(f"Model size (parameters): {num_params}")


Training ResNet-10 (Problem 2.a) for 50 epochs...
Epoch [1/50] Train Loss: 2.0672 | Test Loss: 1.8689 | Test Acc: 31.51%
Epoch [2/50] Train Loss: 1.7330 | Test Loss: 1.6404 | Test Acc: 38.41%
Epoch [3/50] Train Loss: 1.5500 | Test Loss: 1.4710 | Test Acc: 45.93%
Epoch [4/50] Train Loss: 1.4009 | Test Loss: 1.2757 | Test Acc: 53.06%
Epoch [5/50] Train Loss: 1.2742 | Test Loss: 1.3032 | Test Acc: 54.02%
Epoch [6/50] Train Loss: 1.1432 | Test Loss: 1.1180 | Test Acc: 59.88%
Epoch [7/50] Train Loss: 1.0489 | Test Loss: 1.1027 | Test Acc: 60.71%
Epoch [8/50] Train Loss: 0.9485 | Test Loss: 0.9205 | Test Acc: 67.67%
Epoch [9/50] Train Loss: 0.8674 | Test Loss: 0.8783 | Test Acc: 69.76%
Epoch [10/50] Train Loss: 0.8014 | Test Loss: 0.8204 | Test Acc: 71.76%
Epoch [11/50] Train Loss: 0.7394 | Test Loss: 0.7589 | Test Acc: 73.70%
Epoch [12/50] Train Loss: 0.6672 | Test Loss: 0.7893 | Test Acc: 74.03%
Epoch [13/50] Train Loss: 0.6307 | Test Loss: 0.6848 | Test Acc: 76.73%
Epoch [14/50] Train Los