In [3]:
# ============================================
# Problem 3(b) - Deeper Fully Connected NN
# 3 hidden layers: 512, 256, 128
# Train for 100 epochs and report final metrics
# ============================================

import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ------------------------
# 1. Hyperparameters
# ------------------------
# Layer sizes: flattened image -> three hidden layers -> class logits.
input_dim = 3 * 32 * 32  # 3072 values per flattened image
hidden1, hidden2, hidden3 = 512, 256, 128  # hidden1 is the same as part (a)
num_classes = 10

# Optimisation settings.
batch_size = 128
num_epochs = 100
learning_rate = 1e-3

# ------------------------
# 2. CIFAR-10 Dataset & Loaders
# ------------------------
# Convert each image to a tensor, then normalise every RGB channel with the
# per-channel mean / std constants given here.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2470, 0.2435, 0.2616),
        ),
    ]
)

# CIFAR-10 train / test splits stored under ./data (download requested).
train_dataset = torchvision.datasets.CIFAR10(
    root="./data", train=True, transform=transform, download=True
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data", train=False, transform=transform, download=True
)

# Training batches are reshuffled each epoch; test batches keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

print("Train samples:", len(train_dataset))
print("Test samples :", len(test_dataset))

# ------------------------
# 3. Deeper Fully Connected Network
# ------------------------
class FCNetThreeHidden(nn.Module):
    """Fully connected classifier with three ReLU hidden layers.

    Architecture: ``input_dim -> h1 -> h2 -> h3 -> num_classes``. The final
    layer returns raw logits (no softmax applied).

    Args:
        input_dim: Number of features per sample after flattening.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        h3: Width of the third hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, h1, h2, h3, num_classes):
        super().__init__()
        # Attribute names and registration order are kept so state_dict keys
        # and the printed module repr stay the same.
        self.fc1 = nn.Linear(input_dim, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return logits of shape ``(B, num_classes)`` for a batch ``x``.

        Every dimension after the first is flattened, so ``x`` may be
        ``(B, C, H, W)`` or already ``(B, input_dim)``.
        """
        # torch.flatten also accepts non-contiguous inputs (e.g. a permuted
        # NHWC batch), where .view() would raise a RuntimeError.
        x = torch.flatten(x, 1)     # (B, input_dim)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return self.fc4(x)          # logits

# Build the three-hidden-layer network, then move its parameters to `device`.
model_deep = FCNetThreeHidden(
    input_dim=input_dim,
    h1=hidden1,
    h2=hidden2,
    h3=hidden3,
    num_classes=num_classes,
)
model_deep = model_deep.to(device)
print(model_deep)

# ------------------------
# 4. Parameter count (for size comparison)
# ------------------------
def count_params(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with ``requires_grad`` set to False are skipped.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Size of the deep model, reported for comparison with the baseline.
num_params_deep = count_params(model=model_deep)
print(
    f"Number of trainable parameters (deep model, 3 hidden layers): {num_params_deep}"
)

# If you still have the baseline 1-hidden-layer model `model` from 3(a):
# num_params_base = count_params(model)
# print(f"Number of trainable parameters (baseline, 1 hidden layer): {num_params_base}")

# ------------------------
# 5. Loss, Optimizer, Evaluation Function
# ------------------------
# Cross-entropy on the raw logits, optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_deep.parameters(), lr=learning_rate)

def evaluate_accuracy(model, data_loader, device):
    """Return the top-1 accuracy of ``model`` over ``data_loader`` in percent.

    Args:
        model: Module mapping a batch of inputs to per-class scores; the
            index of the largest score along dim 1 is the prediction.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Accuracy as a float between 0 and 100. Returns ``0.0`` when
        ``data_loader`` yields no samples, rather than dividing by zero.

    Note:
        The model is switched to eval mode and left there; call
        ``model.train()`` again before resuming training.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            preds = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    if total == 0:
        # Nothing was evaluated; avoid ZeroDivisionError.
        return 0.0
    return 100.0 * correct / total

# ------------------------
# 6. Training Loop – 100 epochs
# ------------------------
# Per-epoch history: mean training loss and test-set accuracy.
train_losses = []
test_accuracies = []

start_total_time = time.time()

for epoch in range(num_epochs):
    # evaluate_accuracy() leaves the model in eval mode, so training mode is
    # re-enabled at the start of every epoch.
    model_deep.train()
    start_epoch = time.time()

    running_loss = 0.0
    total_batches = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss on the current mini-batch.
        outputs = model_deep(images)
        loss = criterion(outputs, labels)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        total_batches += 1

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = running_loss / total_batches
    train_losses.append(avg_train_loss)

    # Evaluate accuracy on test set
    test_acc = evaluate_accuracy(model_deep, test_loader, device)
    test_accuracies.append(test_acc)

    # Covers both the training pass and the test-set evaluation above.
    epoch_time = time.time() - start_epoch

    # Print every 5 epochs + first + last
    if (epoch + 1) == 1 or (epoch + 1) % 5 == 0 or (epoch + 1) == num_epochs:
        print(f"Epoch [{epoch+1:03d}/{num_epochs}] | "
              f"Epoch Time: {epoch_time:.2f}s | "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Test Accuracy: {test_acc:.2f}%")

total_training_time = time.time() - start_total_time

# Final metrics are those recorded for the last epoch.
final_train_loss = train_losses[-1]
final_test_acc = test_accuracies[-1]

# The banner uses num_epochs so it stays correct if the hyperparameter changes.
print(f"\n===== Final Results after {num_epochs} epochs =====")
print(f"Total training time: {total_training_time:.2f} seconds")
print(f"Final training loss: {final_train_loss:.4f}")
print(f"Final test accuracy: {final_test_acc:.2f}%")
print("==========================================")


Using device: cuda
Train samples: 50000
Test samples : 10000
FCNetThreeHidden(
  (fc1): Linear(in_features=3072, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=10, bias=True)
  (relu): ReLU()
)
Number of trainable parameters (deep model, 3 hidden layers): 1738890
Epoch [001/100] | Epoch Time: 14.04s | Train Loss: 1.6579 | Test Accuracy: 46.62%
Epoch [005/100] | Epoch Time: 13.87s | Train Loss: 1.1475 | Test Accuracy: 53.53%
Epoch [010/100] | Epoch Time: 13.99s | Train Loss: 0.7932 | Test Accuracy: 53.23%
Epoch [015/100] | Epoch Time: 14.02s | Train Loss: 0.5293 | Test Accuracy: 53.92%
Epoch [020/100] | Epoch Time: 13.94s | Train Loss: 0.3762 | Test Accuracy: 53.45%
Epoch [025/100] | Epoch Time: 13.97s | Train Loss: 0.2871 | Test Accuracy: 53.05%
Epoch [030/100] | Epoch Time: 13.91s | Train Loss: 0.2259 | Test Accuracy: 53.28%
Epoch [035