In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import time

# --- Configuration ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

batch_size = 128
num_classes = 10
num_epochs_scratch = 15
learning_rate_scratch = 0.01

# --- 1. Define the AlexNet Architecture (For Task 1) ---

class AlexNetOriginal(nn.Module):
    """
    AlexNet architecture using the original filter sizes and strides,
    requiring 227x227 input images. Flattened size is 256 * 6 * 6 = 9216.
    """
    def __init__(self, num_classes=10):
        super(AlexNetOriginal, self).__init__()

        # --- Feature Extractor ---
        self.features = nn.Sequential(
            # Layer 1: Conv2d, ReLU, MaxPool2d
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 2: Conv2d, ReLU, MaxPool2d
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 3: Conv2d, ReLU
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Layer 4: Conv2d, ReLU
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Layer 5: Conv2d, ReLU, MaxPool2d
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )

        FC_INPUT_SIZE = 256 * 6 * 6  # 9216

        # --- Classifier ---
        self.classifier = nn.Sequential(
            # Layer 6: Linear , ReLU, Dropout
            nn.Linear(in_features=FC_INPUT_SIZE, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            # Layer 7: Linear , ReLU, Dropout
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            # Output Layer : Linear (to num_classes)
            nn.Linear(in_features=4096, out_features=num_classes)
        )

    def forward(self, x):
        # Pass input through feature extractor
        x = self.features(x)
        # Flatten the output for the classifier
        x = x.view(x.size(0), -1)
        # Pass flattened features through classifier
        final = self.classifier(x)
        # Return the final output
        return final

# --- 2. Data Preparation (CIFAR-10) ---

transform = transforms.Compose([
    transforms.Resize((227, 227)), # Resize for AlexNet
    transforms.ToTensor(),
    # Standard CIFAR-10 normalization
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616))
])

# Load and prepare data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                         shuffle=False, num_workers=2)

# --- General Training and Evaluation Functions ---

def train_model(model, criterion, optimizer, train_loader, epochs, model_name="Model"):
    """Generic training loop."""
    print(f"\n--- Starting Training for {model_name} ({epochs} epochs) ---")
    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if i % 100 == 99:
                print(f'[{model_name} - Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.3f}')
                running_loss = 0.0
    end_time = time.time()
    print(f'Training Complete for {model_name}. Time taken: {end_time - start_time:.2f}s')

def evaluate_model(model, test_loader, model_name="Model"):
    """Generic evaluation function."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'\nAccuracy of {model_name} on the test images: {accuracy:.2f} %')
    return accuracy

# ----------------------------------------------------------------------
#                         Task 1: Train from Scratch
# ----------------------------------------------------------------------

print("=========================================================")
print("TASK 1: Training AlexNetOriginal from Scratch on CIFAR-10")
print("=========================================================")

model_scratch = AlexNetOriginal(num_classes=num_classes).to(device)
criterion_scratch = nn.CrossEntropyLoss()
optimizer_scratch = optim.SGD(model_scratch.parameters(),
                              lr=learning_rate_scratch, momentum=0.9, weight_decay=0.0005)

# Train the model from scratch
train_model(model_scratch, criterion_scratch, optimizer_scratch, trainloader, num_epochs_scratch, "AlexNet Scratch")
accuracy_scratch = evaluate_model(model_scratch, testloader, "AlexNet Scratch")

Using device: cuda
TASK 1: Training AlexNetOriginal from Scratch on CIFAR-10

--- Starting Training for AlexNet Scratch (15 epochs) ---
[AlexNet Scratch - Epoch 1, Batch 100] Loss: 2.297
[AlexNet Scratch - Epoch 1, Batch 200] Loss: 2.078
[AlexNet Scratch - Epoch 1, Batch 300] Loss: 1.846
[AlexNet Scratch - Epoch 2, Batch 100] Loss: 1.502
[AlexNet Scratch - Epoch 2, Batch 200] Loss: 1.425
[AlexNet Scratch - Epoch 2, Batch 300] Loss: 1.378
[AlexNet Scratch - Epoch 3, Batch 100] Loss: 1.167
[AlexNet Scratch - Epoch 3, Batch 200] Loss: 1.129
[AlexNet Scratch - Epoch 3, Batch 300] Loss: 1.017
[AlexNet Scratch - Epoch 4, Batch 100] Loss: 0.911
[AlexNet Scratch - Epoch 4, Batch 200] Loss: 0.870
[AlexNet Scratch - Epoch 4, Batch 300] Loss: 0.831
[AlexNet Scratch - Epoch 5, Batch 100] Loss: 0.729
[AlexNet Scratch - Epoch 5, Batch 200] Loss: 0.705
[AlexNet Scratch - Epoch 5, Batch 300] Loss: 0.692
[AlexNet Scratch - Epoch 6, Batch 100] Loss: 0.597
[AlexNet Scratch - Epoch 6, Batch 200] Loss: 0.5

Below code can be used to get the total number of learnable parameters in your CNN model


In [9]:
# --- Count total number of parameters ---
total_params = sum(p.numel() for p in model_scratch.parameters())
trainable_params = sum(p.numel() for p in model_scratch.parameters() if p.requires_grad)

print(f"Total parameters in AlexNet: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")


Total parameters in AlexNet: 58,322,314
Trainable parameters: 58,322,314
