In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets,transforms
from torch.utils.data import DataLoader

# CIFAR-10 images are 32x32 RGB: raw shape (32, 32, 3); the model consumes them channels-first as (3, 32, 32)

class AlexNetCIFAR(nn.Module):
    """AlexNet-style CNN sized for 32x32 RGB inputs such as CIFAR-10.

    The feature extractor is five 3x3 convolutions, each followed by batch
    normalization and ReLU, with 2x2 max-pooling after the 1st, 2nd and 5th
    convolution. The classifier is three fully connected layers with dropout
    in front of the first two.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # One row per conv stage: (in_channels, out_channels, pool_after).
        # Every conv is 3x3 / stride 1 / padding 1, so it keeps the spatial
        # size; only the pooling stages halve it (32 -> 16 -> 8 -> 4 for a
        # 32x32 input). Shapes below are channels-first (C, H, W).
        stage_specs = (
            (3, 96, True),      # conv -> (96, 32, 32),  pool -> (96, 16, 16)
            (96, 256, True),    # conv -> (256, 16, 16), pool -> (256, 8, 8)
            (256, 384, False),  # conv -> (384, 8, 8)
            (384, 384, False),  # conv -> (384, 8, 8)
            (384, 256, True),   # conv -> (256, 8, 8),   pool -> (256, 4, 4)
        )

        feature_layers = []
        for in_ch, out_ch, pool_after in stage_specs:
            feature_layers.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1)
            )
            feature_layers.append(nn.BatchNorm2d(num_features=out_ch))
            feature_layers.append(nn.ReLU())
            if pool_after:
                feature_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*feature_layers)

        # The last feature map is (256, 4, 4) for 32x32 inputs, hence the
        # 256 * 4 * 4 input width of the first linear layer.
        flat_features = 256 * 4 * 4
        hidden_units = 4096
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Run the feature extractor then the classifier and return the logits."""
        return self.classifier(self.features(x))


In [12]:
def train_CIFAR(architecture_class, optimizer_name='Adam', epochs=30, batch_size=128):
    """Train a model on CIFAR-10, printing per-epoch metrics and the final test accuracy.

    The CIFAR-10 train/test splits are downloaded to ``./data-CIFAR`` if
    missing. Training images are randomly flipped and cropped; both splits
    are normalized with the same per-channel mean/std.

    Args:
        architecture_class: Callable invoked with no arguments that returns
            the ``nn.Module`` to train (e.g. a model class).
        optimizer_name: ``'Adam'`` (lr=1e-3) or ``'SGD'`` (lr=0.01,
            momentum=0.9). Any other value prints a warning and falls back
            to Adam.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size used by both the train and test loaders.

    Returns:
        None. All results are reported through ``print``.
    """
    # ------- data loading and transformation --------

    # Shared by both pipelines so train and test are normalized identically.
    normalize = transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2470, 0.2435, 0.2616)
    )

    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        normalize,
    ])

    # No augmentation at test time: only tensor conversion + normalization.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.CIFAR10(root="./data-CIFAR", download=True, train=True, transform=transform_train)
    test_dataset = datasets.CIFAR10(root="./data-CIFAR", download=True, train=False, transform=transform_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # ------- model, loss, optimizer --------

    model = architecture_class().to(device)
    criterion = nn.CrossEntropyLoss()
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    else:
        # Adam is both the explicit choice and the fallback for unknown names.
        if optimizer_name != 'Adam':
            print("Unknown optimizer! Defaulting to Adam.")
        optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # ------- training loop --------

    for epoch in range(epochs):
        model.train()
        correct = 0
        total = 0
        loss_sum = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # the epoch figure is a true per-sample mean even though the last
            # batch is usually smaller than the others.
            n_batch = labels.size(0)
            loss_sum += loss.item() * n_batch
            _, preds = outputs.max(1)
            total += n_batch
            correct += preds.eq(labels).sum().item()

        print(f"Epoch [{epoch+1}/{epochs}] "
            f"Loss: {loss_sum/total:.4f} "
            f"Train Acc: {100*correct/total:.2f}%")

    # ------- test evaluation --------

    # eval() switches dropout off and makes batch norm use its running stats.
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(1)
            total += labels.size(0)
            correct += preds.eq(labels).sum().item()

    print(f"Test Accuracy: {100*correct/total:.2f}%")



In [13]:
# Train the AlexNet variant on CIFAR-10 with Adam; progress is printed per epoch.
train_CIFAR(
    architecture_class=AlexNetCIFAR,
    optimizer_name='Adam',
)

Epoch [1/30] Loss: 1.8472 Train Acc: 30.90%
Epoch [2/30] Loss: 1.4171 Train Acc: 47.55%
Epoch [3/30] Loss: 1.1973 Train Acc: 56.92%
Epoch [4/30] Loss: 1.0525 Train Acc: 62.67%
Epoch [5/30] Loss: 0.9386 Train Acc: 66.98%
Epoch [6/30] Loss: 0.8611 Train Acc: 69.99%
Epoch [7/30] Loss: 0.7849 Train Acc: 72.76%
Epoch [8/30] Loss: 0.7264 Train Acc: 75.19%
Epoch [9/30] Loss: 0.6669 Train Acc: 77.33%
Epoch [10/30] Loss: 0.6212 Train Acc: 79.03%
Epoch [11/30] Loss: 0.5855 Train Acc: 80.08%
Epoch [12/30] Loss: 0.5466 Train Acc: 81.73%
Epoch [13/30] Loss: 0.5042 Train Acc: 83.10%
Epoch [14/30] Loss: 0.4747 Train Acc: 84.28%
Epoch [15/30] Loss: 0.4389 Train Acc: 85.54%
Epoch [16/30] Loss: 0.4180 Train Acc: 86.25%
Epoch [17/30] Loss: 0.3813 Train Acc: 87.20%
Epoch [18/30] Loss: 0.3605 Train Acc: 88.18%
Epoch [19/30] Loss: 0.3402 Train Acc: 88.70%
Epoch [20/30] Loss: 0.3194 Train Acc: 89.31%
Epoch [21/30] Loss: 0.3032 Train Acc: 89.88%
Epoch [22/30] Loss: 0.2807 Train Acc: 90.57%
Epoch [23/30] Loss: