In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder, CIFAR10
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import os

# Seed the global RNGs up front: weight initialisation, DataLoader shuffling and
# dropout are all random, so without a fixed seed two runs of this notebook
# cannot be compared with each other.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Pick the compute backend in order of preference: CUDA GPU, Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Hyperparameters (Global)
BATCH_SIZE = 64
EPOCHS = 5 # Keep low for demonstration, increase to 10-20 for final results
LEARNING_RATE = 0.001

Using device: mps


In [2]:
def get_dataloaders(dataset_name, split_seed=42):
    """Build the train/test DataLoaders for one of the supported datasets.

    Args:
        dataset_name: Either 'CIFAR10' (downloaded into ./data on first use) or
            'CatsDogs' (an ImageFolder tree expected at ./data/cats_dogs).
        split_seed: Seed for the 80/20 CatsDogs split. Keeping it fixed makes
            every call return the same partition, so a test loader obtained
            from one call stays disjoint from the train loader of another.

    Returns:
        Tuple ``(train_loader, test_loader, num_classes)``.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
        FileNotFoundError: If 'CatsDogs' is requested but ./data/cats_dogs is missing.
    """
    supported = ('CIFAR10', 'CatsDogs')
    if dataset_name not in supported:
        # Fail early with a clear message instead of hitting an unbound
        # `train_set` further down.
        raise ValueError(
            f"Unknown dataset_name {dataset_name!r}; expected one of {supported}"
        )

    # Transforms
    transform = transforms.Compose([
        transforms.Resize((64, 64)), # Standardize size
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 maps the [0, 1] output of ToTensor to [-1, 1] per channel
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    if dataset_name == 'CIFAR10':
        train_set = CIFAR10(root='./data', train=True, download=True, transform=transform)
        test_set = CIFAR10(root='./data', train=False, download=True, transform=transform)
        num_classes = 10
    else:  # 'CatsDogs'
        if not os.path.exists('./data/cats_dogs'):
            raise FileNotFoundError("Please download Cats vs Dogs dataset and place in ./data/cats_dogs")

        full_dataset = ImageFolder(root='./data/cats_dogs', transform=transform)
        # Split into train/test (80/20) with a dedicated, seeded generator so the
        # partition is reproducible and independent of the global RNG state.
        train_size = int(0.8 * len(full_dataset))
        test_size = len(full_dataset) - train_size
        split_generator = torch.Generator().manual_seed(split_seed)
        train_set, test_set = random_split(
            full_dataset, [train_size, test_size], generator=split_generator
        )
        num_classes = 2

    train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, test_loader, num_classes


In [3]:
class CustomCNN(nn.Module):
    """Three-block convolutional classifier with selectable activation and init.

    Each feature block is Conv(3x3, padding 1) -> BatchNorm -> activation ->
    MaxPool(2x2), with 32, 64 and 128 output channels respectively. The
    classifier head is Flatten -> Dropout(0.5) -> Linear(8192, 512) ->
    activation -> Linear(512, num_classes).

    Args:
        num_classes: Number of output logits.
        activation_type: 'relu', 'tanh' or 'leaky_relu'. Any other value
            falls back to ReLU.
        init_type: 'xavier', 'kaiming' or 'random' (normal, std 0.02). Any other
            value keeps PyTorch's default weights; biases are zeroed either way.
    """

    # Negative slope shared by the LeakyReLU layers and the matching Kaiming gain.
    LEAKY_SLOPE = 0.1

    def __init__(self, num_classes, activation_type='relu', init_type='xavier'):
        super(CustomCNN, self).__init__()

        # Both must be set before the layers are built: get_activation() and
        # initialize_weights() read them.
        self.activation_type = activation_type
        self.init_type = init_type

        # Define Layers
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            self.get_activation(),
            nn.MaxPool2d(2, 2),

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            self.get_activation(),
            nn.MaxPool2d(2, 2),

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            self.get_activation(),
            nn.MaxPool2d(2, 2),
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            # Three 2x2 poolings shrink a 64x64 input to 8x8, hence 128 * 8 * 8.
            # Other input resolutions will not match this layer.
            nn.Linear(128 * 8 * 8, 512),
            self.get_activation(),
            nn.Linear(512, num_classes)
        )

        # Apply Weight Initialization
        self.apply(self.initialize_weights)

    def get_activation(self):
        """Return a fresh activation module for ``self.activation_type`` (ReLU if unknown)."""
        if self.activation_type == 'tanh':
            return nn.Tanh()
        if self.activation_type == 'leaky_relu':
            return nn.LeakyReLU(self.LEAKY_SLOPE)
        # 'relu' and any unrecognised name
        return nn.ReLU()

    def initialize_weights(self, m):
        """Initialise one submodule; intended to be passed to ``nn.Module.apply``.

        Only Conv2d and Linear layers are touched: weights follow
        ``self.init_type`` and biases are set to zero.
        """
        if not isinstance(m, (nn.Conv2d, nn.Linear)):
            return

        if self.init_type == 'xavier':
            nn.init.xavier_uniform_(m.weight)
        elif self.init_type == 'kaiming':
            if self.activation_type == 'leaky_relu':
                # Use the gain that matches the LeakyReLU slope actually used in
                # the network rather than the plain-ReLU gain.
                nn.init.kaiming_uniform_(
                    m.weight, a=self.LEAKY_SLOPE, nonlinearity='leaky_relu'
                )
            else:
                nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        elif self.init_type == 'random':
            nn.init.normal_(m.weight, mean=0.0, std=0.02)

        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of 3-channel 64x64 images to ``num_classes`` logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [4]:
def train_model(model, train_loader, criterion, optimizer, epochs=5):
    """Train ``model`` in place and return the mean batch loss of every epoch.

    Args:
        model: Network to optimise; batches are moved to the device of its
            first parameter.
        train_loader: Sized iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the parameters to update.
        epochs: Number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the average of the per-batch losses.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader is empty; nothing to train on")

    # Follow the model's own device rather than relying on the notebook-level
    # `device`, which is only used as a fallback for parameter-less models.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    model.train()
    loss_history = []

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Class with the highest logit; detached so no graph is kept alive.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_acc = 100 * correct / total
        epoch_loss = running_loss / num_batches
        loss_history.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%")

    return loss_history

def evaluate_model(model, test_loader):
    """Compute, print and return top-1 accuracy (in percent) on ``test_loader``.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Network to evaluate; batches are moved to the device of its
            first parameter.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy as a float between 0 and 100.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()

    # Follow the model's own device rather than relying on the notebook-level
    # `device`, which is only used as a fallback for parameter-less models.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(run_device), labels.to(run_device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [5]:
def run_experiments(dataset_name, epochs=3):
    """Train one CustomCNN per activation/init/optimizer combination and keep the best.

    All 3 x 3 x 3 = 27 combinations are trained from scratch for ``epochs``
    epochs and scored with ``evaluate_model`` on the test loader. Whenever a
    configuration beats the best score so far, its weights are written to
    ``weights/<dataset_name>/best_model.pth``.

    Note that the winner is selected on test accuracy, so the reported best
    score is an optimistic estimate.

    Args:
        dataset_name: Dataset identifier forwarded to ``get_dataloaders``.
        epochs: Training epochs per configuration (kept low for speed).

    Returns:
        Tuple ``(best_model, num_classes, test_loader)``.
    """
    train_loader, test_loader, num_classes = get_dataloaders(dataset_name)

    # Configurations
    activations = ['relu', 'tanh', 'leaky_relu']
    initializations = ['xavier', 'kaiming', 'random']
    # Optimizer name -> factory; insertion order defines the sweep order.
    optimizer_factories = {
        'sgd': lambda params: optim.SGD(params, lr=LEARNING_RATE, momentum=0.9),
        'adam': lambda params: optim.Adam(params, lr=LEARNING_RATE),
        'rmsprop': lambda params: optim.RMSprop(params, lr=LEARNING_RATE),
    }

    best_acc = 0
    best_model = None
    best_config = ""

    # Create directory for weights
    os.makedirs(f'weights/{dataset_name}', exist_ok=True)

    print(f"--- Starting Experiments for {dataset_name} ---")

    # NOTE: this is the full 27-combination sweep and it takes a long time.
    # Shorten the lists above (or lower `epochs`) for a quick demonstration.
    for act in activations:
        for init in initializations:
            for opt_name, make_optimizer in optimizer_factories.items():
                print(f"\nConfig: Act={act}, Init={init}, Optim={opt_name}")

                model = CustomCNN(num_classes=num_classes, activation_type=act, init_type=init).to(device)
                criterion = nn.CrossEntropyLoss()
                optimizer = make_optimizer(model.parameters())

                _ = train_model(model, train_loader, criterion, optimizer, epochs=epochs)
                acc = evaluate_model(model, test_loader)

                # `best_model is None` guarantees a model is returned even if
                # every configuration scores 0%.
                if best_model is None or acc > best_acc:
                    best_acc = acc
                    best_model = model
                    best_config = f"{act}_{init}_{opt_name}"
                    torch.save(model.state_dict(), f'weights/{dataset_name}/best_model.pth')

    print(f"\nBest {dataset_name} Accuracy: {best_acc:.2f}% with Config: {best_config}")
    return best_model, num_classes, test_loader

# CIFAR-10: sweep every configuration and keep the winning model together with
# the class count and test loader that the ResNet comparison reuses.
(best_cnn_cifar,
 num_classes_cifar,
 test_loader_cifar) = run_experiments(dataset_name='CIFAR10')

# Cats vs Dogs: enable once the images are available under ./data/cats_dogs
# best_cnn_cats, num_classes_cats, test_loader_cats = run_experiments('CatsDogs')

100%|████████████████████████████████████████| 170M/170M [00:21<00:00, 7.86MB/s]


--- Starting Experiments for CIFAR10 ---

Config: Act=relu, Init=xavier, Optim=sgd
Epoch [1/3], Loss: 1.5714, Acc: 43.88%
Epoch [2/3], Loss: 1.2451, Acc: 55.22%
Epoch [3/3], Loss: 1.1088, Acc: 60.49%
Test Accuracy: 62.53%

Config: Act=relu, Init=xavier, Optim=adam
Epoch [1/3], Loss: 1.5022, Acc: 48.61%
Epoch [2/3], Loss: 1.0452, Acc: 62.87%
Epoch [3/3], Loss: 0.8949, Acc: 68.45%
Test Accuracy: 68.88%

Config: Act=relu, Init=xavier, Optim=rmsprop
Epoch [1/3], Loss: 2.5676, Acc: 40.08%
Epoch [2/3], Loss: 1.1909, Acc: 57.18%
Epoch [3/3], Loss: 0.9994, Acc: 64.71%
Test Accuracy: 64.55%

Config: Act=relu, Init=kaiming, Optim=sgd
Epoch [1/3], Loss: 1.5995, Acc: 43.25%
Epoch [2/3], Loss: 1.2588, Acc: 54.63%
Epoch [3/3], Loss: 1.1401, Acc: 59.24%
Test Accuracy: 61.27%

Config: Act=relu, Init=kaiming, Optim=adam
Epoch [1/3], Loss: 1.4878, Acc: 49.21%
Epoch [2/3], Loss: 1.0321, Acc: 63.31%
Epoch [3/3], Loss: 0.8975, Acc: 68.40%
Test Accuracy: 69.99%

Config: Act=relu, Init=kaiming, Optim=rmsprop

In [6]:
from torchvision import models

def train_resnet(dataset_name, test_loader, num_classes, epochs=5):
    """Fit a new classification head on an ImageNet-pretrained ResNet-18.

    Every pretrained parameter is frozen; only the replacement ``fc`` layer is
    optimised. The resulting weights are saved to
    ``weights/<dataset_name>/resnet18_finetuned.pth``.

    Args:
        dataset_name: Dataset identifier forwarded to ``get_dataloaders`` to
            obtain the training loader.
        test_loader: Loader the fine-tuned model is evaluated on.
        num_classes: Output size of the new ``fc`` layer.
        epochs: Number of training epochs for the head.

    Returns:
        Test accuracy in percent, as returned by ``evaluate_model``.
    """
    print(f"\n--- Fine-tuning ResNet-18 for {dataset_name} ---")

    # Load Pretrained Model
    resnet = models.resnet18(weights='IMAGENET1K_V1')

    # Freeze the entire pretrained network; it acts as a fixed feature extractor.
    # NOTE: train_model() still puts the model in train mode, so BatchNorm
    # running statistics keep updating even though the weights are frozen.
    for param in resnet.parameters():
        param.requires_grad = False

    # Replace the final fully connected layer. It is created after the freeze
    # loop, so its parameters are trainable by default.
    num_ftrs = resnet.fc.in_features
    resnet.fc = nn.Linear(num_ftrs, num_classes)

    resnet = resnet.to(device)

    # Only optimize the final layer
    optimizer = optim.Adam(resnet.fc.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    # Rebuild the training loader. For randomly split datasets this is only
    # disjoint from `test_loader` if get_dataloaders() splits deterministically.
    train_loader, _, _ = get_dataloaders(dataset_name)

    # Train
    train_model(resnet, train_loader, criterion, optimizer, epochs=epochs)
    acc = evaluate_model(resnet, test_loader)

    # Create the output folder here so this function does not depend on an
    # earlier cell having created it.
    weights_dir = f'weights/{dataset_name}'
    os.makedirs(weights_dir, exist_ok=True)
    torch.save(resnet.state_dict(), f'{weights_dir}/resnet18_finetuned.pth')
    return acc

# Fine-tune ResNet-18 on CIFAR-10 and report its accuracy on the same test
# loader the custom CNN sweep was scored on.
resnet_acc_cifar = train_resnet(
    dataset_name='CIFAR10',
    test_loader=test_loader_cifar,
    num_classes=num_classes_cifar,
)
print(f"ResNet18 Accuracy: {resnet_acc_cifar:.2f}%")


--- Fine-tuning ResNet-18 for CIFAR10 ---
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/jahanavisingh/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████████████████████████████████| 44.7M/44.7M [00:09<00:00, 5.02MB/s]


Epoch [1/5], Loss: 1.2357, Acc: 58.10%
Epoch [2/5], Loss: 1.0738, Acc: 63.79%
Epoch [3/5], Loss: 1.0525, Acc: 64.47%
Epoch [4/5], Loss: 1.0417, Acc: 64.35%
Epoch [5/5], Loss: 1.0364, Acc: 64.89%
Test Accuracy: 64.75%
ResNet18 Accuracy: 64.75%
