## Import necessary packages

In [None]:
import random

import numpy as np

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


# Seed the Python, NumPy and PyTorch random number generators with one shared
# value so that each of them starts from a fixed state.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

## Prepare dataset

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hyperparameters used by the experiment cells.
num_epochs, batch_size, learning_rate = 1, 64, 0.001

# Convert each image to a tensor, then normalize all three channels with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# CIFAR-10 splits stored under ./data; only the training-split constructor
# is asked to download.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform,
)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


## CNN model definition

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier producing 10 output scores per image.

    Layer order: conv(3->32) -> ReLU -> 2x2 max-pool -> conv(32->64) -> ReLU
    -> 2x2 max-pool -> flatten -> linear(64*8*8 -> 512) -> ReLU -> optional
    dropout -> linear(512 -> 10). Both convolutions use 3x3 kernels with
    padding 1, so the first linear layer's 64 * 8 * 8 input features
    correspond to 3-channel 32x32 images.

    Args:
        drop_out: Dropout probability applied before the last linear layer.
            Dropout is disabled when the value is not greater than 0.
        init_method: How the weights of the four layers are initialized:
            'default' (``nn.init.normal_`` with its default arguments),
            'xavier' (``nn.init.xavier_normal_``) or
            'kaiming' (``nn.init.kaiming_normal_``).
            Only the weights are re-initialized; biases are left untouched.

    Raises:
        ValueError: If ``init_method`` is not one of the supported names.
    """

    def __init__(self, drop_out=0., init_method='default'):
        super(CNN, self).__init__()

        initializers = {
            'default': nn.init.normal_,
            'xavier': nn.init.xavier_normal_,
            'kaiming': nn.init.kaiming_normal_,
        }
        if init_method not in initializers:
            # The original built this exception without raising it, so a
            # misspelled method name was silently accepted.
            raise ValueError(f"Initialization method `{init_method}` is not supported.")

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, 10)

        self.dropout = nn.Dropout(drop_out) if drop_out > 0 else None

        # Same layer order as before so the random draws are consumed in the
        # same sequence for a given seed.
        init_weight = initializers[init_method]
        for layer in (self.conv1, self.conv2, self.fc1, self.fc2):
            init_weight(layer.weight)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))

        # Flatten per sample. Unlike ``view(-1, 64 * 8 * 8)`` this can never
        # move values between samples when the spatial size is unexpected;
        # such inputs fail at ``fc1`` with a shape error instead.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))

        if self.dropout is not None:
            x = self.dropout(x)
        return self.fc2(x)


## Train & Evaluate function definitions

In [None]:
def train(
    model,
    train_loader,
    num_epochs: int,
    learning_rate: float,
    weight_decay: float,
    scheduler_type: str = 'step',
):
    """Train ``model`` in place with Adam and cross-entropy loss.

    Args:
        model: Module to optimize. Batches are moved to the device of its
            first parameter before the forward pass.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Adam learning rate; also the upper bound of the
            cyclic schedule.
        weight_decay: Adam ``weight_decay`` (L2 penalty) coefficient.
        scheduler_type: Learning-rate schedule: 'step' (StepLR, step_size=5,
            gamma=0.1), 'exponential' (ExponentialLR, gamma=0.1) or 'cyclic'
            (CyclicLR between 1e-4 and ``learning_rate``).

    Raises:
        ValueError: If ``scheduler_type`` is not supported.
    """
    if scheduler_type not in ('step', 'exponential', 'cyclic'):
        # Previously the exception was created but never raised, which left
        # ``scheduler`` undefined further down.
        raise ValueError(f"Scheduler `{scheduler_type}` is not supported")

    # Use the model's own device instead of relying on a notebook global.
    model_device = next(model.parameters()).device

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    if scheduler_type == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    elif scheduler_type == 'exponential':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    else:
        # Adam has no ``momentum`` option, so momentum cycling is disabled.
        scheduler = optim.lr_scheduler.CyclicLR(
            optimizer,
            base_lr=1e-4,
            max_lr=learning_rate,
            cycle_momentum=False,
        )

    # CyclicLR is designed to be advanced after every batch; the other two
    # schedules are advanced once per epoch.
    step_per_batch = scheduler_type == 'cyclic'

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(model_device)
            labels = labels.to(model_device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step_per_batch:
                scheduler.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()))

        if not step_per_batch:
            # Fixes the original ``scheduler.setp()`` typo.
            scheduler.step()

def evaluate(
    model,
    test_loader
):
    """Compute and print the classification accuracy of ``model``.

    The model is switched to evaluation mode and run without gradient
    tracking. For each batch the predicted class is the index of the largest
    output along dimension 1.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage in the range [0, 100].

    Raises:
        ValueError: If ``test_loader`` yields no samples (previously this
            surfaced as a ZeroDivisionError).
    """
    # Use the model's own device instead of relying on a notebook global;
    # parameter-free models fall back to the CPU.
    model_device = next((p.device for p in model.parameters()), torch.device('cpu'))

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(model_device)
            labels = labels.to(model_device)
            outputs = model(images)
            # Under no_grad the legacy ``.data`` access is unnecessary.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined.")

    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the test images: {accuracy}%')

    return accuracy

## Experiment 1: Regularization Techniques

### L2 regularization

In [None]:
# Sweep 20 evenly spaced weight-decay coefficients over [0, 1] and record the
# test accuracy reached with each one.
weight_decays = np.linspace(start=0, stop=1, num=20)
accuracy_list = []

for wd in weight_decays:
    print("*" * 10, f"L2 regularization: {wd}", "*" * 10)

    # A fresh network without dropout for every coefficient.
    model = CNN(drop_out=0)
    model = model.to(device)
    train(
        model,
        train_loader,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        weight_decay=wd,
    )

    accuracy = evaluate(model, test_loader)
    accuracy_list += [accuracy]

### Drop-out regularization

In [None]:
# Sweep 20 evenly spaced drop-out rates over [0, 0.8] with a fixed weight
# decay and record the test accuracy reached with each one.
weight_decay = 1e-4
drop_out_rates = np.linspace(start=0, stop=0.8, num=20)
accuracy_list = []

for d in drop_out_rates:
    print("*" * 10, f"Drop-out rate: {d}", "*" * 10)

    # A fresh network for every rate.
    model = CNN(drop_out=d)
    model = model.to(device)
    train(
        model,
        train_loader,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
    )

    accuracy = evaluate(model, test_loader)
    accuracy_list += [accuracy]

In [None]:
# accuracy_list was last filled by the drop-out sweep, so it is plotted
# against drop_out_rates. Plotting it against weight_decays (as before) put
# the drop-out accuracies on the wrong x-axis.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(drop_out_rates, accuracy_list)
ax.set_xlabel("Drop-out rate")
ax.set_ylabel("Test accuracy (%)")
fig.show()

## Experiment 2: Initialization Techniques

In [None]:
# Regularization settings held fixed while other options are varied.
drop_out_rate, weight_decay = 0.5, 1e-4

### Default Initialization

In [None]:
# Train and score a network whose weights use the 'default' initialization.
model = CNN(init_method='default', drop_out=drop_out_rate)
model = model.to(device)
train(
    model,
    train_loader,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
)
accuracy = evaluate(model, test_loader)

### Xavier Initialization

In [None]:
# Train and score a network whose weights use the 'xavier' initialization.
model = CNN(init_method='xavier', drop_out=drop_out_rate)
model = model.to(device)
train(
    model,
    train_loader,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
)
accuracy = evaluate(model, test_loader)

### Kaiming Initialization

In [None]:
# Train and score a network whose weights use the 'kaiming' initialization.
model = CNN(init_method='kaiming', drop_out=drop_out_rate)
model = model.to(device)
train(
    model,
    train_loader,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
)
accuracy = evaluate(model, test_loader)

## Experiment 3: Learning Rate Scheduling Techniques

### Step decay

In [None]:
# Train with the step-decay learning-rate schedule. train() has no
# `scheduler` parameter (the old `scheduler='Adam'` raised a TypeError); the
# schedule is chosen through `scheduler_type`.
model = CNN(drop_out=drop_out_rate).to(device)
train(model, train_loader, num_epochs, learning_rate, weight_decay, scheduler_type='step')
accuracy = evaluate(model, test_loader)

### Exponential decay

In [None]:
# Train with the exponential-decay learning-rate schedule. train() has no
# `scheduler` parameter (the old `scheduler='Adam'` raised a TypeError); the
# schedule is chosen through `scheduler_type`.
model = CNN(drop_out=drop_out_rate).to(device)
train(model, train_loader, num_epochs, learning_rate, weight_decay, scheduler_type='exponential')
accuracy = evaluate(model, test_loader)

### Cyclic learning rates

Convergence Speed