In [1]:
#imports
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# Fixed seed so weight initialisation and shuffling repeat across fresh runs
torch.manual_seed(123)
# Compact tensor reprs: two edge items per dimension, 75-character lines
torch.set_printoptions(linewidth=75, edgeitems=2)

# Use the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')


Using device: cuda


In [2]:
# dataloading

batch_size = 32

# Tensor conversion followed by per-channel (x - 0.5) / 0.5 normalisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split (downloaded into ./data when it is not already there)
cifar10_train = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
# train=False split, used as the validation set
cifar10_val = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

# NOTE(review): the "preprocessing" cell further down rebuilds train_loader and
# val_loader without num_workers, so these two instances are replaced before
# any training run uses them.
train_loader = torch.utils.data.DataLoader(
    cifar10_train, batch_size=batch_size, shuffle=True, num_workers=2
)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, batch_size=batch_size, shuffle=False, num_workers=2
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Model Definitions

class AlexNet(nn.Module):
    """Small AlexNet-style CNN: five convolutional layers and three linear layers.

    The first linear layer expects ``64*2*2`` features, i.e. a 2x2 map with 64
    channels coming out of the convolutional stack (a 32x32 input produces
    exactly that). The last module is ``LogSoftmax``, so ``forward`` returns
    log-probabilities rather than raw logits.

    Args:
        n_classes: number of output classes (width of the final linear layer).
        dropout_chance: probability used by every ``nn.Dropout`` in the
            convolutional stack.
    """

    def __init__(self, n_classes, dropout_chance):
        super().__init__()

        # Module order is significant: it fixes both the state_dict indices
        # and the order in which the weights are randomly initialised.
        feature_stack = [
            # Layer 1
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout(p=dropout_chance),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 2
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(p=dropout_chance),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 3 (no batch norm, no pooling)
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Dropout(p=dropout_chance),
            # Layer 4 (no batch norm, no pooling)
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Dropout(p=dropout_chance),
            # Layer 5 (pooling comes before dropout here, unlike layers 1-2)
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(p=dropout_chance),
        ]
        self.conv_layers = nn.Sequential(*feature_stack)

        classifier_stack = [
            nn.Flatten(),
            # Layer 6
            nn.Linear(64 * 2 * 2, 128),
            nn.ReLU(),
            # Layer 7
            nn.Linear(128, 128),
            nn.ReLU(),
            # Layer 8
            nn.Linear(128, n_classes),
            nn.LogSoftmax(dim=1),
        ]
        self.dense_layers = nn.Sequential(*classifier_stack)

    def forward(self, x):
        """Run the convolutional stack, then the classifier; returns log-probabilities."""
        return self.dense_layers(self.conv_layers(x))

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN -> optional dropout.
    The shortcut output is added to the main path and a final ReLU is applied.

    Args:
        in_channels: number of channels of the block input.
        out_channels: number of channels produced by both convolutions.
        dropout_chance: dropout probability applied to the main path just
            before the residual addition; a value <= 0 disables dropout.
        stride: stride of the first convolution (and of the 1x1 projection
            shortcut when one is needed).
    """

    # Multiplier applied to out_channels when sizing the shortcut projection
    expansion = 1

    def __init__(self, in_channels, out_channels, dropout_chance, stride=1):
        super().__init__()
        # First convolution carries the (possibly > 1) stride
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # None (rather than a Dropout(p=0) module) marks "dropout disabled"
        self.drop = nn.Dropout(p=dropout_chance) if dropout_chance > 0 else None

        # Second convolution always keeps the spatial size
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Identity shortcut by default; a 1x1 conv + BN projection is used when
        # the main path changes the spatial size or the channel count.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Identity check instead of `!= None`
        if self.drop is not None:
            out = self.drop(out)
        # Residual addition, then the final activation
        out += self.shortcut(x)
        return F.relu(out)

class ResNet18(nn.Module):
    """ResNet-style classifier: a 3x3 stem, four stages of residual blocks, a linear head.

    Args:
        block: block class to instantiate. It is called as
            ``block(in_channels, out_channels, stride=..., dropout_chance=...)``
            and must expose an ``expansion`` class attribute.
        n_blocks: sequence of four ints, the number of blocks in each stage.
        dropout_chance: dropout probability forwarded to every block.
        n_classes: number of output classes.

    ``forward`` returns raw class scores (no softmax is applied).
    """

    def __init__(self, block, n_blocks, dropout_chance=0.5, n_classes=10):
        super().__init__()
        # Running channel count; _make_layer updates it as blocks are stacked
        self.in_channels = 64

        # Stem: a stride-1 3x3 convolution that keeps the input resolution
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Four stages with widening channels; stages 2-4 halve the resolution
        self.layer1 = self._make_layer(block, 64, n_blocks[0], dropout_chance=dropout_chance, stride=1)
        self.layer2 = self._make_layer(block, 128, n_blocks[1], dropout_chance=dropout_chance, stride=2)
        self.layer3 = self._make_layer(block, 256, n_blocks[2], dropout_chance=dropout_chance, stride=2)
        self.layer4 = self._make_layer(block, 512, n_blocks[3], dropout_chance=dropout_chance, stride=2)

        # Classification head on the pooled features
        self.linear = nn.Linear(512 * block.expansion, n_classes)

    def _make_layer(self, block, out_channels, n_blocks, dropout_chance, stride):
        """Build one stage: the first block uses ``stride``, the remaining ones stride 1."""
        strides = [stride] + [1] * (n_blocks - 1)
        layers = []
        # Distinct loop name so the `stride` argument is not shadowed
        for block_stride in strides:
            layers.append(block(self.in_channels, out_channels, stride=block_stride, dropout_chance=dropout_chance))
            # The next block consumes what this one produced
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class scores for a batch of 3-channel images."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # True global average pooling. The previous fixed 4x4 window was only
        # global when the final feature map is 4x4 (e.g. 32x32 inputs), where
        # the two give the same result; other sizes broke the linear layer or
        # pooled only part of the map.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        return self.linear(out)


In [4]:
# Training/Validation Loops
def print_model_parameters(model):
    """Print how many parameter elements ``model`` has, in total and trainable.

    A parameter counts as trainable when its ``requires_grad`` flag is set.
    """
    n_total = 0
    n_trainable = 0
    for param in model.parameters():
        size = param.numel()
        n_total += size
        if param.requires_grad:
            n_trainable += size

    print(f"Total Parameters: {n_total}")
    print(f"Trainable Parameters: {n_trainable}")

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, val_loader, device=None):
    """Train ``model`` for ``n_epochs`` epochs, validating after every epoch.

    Args:
        n_epochs: number of passes over ``train_loader``.
        optimizer: optimizer already bound to ``model``'s parameters.
        model: ``nn.Module`` to train; it is switched between train and eval mode.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        train_loader: sized iterable of batches; ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.
        val_loader: same batch layout, used for evaluation only.
        device: device the batches are moved to. When omitted it is taken from
            the model's first parameter (CPU if the model has none), so the
            function no longer relies on a notebook-level ``device`` global.

    Returns:
        ``[train_loss_list, val_loss_list, val_accuracy_list]`` with one entry
        per epoch. Losses are the mean of the per-batch losses; accuracy is a
        percentage in [0, 100].
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss_list = []
    val_loss_list = []
    val_accuracy_list = []

    for epoch in range(n_epochs):
        # ---- training pass ----
        running_loss = 0.0
        model.train()
        for batch in train_loader:
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_loss_list.append(running_loss / len(train_loader))

        # ---- validation pass (no gradients, eval-mode layers) ----
        running_loss = 0.0
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                images, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(images)
                running_loss += loss_fn(outputs, labels).item()
                # Highest score per row is the predicted class
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_loss_list.append(running_loss / len(val_loader))
        val_accuracy = 100 * correct / total
        val_accuracy_list.append(val_accuracy)

        print(f'Epoch {epoch + 1}, Training loss: {train_loss_list[-1]}, Validation loss: {val_loss_list[-1]}, Validation Accuracy: {val_accuracy}%')
    return [train_loss_list, val_loss_list, val_accuracy_list]

In [5]:
# preprocessing

# Hyperparameters shared by every training run below
n_classes = 10
n_epochs = 20
dropout_chance = 0.3
learning_rate = 0.001
batch_size = 32

# Loaders used for training and validation. num_workers is not passed, so the
# DataLoader default applies. Only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    cifar10_train,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    cifar10_val,
    batch_size=batch_size,
    shuffle=False,
)

In [6]:
# Run AlexNet without dropout

# Baseline model: every dropout layer is built with probability 0
Net_1 = AlexNet(n_classes=n_classes, dropout_chance=0).to(device)
print_model_parameters(Net_1)

# SGD with momentum and L2 weight decay
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    Net_1.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

# Per-epoch training loss, validation loss and validation accuracy
train_loss_list_1, val_loss_list_1, val_accuracy_list_1 = training_loop(
    n_epochs=n_epochs, optimizer=optimizer, model=Net_1, loss_fn=loss_fn,
    train_loader=train_loader, val_loader=val_loader,
)

Total Parameters: 398410
Trainable Parameters: 398410
Epoch 1, Training loss: 1.4405927348426726, Validation loss: 1.2700357324779985, Validation Accuracy: 56.68%
Epoch 2, Training loss: 0.9966186016168795, Validation loss: 0.9329486111292062, Validation Accuracy: 67.37%
Epoch 3, Training loss: 0.8280734710028289, Validation loss: 0.8452811562976899, Validation Accuracy: 70.58%
Epoch 4, Training loss: 0.7305913601673648, Validation loss: 0.7427947720209249, Validation Accuracy: 74.48%
Epoch 5, Training loss: 0.6594734474759184, Validation loss: 0.8080914611824024, Validation Accuracy: 71.78%
Epoch 6, Training loss: 0.6051733979451221, Validation loss: 0.7226270979014449, Validation Accuracy: 75.38%
Epoch 7, Training loss: 0.5525909440207009, Validation loss: 0.6660986446534483, Validation Accuracy: 77.08%
Epoch 8, Training loss: 0.5128827611090507, Validation loss: 0.7121517256902049, Validation Accuracy: 75.44%
Epoch 9, Training loss: 0.47773542595992696, Validation loss: 0.6449162116

In [7]:
# Run AlexNet with dropout

# Same architecture as the baseline, with the shared dropout probability
Net_1_drop = AlexNet(n_classes=n_classes, dropout_chance=dropout_chance).to(device)
print_model_parameters(Net_1_drop)

# SGD with momentum and L2 weight decay
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    Net_1_drop.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

# Per-epoch training loss, validation loss and validation accuracy
train_loss_list_1d, val_loss_list_1d, val_accuracy_list_1d = training_loop(
    n_epochs=n_epochs, optimizer=optimizer, model=Net_1_drop, loss_fn=loss_fn,
    train_loader=train_loader, val_loader=val_loader,
)

Total Parameters: 398410
Trainable Parameters: 398410
Epoch 1, Training loss: 1.702021241569397, Validation loss: 1.5225956180986886, Validation Accuracy: 50.99%
Epoch 2, Training loss: 1.2569625383756593, Validation loss: 1.3798216078608943, Validation Accuracy: 54.79%
Epoch 3, Training loss: 1.0989506840705872, Validation loss: 1.1618867826918824, Validation Accuracy: 61.92%
Epoch 4, Training loss: 0.9926018026572195, Validation loss: 1.0784214611251515, Validation Accuracy: 63.51%
Epoch 5, Training loss: 0.9093193631102012, Validation loss: 1.0324637023404764, Validation Accuracy: 66.15%
Epoch 6, Training loss: 0.8564154198172759, Validation loss: 0.9234217677634364, Validation Accuracy: 70.26%
Epoch 7, Training loss: 0.8077857723162866, Validation loss: 0.9574294067419375, Validation Accuracy: 68.58%
Epoch 8, Training loss: 0.7718592683855571, Validation loss: 0.8180684796727884, Validation Accuracy: 73.93%
Epoch 9, Training loss: 0.7456347072307528, Validation loss: 0.859709319596

In [8]:
# Run ResNet18 without dropout

# [2, 2, 2, 2] -> four stages with two BasicBlocks each.
# n_classes comes from the shared hyperparameters, as in the AlexNet runs.
Net_2 = ResNet18(BasicBlock, [2, 2, 2, 2], n_classes=n_classes, dropout_chance=0.0).to(device)

loss_fn = nn.CrossEntropyLoss()
# NOTE(review): plain SGD here, while the AlexNet runs use momentum=0.9 and
# weight_decay=0.005 -- confirm the difference is intentional before comparing
# the two architectures.
optimizer = torch.optim.SGD(Net_2.parameters(), lr=learning_rate)

print_model_parameters(Net_2)

# Per-epoch training loss, validation loss and validation accuracy
train_loss_list_2, val_loss_list_2, val_accuracy_list_2 = training_loop(
    n_epochs = n_epochs,
    optimizer = optimizer,
    model = Net_2,
    loss_fn = loss_fn,
    train_loader = train_loader,
    val_loader = val_loader
    )

Total Parameters: 11173962
Trainable Parameters: 11173962
Epoch 1, Training loss: 1.693639721919235, Validation loss: 1.443128769009258, Validation Accuracy: 46.94%
Epoch 2, Training loss: 1.3197217740390215, Validation loss: 1.2526015875438532, Validation Accuracy: 54.47%
Epoch 3, Training loss: 1.1113442737020442, Validation loss: 1.0709847308957159, Validation Accuracy: 61.79%
Epoch 4, Training loss: 0.9548424317603377, Validation loss: 0.9620003717395064, Validation Accuracy: 65.49%
Epoch 5, Training loss: 0.8171594699109432, Validation loss: 0.9955667239218093, Validation Accuracy: 64.76%
Epoch 6, Training loss: 0.6996850139978065, Validation loss: 0.8980688998303094, Validation Accuracy: 68.87%
Epoch 7, Training loss: 0.5780116889389829, Validation loss: 0.8843061843047888, Validation Accuracy: 69.63%
Epoch 8, Training loss: 0.465583418711057, Validation loss: 0.8447716318951628, Validation Accuracy: 71.48%
Epoch 9, Training loss: 0.3549374931814269, Validation loss: 1.0103198728

In [34]:
# Run ResNet18 with dropout

# [2, 2, 2, 2] -> four stages with two BasicBlocks each.
# n_classes comes from the shared hyperparameters, as in the AlexNet runs.
Net_2_drop = ResNet18(BasicBlock, [2, 2, 2, 2], n_classes=n_classes, dropout_chance=dropout_chance).to(device)

loss_fn = nn.CrossEntropyLoss()
# NOTE(review): plain SGD here, while the AlexNet runs use momentum=0.9 and
# weight_decay=0.005 -- confirm the difference is intentional before comparing
# the two architectures.
optimizer = torch.optim.SGD(Net_2_drop.parameters(), lr=learning_rate)

print_model_parameters(Net_2_drop)

# Per-epoch training loss, validation loss and validation accuracy
train_loss_list_2d, val_loss_list_2d, val_accuracy_list_2d = training_loop(
    n_epochs = n_epochs,
    optimizer = optimizer,
    model = Net_2_drop,
    loss_fn = loss_fn,
    train_loader = train_loader,
    val_loader = val_loader
    )

Total Parameters: 11173962
Trainable Parameters: 11173962
Epoch 1, Training loss: 1.8701069701274693, Validation loss: 2.395828752471997, Validation Accuracy: 20.94%
Epoch 2, Training loss: 1.568544747504529, Validation loss: 2.0324183089283707, Validation Accuracy: 33.06%
Epoch 3, Training loss: 1.407243894485808, Validation loss: 1.6495289581652266, Validation Accuracy: 42.59%
Epoch 4, Training loss: 1.2855474345591003, Validation loss: 1.61087693916723, Validation Accuracy: 45.47%
Epoch 5, Training loss: 1.1844227875720517, Validation loss: 1.525179234937357, Validation Accuracy: 49.33%
Epoch 6, Training loss: 1.0915645328532102, Validation loss: 1.302609054425273, Validation Accuracy: 55.68%
Epoch 7, Training loss: 1.020651616504081, Validation loss: 1.1464200408313983, Validation Accuracy: 60.77%
Epoch 8, Training loss: 0.9600746631050293, Validation loss: 1.0912083560666337, Validation Accuracy: 62.55%
Epoch 9, Training loss: 0.9093468193434326, Validation loss: 0.972449402839611