## Creating Classification Models for Cifar10 Dataset

## Importing the Modules

In [2]:
import torch
import torch.nn as nn
import torch.optim as opt
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import CIFAR10

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
from timeit  import default_timer as timer
from tqdm.auto import tqdm
from os import cpu_count

In [8]:
!pip install -q torchmetrics
!pip install -U -q mlxtend
!pip install -q torchinfo

In [16]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix
from torchinfo import summary

In [7]:
from collections.abc import Callable # For Type Hinting

## Setting the Device Agnostic Code

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


## Setting the Basic Hyper Parameters

In [None]:
VALID_SIZE_PROP = 0.2

BATCH_SIZE = 32

## Downloading the Dataset (no Augmentation)



In [None]:
train_dataset = CIFAR10(root="/content/NoAug", download=True, train=True, transform=transforms.ToTensor())
test_dataset = CIFAR10(root="/content/NoAug", download=True, train=False, transform=transforms.ToTensor())
classes_names = train_dataset.classes

print(len(train_dataset), len(test_dataset))
print(classes_names)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/NoAug/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/NoAug/cifar-10-python.tar.gz to /content/NoAug
Files already downloaded and verified
50000 10000
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


## Splitting the Dataset into Training and Validation Sets (no Augmentation)

In [None]:
train_size = int((1 - VALID_SIZE_PROP) * len(train_dataset))
valid_size = int(VALID_SIZE_PROP * len(train_dataset))

train_ds, valid_ds = random_split(train_dataset, [train_size, valid_size])

print(len(train_ds), len(valid_ds))

40000 10000


## Creating the Data Loaders (no Augmentation)


In [None]:
train_dl = DataLoader(dataset=train_ds, 
                      batch_size=BATCH_SIZE,
                      shuffle=True,
                      num_workers=cpu_count(),
                      pin_memory=True)

valid_dl = DataLoader(dataset=valid_ds, 
                      batch_size=BATCH_SIZE,
                      num_workers=cpu_count(),
                      pin_memory=True)

## Creating the Custom Transformations

In [None]:
augmented_train_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=32),
    transforms.ToTensor()
])

augmented_test_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

## Loading the Dataset (with Augentation)

In [None]:
train_dataset_aug = CIFAR10(root="/content/Aug", download=True, train=True, transform=augmented_train_transforms)
test_dataset_aug = CIFAR10(root="/content/Aug", download=True, train=False, transform=augmented_test_transforms)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/Aug/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/Aug/cifar-10-python.tar.gz to /content/Aug
Files already downloaded and verified


## Splitting into Training and Validation Sets (with Augmentation)

In [None]:
train_ds_aug, valid_ds_aug = random_split(train_dataset_aug, [train_size, valid_size])

print(len(train_ds_aug), len(valid_ds_aug))

40000 10000


## Creating the DataLoaders (with Augmentation)

In [None]:
train_dl_aug = DataLoader(dataset=train_ds_aug,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=cpu_count(),
                          pin_memory=True)
valid_dl_aug = DataLoader(dataset=valid_ds_aug,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=cpu_count(),
                          pin_memory=True)

## Creating the Training Loop

In [20]:
def training_step(model: torch.nn.Module,
                  train_dl: torch.utils.data.DataLoader,
                  loss_fn: torch.nn.Module,
                  eval_metric: Callable[[torch.Tensor, torch.Tensor]],
                  optim: torch.optim.Optimizer,
                  n_batch_prints: int=None):
    
    batch_size = next(iter(train_dl))[0].shape[0]
    model_device = next(model.parameters()).device
    train_loss, train_eval = 0, 0
    dummy = 0

    model.train()
    for batch_num, (x_train, y_train) in enumerate(train_dl, start=1):
        x_train, y_train = x_train.to(model_device), y_train.to(model_device)

        model_logits = model(x_train)

        loss = loss_fn(model_logits, y_train)
        train_loss += loss.item()
        train_eval += eval_metric(model_logits, y_train)

        optim.zero_grad()
        loss.backward()
        optim.step()

        if n_batch_prints and (batch_num % (len(train_dl) // n_batch_prints) == 0):
            dummy += 1
            print(f"\t{dummy}) Looked at {batch_num*batch_size}/{len(train_dl)*batch_size} training samples...")

    train_loss /= len(train_dl)
    train_eval /= len(train_dl)

    if dummy != 0:
        print("-" * 107)

    return train_loss, train_eval

## Creating the Evaluating Loop

In [21]:
def validation_step(model: torch.nn.Module,
                    valid_dl: torch.utils.data.DataLoader,
                    loss_fn: torch.nn.Module,
                    eval_metric: Callable[[torch.Tensor, torch.Tensor]],
                    n_batch_prints: int=None):
    
    batch_size = len(next(iter(valid_dl))[0])
    model_device = next(model.parameters()).device
    valid_loss, valid_eval = 0, 0
    dummy = 0

    model.eval()
    with torch.inference_mode():
        for batch_num, (x_valid, y_valid) in enumerate(valid_dl, start=1):
            x_valid, y_valid = x_valid.to(model_device), y_valid.to(model_device)

            model_logits = model(x_valid)

            valid_loss += loss_fn(model_logits, y_valid).item()
            valid_eval += eval_metric(model_logits, y_valid)

            if n_batch_prints and (batch_num % (len(valid_dl) // n_batch_prints) == 0):
                dummy += 1
                print(f"\t{dummy}) Looked at {batch_num*batch_size}/{len(valid_dl)*batch_size} validation samples...")

        valid_loss /= len(valid_dl)
        valid_eval /= len(valid_dl)

        if dummy != 0:
            print("-" * 107)

        return valid_loss, valid_eval

## Creating the Training and Evaluating Function

In [22]:
def fit(model: torch.nn.Module,
        epochs: int,
        train_dl: torch.utils.data.DataLoader,
        valid_dl: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        eval_metric: Callable[[torch.Tensor, torch.Tensor]],
        optim: torch.optim.Optimizer,
        n_epoch_per_print: int=1,
        n_train_batch_prints: int=None,
        n_valid_batch_prints: int=None):
    
    start_time = timer()
    train_losses, train_evals = [], []
    valid_losses, valid_evals = [], []

    print("Starting Process...")
    
    for epoch in tqdm(range(1, epochs + 1)):
        train_loss, train_eval = training_step(model, train_dl, loss_fn, eval_metric, optim, n_train_batch_prints)
        valid_loss, valid_eval = validation_step(model, valid_dl, loss_fn, eval_metric, n_valid_batch_prints)

        if (n_epoch_per_print > 0) and (epoch % n_epoch_per_print == 0):
            print(
                f"-> Epoch: {epoch} | "
                f"Train Loss: {train_loss:.4f} | "
                f"Train Accuracy: {train_eval:.2f}% | "
                f"Test Loss: {valid_loss:.4f} | "
                f"Test Evaluation (%): {valid_eval:.2f}%")
            print("-" * 107)
        
        train_losses.append(train_loss)
        train_evals.append(train_eval)
        valid_losses.append(valid_loss)
        valid_evals.append(valid_eval)

    print("Process Completed Successfully...")

    return {"model_train_loss": train_losses,
        "model_train_eval": train_evals,
        "model_valid_loss": valid_losses,
        "model_valid_eval": valid_evals,
        "model_name": model.__class__.__name__,
        "model_loss_fn": loss_fn.__class__.__name__,
        "model_evaluating_m": eval_metric.__name__,
        "model_optimizer": optim.__class__.__name__,
        "model_device": next(model.parameters()).device.type,
        "model_epochs": epochs,
        "model_time": timer() - start_time}

## Setting Evaluating Metric and Loss Function

In [18]:
def accuracy_fn(model_logits, labels):
    preds = torch.softmax(model_logits, dim=1).argmax(dim=1)

    return (torch.sum(preds == labels).item() / len(labels)) * 100

In [19]:
loss_fn = nn.CrossEntropyLoss()

## Models

### Model 0

#### Hyperparameters

In [None]:
LIN_HIDDEN_1_SIZE_0 = 128
LIN_HIDDEN_2_SIZE_0 = 256
LIN_HIDDEN_3_SIZE_0 = 128

LR_0 = 1e-3

#### Creating the Model

In [None]:
class CIFAR10_ModelV0(nn.Module):
    def __init__(self, input_size, hidden_size_1, hidden_size_2, hidden_size_3, output_size):
        super().__init__()

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_size, out_features=hidden_size_1),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size_1, out_features=hidden_size_2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size_2, out_features=hidden_size_3),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size_3, out_features=output_size),
        )

    def forward(self, x: torch.Tensor):
        return self.classifier(x)

#### Initialize the Model

In [None]:
modelv0 = CIFAR10_ModelV0(32*32*3, LIN_HIDDEN_1_SIZE_0, LIN_HIDDEN_2_SIZE_0, LIN_HIDDEN_3_SIZE_0, len(classes_names)).to(device)

#### Setting Optimizer

In [None]:
optim_0 = opt.Adam(params=modelv0.parameters(), lr=LR_0)

#### Training and Evaluating the Model

In [None]:
res_0 = fit(modelv0, 10, train_dl, valid_dl, loss_fn, accuracy_fn, optim_0, 1, 2)

Starting Process...


  0%|          | 0/10 [00:00<?, ?it/s]

	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 1 | Train Loss: 1.8902 | Train Accuracy: 30.59% | Test Loss: 1.7611 | Test Evaluation (%): 36.11%
-----------------------------------------------------------------------------------------------------------
	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 2 | Train Loss: 1.7223 | Train Accuracy: 37.58% | Test Loss: 1.7010 | Test Evaluation (%): 38.31%
-----------------------------------------------------------------------------------------------------------
	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 3 | Train Loss: 1.6417 | Train Ac

#### Printing Model's Results

In [None]:
print(res_0)

{'model_train_loss': [1.8902263013839722, 1.722305962085724, 1.641725015926361, 1.5822068423748017, 1.5425180557727813, 1.507045959854126, 1.4745552579402923, 1.4509341909885407, 1.4249943515300751, 1.4028412193775177], 'model_train_eval': [30.595, 37.575, 40.9125, 42.9925, 44.3925, 45.755, 46.7375, 47.8025, 48.4775, 49.345], 'model_valid_loss': [tensor(1.7611, device='cuda:0'), tensor(1.7010, device='cuda:0'), tensor(1.6124, device='cuda:0'), tensor(1.5906, device='cuda:0'), tensor(1.5548, device='cuda:0'), tensor(1.5591, device='cuda:0'), tensor(1.5306, device='cuda:0'), tensor(1.5267, device='cuda:0'), tensor(1.4992, device='cuda:0'), tensor(1.5081, device='cuda:0')], 'model_valid_eval': [36.11222044728434, 38.30870607028754, 41.853035143769965, 43.0111821086262, 44.329073482428115, 43.69009584664537, 44.858226837060705, 45.89656549520767, 46.974840255591054, 46.295926517571885], 'model_name': 'CIFAR10_ModelV0', 'model_loss_fn': 'CrossEntropyLoss', 'model_evaluating_m': 'accuracy_fn

### Model 1

#### Hyperparameters

In [None]:
CONV_HIDDEN_1_SIZE_1 = 64
CONV_HIDDEN_2_SIZE_1 = 128

LIN_HIDDEN_1_SIZE_1 = 64
LIN_HIDDEN_2_SIZE_1 = 128

LR_1 = 1e-3

#### Creating the Model


In [None]:
class CIFAR10_ModelV1(nn.Module):
    def __init__(self,
                 input_size,
                 conv_hidden_1_size, conv_hidden_2_size,
                 lin_hidden_1_size, lin_hidden_2_size,
                 output_size):

        super().__init__()

        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_size,
                      out_channels=conv_hidden_1_size,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_1_size,
                      out_channels=conv_hidden_1_size,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # Output: (`batch_size`, `conv_hidden_1_size`, 15, 15)

        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=conv_hidden_1_size,
                      out_channels=conv_hidden_2_size,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_2_size,
                      out_channels=conv_hidden_2_size,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # Output: (`batch_size`, `conv_hidden_2_size`, 5, 5)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=conv_hidden_2_size*5*5, out_features=lin_hidden_1_size),
            nn.ReLU(),
            nn.Linear(in_features=lin_hidden_1_size, out_features=lin_hidden_2_size),
            nn.ReLU(),
            nn.Linear(in_features=lin_hidden_2_size, out_features=output_size)
        ) # Output: (`batch_size`, 10)

    def forward(self, x):
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))

#### Initializing the Model

In [None]:
modelv1 = CIFAR10_ModelV1(3,
                          CONV_HIDDEN_1_SIZE_1, CONV_HIDDEN_2_SIZE_1,
                          LIN_HIDDEN_1_SIZE_1, LIN_HIDDEN_2_SIZE_1,
                          10).to(device)

#### Setting Optimizer

In [None]:
optim_1 = opt.Adam(modelv0.parameters(), lr=LR_1)

#### Training the Model

In [None]:
res_1 = fit(modelv1, 10, train_dl, valid_dl, loss_fn, accuracy_fn, optim_1, 1, 2)

Starting Process...


  0%|          | 0/10 [00:00<?, ?it/s]

	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 1 | Train Loss: 0.9263 | Train Accuracy: 67.38% | Test Loss: 0.9335 | Test Evaluation (%): 67.67%
-----------------------------------------------------------------------------------------------------------
	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 2 | Train Loss: 0.7959 | Train Accuracy: 72.12% | Test Loss: 0.8570 | Test Evaluation (%): 69.72%
-----------------------------------------------------------------------------------------------------------
	Trained for 625/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 3 | Train Loss: 0.7005 | Train Ac

#### Printing Model's Results

In [None]:
print(res_1)

{'model_train_loss': [0.9262752200603485, 0.7959172889471055, 0.7005182271003723, 0.6230178109288216, 0.5485881889939308, 0.48092842677533626, 0.4208540573477745, 0.3726929286956787, 0.33058629912734033, 0.29017846949845555], 'model_train_eval': [67.3775, 72.12, 75.565, 78.1925, 80.775, 83.145, 85.08, 86.8175, 88.4125, 89.6], 'model_valid_loss': [tensor(0.9335, device='cuda:0'), tensor(0.8570, device='cuda:0'), tensor(0.8144, device='cuda:0'), tensor(0.8088, device='cuda:0'), tensor(0.7882, device='cuda:0'), tensor(0.9021, device='cuda:0'), tensor(0.8734, device='cuda:0'), tensor(0.9405, device='cuda:0'), tensor(0.9650, device='cuda:0'), tensor(1.0410, device='cuda:0')], 'model_valid_eval': [67.67172523961662, 69.71845047923323, 71.20607028753993, 71.57547923322684, 73.5223642172524, 70.99640575079871, 72.96325878594249, 72.0547124600639, 72.33426517571885, 71.91493610223642], 'model_name': 'CIFAR10_ModelV0', 'model_loss_fn': 'CrossEntropyLoss', 'model_evaluating_m': 'accuracy_fn', 'mo

### Model 2

#### Hyperparameters

In [None]:
CONV_HIDDEN_1_SIZE_2 = 128
CONV_HIDDEN_2_SIZE_2 = 256
CONV_HIDDEN_3_SIZE_2 = 128
CONV_HIDDEN_4_SIZE_2 = 64

LR_2 = 1e-4

#### Creating the Model

In [None]:
class CIFAR10_ModelV2(nn.Module):
    def __init__(self, input_size, conv_hidden_1_size_2, conv_hidden_2_size_2, conv_hidden_3_size_2, conv_hidden_4_size_2, output_size):
        super().__init__()

        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_size,
                      out_channels=conv_hidden_1_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_1_size_2,
                      out_channels=conv_hidden_1_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # Output: (`batch_size`, `conv_hidden_1_size_2`, 110, 110)

        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=conv_hidden_1_size_2,
                      out_channels=conv_hidden_2_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_2_size_2,
                      out_channels=conv_hidden_2_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # Output: (`batch_size`, `conv_hidden_2_size_2`, 53, 53)

        self.conv_block_3 = nn.Sequential(
            nn.Conv2d(in_channels=conv_hidden_2_size_2,
                      out_channels=conv_hidden_3_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_3_size_2,
                      out_channels=conv_hidden_3_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # Output: (`batch_size`, `conv_hidden_3_size_2`, 24, 24)
 
        self.conv_block_4 = nn.Sequential(
            nn.Conv2d(in_channels=conv_hidden_3_size_2,
                      out_channels=conv_hidden_4_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=conv_hidden_4_size_2,
                      out_channels=conv_hidden_4_size_2,
                      kernel_size=(3, 3),
                      stride=1,
                      padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        ) # # Output: (`batch_size`, `conv_hidden_4_size_2`, 10, 10)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=conv_hidden_4_size_2*10*10, out_features=output_size)
        )

    def forward(self, x):
        return self.classifier(self.conv_block_4(self.conv_block_3(self.conv_block_2(self.conv_block_1(x)))))

#### Initializing Model

In [None]:
modelv2 = CIFAR10_ModelV2(3, CONV_HIDDEN_1_SIZE_2, CONV_HIDDEN_2_SIZE_2, CONV_HIDDEN_3_SIZE_2, CONV_HIDDEN_4_SIZE_2, 10).to(device)

#### Setting Optimizer

In [None]:
optim_2 = opt.Adam(modelv2.parameters(), lr=LR_2)

#### Training the Model

In [None]:
res_2 = fit(modelv2, 10, train_dl_aug, valid_dl_aug, loss_fn, accuracy_fn, optim_2, 1, 10)

Starting Process...


  0%|          | 0/10 [00:00<?, ?it/s]

	Trained for 125/1250 batches...
	Trained for 250/1250 batches...
	Trained for 375/1250 batches...
	Trained for 500/1250 batches...
	Trained for 625/1250 batches...
	Trained for 750/1250 batches...
	Trained for 875/1250 batches...
	Trained for 1000/1250 batches...
	Trained for 1125/1250 batches...
	Trained for 1250/1250 batches...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 1 | Train Loss: 1.9184 | Train Accuracy: 30.86% | Test Loss: 1.7461 | Test Evaluation (%): 38.44%
-----------------------------------------------------------------------------------------------------------
	Trained for 125/1250 batches...
	Trained for 250/1250 batches...
	Trained for 375/1250 batches...
	Trained for 500/1250 batches...
	Trained for 625/1250 batches...
	Trained for 750/1250 batches...
	Trained for 875/1250 batches...
	Trained for 1000/1250 batches...
	Trained for 1125/1250 batches...
	Trained for 1250/1250 batches...
----------

### Model 3

#### Creating Transformations

To try and increase the accuracy of our CIFAR10 Model we are going to make some changes to the Dataset:

1. `Test Set for Validation`: We are going to use out test Dataset to valdate the model, instead of splitting the training Set into train-validation samples.

2. `Channel-wise` Data Normalization: We are going to normalize the image Tensors by subtracting the **mean(μ)** and dividing by the **standard deviation(σ)** across each channel. That way we `prevent` the values of any one channel to disproportionately affect the losses and gradients while training.

3. `Data Augmentation`: We are going to apply randomly chosen transformations while loading images from the training dataset. Since the transformation are going to be applied randomly and dynamically, the model sees slightly different images in each epoch of training, which helps for generalization.

In [10]:
# Setting up the Transformations
train_custom_transforms = transforms.Compose([
    transforms.Resize(size=(128, 128)),              # Resize the Image from (3, 32, 32) to (3, 64, 64)
    transforms.RandomCrop(size=128,                 # Output Size of the Crop
                          padding=4,               # Padding on each Border of the Image
                          padding_mode='reflect'), # Type of Padding
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010],
                         inplace=True)
])

valid_custom_transforms = transforms.Compose([
    transforms.Resize(size=(128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                         std=[0.2023, 0.1994, 0.2010],
                         inplace=True)
])

#### Loading the Dataset

In [11]:
train_ds = CIFAR10(root="/content", download=True, train=True, transform=train_custom_transforms)
test_ds = CIFAR10(root="/content", download=True, train=False, transform=valid_custom_transforms)
classes_names = train_ds.classes

print(len(train_ds), len(test_ds))
print(classes_names)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/cifar-10-python.tar.gz to /content
Files already downloaded and verified
50000 10000
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


#### Creating the Data Loaders

In [12]:
BATCH_SIZE = 128

train_dl = DataLoader(dataset=train_ds,
                      batch_size=BATCH_SIZE,
                      shuffle=True,
                      num_workers=cpu_count(),
                      pin_memory=True)

valid_dl = DataLoader(dataset=test_ds,
                      batch_size=BATCH_SIZE,
                      shuffle=False,
                      num_workers=cpu_count(),
                      pin_memory=True)

print(len(train_dl), len(valid_dl))

391 79


#### Residual Connections

Another way of improving our Model this by adding `resudial blocks`, which are adding the original input back to the output.

A great graph to demonstrate that is the following:

<center><img src="https://miro.medium.com/v2/resize:fit:1140/1*D0F3UitQ2l5Q0Ak-tjEdJg.png" width=500></center>

#### Creating the Model

In [29]:
# Defining a Helping Function
def conv_block(in_channels, out_channels, pooling):
    layers = [
        nn.Conv2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=(3, 3),
                  stride=1,
                  padding=1),
        nn.BatchNorm2d(num_features=out_channels), # Speeding up Training Process
        nn.SELU()
    ]

    if pooling:
        layers.append(nn.MaxPool2d(kernel_size=(2, 2)))

    return nn.Sequential(*layers)

In [30]:
class CIFAR10_ModelV3(nn.Module):
    def __init__(self, input_channels,
                 hidden_1_units_3, hidden_2_units_3, hidden_3_units_3, hidden_4_units_3, hidden_5_units_3,
                 output_size, dropout):
        super().__init__()

        # First Block (Output: [`batch_size`, `hidden_1_units_3`, 64, 64])
        self.conv_block_1_1 = conv_block(input_channels, hidden_1_units_3, pooling=False)
        self.conv_block_1_2 = conv_block(hidden_1_units_3, hidden_1_units_3, pooling=False)
        self.conv_block_1_3 = conv_block(hidden_1_units_3, hidden_1_units_3, pooling=True)
        self.res_1 = conv_block(hidden_1_units_3, hidden_1_units_3, pooling=False)
        self.res_1 = nn.Sequential(
            conv_block(hidden_1_units_3, hidden_1_units_3, pooling=False),
            conv_block(hidden_1_units_3, hidden_1_units_3, pooling=False)
        )

        # Second Block (Output: [`batch_size`, `hidden_2_units_3`, 32, 32])
        self.conv_block_2_1 = conv_block(hidden_1_units_3, hidden_2_units_3, pooling=False)
        self.conv_block_2_2 = conv_block(hidden_2_units_3, hidden_2_units_3, pooling=False)
        self.conv_block_2_3 = conv_block(hidden_2_units_3, hidden_2_units_3, pooling=True)
        self.res_2 = nn.Sequential(
            conv_block(hidden_2_units_3, hidden_2_units_3, pooling=False),
            conv_block(hidden_2_units_3, hidden_2_units_3, pooling=False)
        )

        # Third Block (Output: [`batch_size`, `hidden_3_units_3`, 16, 16])
        self.conv_block_3_1 = conv_block(hidden_2_units_3, hidden_3_units_3, pooling=False)
        self.conv_block_3_2 = conv_block(hidden_3_units_3, hidden_3_units_3, pooling=False)
        self.conv_block_3_3 = conv_block(hidden_3_units_3, hidden_3_units_3, pooling=True)
        self.res_3 = nn.Sequential(
            conv_block(hidden_3_units_3, hidden_3_units_3, pooling=False),
            conv_block(hidden_3_units_3, hidden_3_units_3, pooling=False)
        )

        # Fourth Block (Output: [`batch_size`, `hidden_4_units_3`, 8, 8])
        self.conv_block_4_1 = conv_block(hidden_3_units_3, hidden_4_units_3, pooling=False)
        self.conv_block_4_2 = conv_block(hidden_4_units_3, hidden_4_units_3, pooling=False)
        self.conv_block_4_3 = conv_block(hidden_4_units_3, hidden_4_units_3, pooling=True)
        self.res_4 = nn.Sequential(
            conv_block(hidden_4_units_3, hidden_4_units_3, pooling=False),
            conv_block(hidden_4_units_3, hidden_4_units_3, pooling=False)
        )

        # Fifth Block (Output: [`batch_size`, `hidden_5_units_3`, 4, 4])
        self.conv_block_5_1 = conv_block(hidden_4_units_3, hidden_5_units_3, pooling=False)
        self.conv_block_5_2 = conv_block(hidden_5_units_3, hidden_5_units_3, pooling=False)
        self.conv_block_5_3 = conv_block(hidden_5_units_3, hidden_5_units_3, pooling=True)
        self.res_5 = nn.Sequential(
            conv_block(hidden_5_units_3, hidden_5_units_3, pooling=False),
            conv_block(hidden_5_units_3, hidden_5_units_3, pooling=False)
        )

        # Classifier Block (Output: [`batch_size`, `output_size`])
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(dropout),
            nn.Linear(in_features=hidden_5_units_3*4*4,
                      out_features=output_size)
        )

    def forward(self, x):
        x = self.conv_block_1_3(self.conv_block_1_2(self.conv_block_1_1(x)))
        x = self.res_1(x) + x

        x = self.conv_block_2_3(self.conv_block_2_2(self.conv_block_2_1(x)))
        x = self.res_2(x) + x

        x = self.conv_block_3_3(self.conv_block_3_2(self.conv_block_3_1(x)))
        x = self.res_3(x) + x

        x = self.conv_block_4_3(self.conv_block_4_2(self.conv_block_4_1(x)))
        x = self.res_4(x) + x

        x = self.conv_block_5_3(self.conv_block_5_2(self.conv_block_5_1(x)))
        x = self.res_5(x) + x

        return self.classifier(x)

#### Initializing the Model

In [31]:
modelv3 = CIFAR10_ModelV3(3, 64, 128, 256, 256, 128, 10, 0.2).to(device)

summary(modelv3,
        input_size=(BATCH_SIZE, 3, 128, 128), # (`batch_size`, `colour channels`, `height`, `width`)
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
CIFAR10_ModelV3 (CIFAR10_ModelV3)        [128, 3, 128, 128]   [128, 10]            --                   True
├─Sequential (conv_block_1_1)            [128, 3, 128, 128]   [128, 64, 128, 128]  --                   True
│    └─Conv2d (0)                        [128, 3, 128, 128]   [128, 64, 128, 128]  1,792                True
│    └─BatchNorm2d (1)                   [128, 64, 128, 128]  [128, 64, 128, 128]  128                  True
│    └─SELU (2)                          [128, 64, 128, 128]  [128, 64, 128, 128]  --                   --
├─Sequential (conv_block_1_2)            [128, 64, 128, 128]  [128, 64, 128, 128]  --                   True
│    └─Conv2d (0)                        [128, 64, 128, 128]  [128, 64, 128, 128]  36,928               True
│    └─BatchNorm2d (1)                   [128, 64, 128, 128]  [128, 64, 128, 128]  128                  True
│    └─SELU (2) 

#### Setting Optimizer

In [32]:
optim = opt.Adam(params=modelv3.parameters(), lr=1e-3)

#### Training/Evaluating the Model

In [33]:
res_3 = fit(model=modelv3,
            epochs=10,
            train_dl=train_dl,
            valid_dl=valid_dl,
            loss_fn=loss_fn,
            eval_metric=accuracy_fn,
            optim=optim,
            n_train_batch_prints=10)

Starting Process...


  0%|          | 0/10 [00:00<?, ?it/s]

	1) Looked at 4992/50048 training samples...
	2) Looked at 9984/50048 training samples...
	3) Looked at 14976/50048 training samples...
	4) Looked at 19968/50048 training samples...
	5) Looked at 24960/50048 training samples...
	6) Looked at 29952/50048 training samples...
	7) Looked at 34944/50048 training samples...
	8) Looked at 39936/50048 training samples...
	9) Looked at 44928/50048 training samples...
	10) Looked at 49920/50048 training samples...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 1 | Train Loss: 1.6431 | Train Accuracy: 40.73% | Test Loss: 1.3816 | Test Evaluation (%): 49.96%
-----------------------------------------------------------------------------------------------------------
	1) Looked at 4992/50048 training samples...
	2) Looked at 9984/50048 training samples...
	3) Looked at 14976/50048 training samples...
	4) Looked at 19968/50048 training samples...
	5) Looked at 24960/50048 training 

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3e66b64a60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3e66b64a60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

	10) Looked at 49920/50048 training samples...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 2 | Train Loss: 1.1487 | Train Accuracy: 59.23% | Test Loss: 1.0633 | Test Evaluation (%): 62.58%
-----------------------------------------------------------------------------------------------------------
	1) Looked at 4992/50048 training samples...
	2) Looked at 9984/50048 training samples...
	3) Looked at 14976/50048 training samples...
	4) Looked at 19968/50048 training samples...
	5) Looked at 24960/50048 training samples...
	6) Looked at 29952/50048 training samples...
	7) Looked at 34944/50048 training samples...
	8) Looked at 39936/50048 training samples...
	9) Looked at 44928/50048 training samples...


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3e66b64a60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f3e66b64a60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

	10) Looked at 49920/50048 training samples...
-----------------------------------------------------------------------------------------------------------
-> Epoch: 3 | Train Loss: 0.9478 | Train Accuracy: 66.46% | Test Loss: 0.9563 | Test Evaluation (%): 67.09%
-----------------------------------------------------------------------------------------------------------
	1) Looked at 4992/50048 training samples...
	2) Looked at 9984/50048 training samples...
	3) Looked at 14976/50048 training samples...
	4) Looked at 19968/50048 training samples...
	5) Looked at 24960/50048 training samples...
	6) Looked at 29952/50048 training samples...
	7) Looked at 34944/50048 training samples...


KeyboardInterrupt: ignored