# Import

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from typing import Tuple, NoReturn

# Dataloader

In [2]:
# Load CIFAR-10 and carve a validation subset out of the official training split.
SPLIT_SEED = 42      # fixed seed so the train/validation split is identical on every run
VAL_FRACTION = 0.1   # share of the official training split held out for validation

to_tensor = transforms.ToTensor()
full_train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=to_tensor, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=to_tensor, download=True)

# Derive the training size as the remainder: rounding both parts independently
# can produce lengths that do not sum to len(dataset), which random_split rejects.
num_val = round(VAL_FRACTION * len(full_train_dataset))
num_train = len(full_train_dataset) - num_val
train_dataset, validation_dataset = random_split(
    full_train_dataset,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
BATCH_SIZE = 128
SHUFFLE = True      # applies to the training loader only
NUM_WORKERS = 4     # worker processes used by each DataLoader

# Only the training data is shuffled; evaluation metrics do not depend on sample
# order, so the validation/test loaders keep a fixed, reproducible order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=SHUFFLE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
val_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)


## Visualizing Data

# Model

In [17]:
class SCNN(nn.Module):
    """Small convolutional classifier producing 10 logits from 3x32x32 images.

    The network is three convolutional blocks followed by a two-layer
    fully-connected head, all stored in ``self.conv_net``.

    Args:
        activation: Non-linearity used after every conv layer and after the
            first linear layer. One of ``'relu'``, ``'lrelu'`` or ``'elu'``.
        batch_norm: Normalisation inserted at the end of each conv block.
            ``'bnorm'`` selects ``nn.BatchNorm2d``, ``'gnorm'`` selects
            ``nn.GroupNorm`` with 8 groups; any other value disables
            normalisation.
        dropout: When true, an ``nn.Dropout`` layer is inserted between the
            hidden linear layer's activation and the output layer.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    # Supported activation names mapped to the module class that implements them.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'lrelu': nn.LeakyReLU,
        'elu': nn.ELU,
    }
    # Number of groups used when batch_norm == 'gnorm'.
    _NUM_GROUPS = 8
    # Drop probability used when dropout is enabled.
    _DROPOUT_P = 0.5

    def __init__(self, activation: str, batch_norm: str, dropout: bool) -> None:
        super().__init__()

        # Fail fast: previously an unknown name left a ``None`` entry in the
        # Sequential, which only blew up later inside ``forward``.
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                f"Unknown activation {activation!r}; "
                f"expected one of {sorted(self._ACTIVATIONS)}"
            )
        make_activation = self._ACTIVATIONS[activation]

        # Parameterised layers are created first and in a fixed order
        # (conv1, conv2, conv3, fc1, fc2) so seeded initialisation is stable.

        # Conv Block 1: (Nx3x32x32) -> (Nx32x32x32)
        conv1 = nn.Conv2d(in_channels=3, out_channels=32,
                          kernel_size=5, padding=2)

        # Conv Block 2: (Nx32x32x32) -> (Nx64x28x28) -> pool -> (Nx64x14x14)
        conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                          kernel_size=5, stride=1)
        pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Conv Block 3: (Nx64x14x14) -> (Nx128x12x12) -> pool -> (Nx128x6x6)
        conv3 = nn.Conv2d(in_channels=64, out_channels=128,
                          kernel_size=3, padding=0, stride=1)
        pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head operating on the flattened 128x6x6 feature map.
        in_dim = 128 * 6 * 6
        fc1 = nn.Linear(in_dim, 1024)
        fc2 = nn.Linear(1024, 10)

        # Each conv block is (conv, activation, [pool], [norm]); the optional
        # pieces are simply skipped, so one code path covers every variant.
        layers = []
        for conv, pool in ((conv1, None), (conv2, pool2), (conv3, pool3)):
            layers.append(conv)
            layers.append(make_activation())
            if pool is not None:
                layers.append(pool)
            norm = self._make_norm(batch_norm, conv.out_channels)
            if norm is not None:
                layers.append(norm)

        layers.append(nn.Flatten(start_dim=1))
        layers.append(fc1)
        layers.append(make_activation())
        if dropout:
            layers.append(nn.Dropout(p=self._DROPOUT_P))
        layers.append(fc2)

        self.conv_net = nn.Sequential(*layers)

    @classmethod
    def _make_norm(cls, kind: str, num_channels: int):
        """Return the normalisation layer for ``kind``, or ``None`` when disabled."""
        if kind == 'bnorm':
            return nn.BatchNorm2d(num_channels)
        if kind == 'gnorm':
            return nn.GroupNorm(cls._NUM_GROUPS, num_channels)
        return None

    def forward(self, x):
        """Run ``x`` through the whole network and return the class logits."""
        y = self.conv_net(x)

        return y

def count_model_params(model):
    """Return how many scalar values the model's trainable parameters hold.

    Parameters whose ``requires_grad`` flag is false are left out of the total.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [20]:
# Build the network variant under study and report its architecture and size.
cnn = SCNN(
    activation='relu',
    batch_norm='gnorm',
    dropout=False,
)
params = count_model_params(cnn)

print(cnn)
print(f"Model has {params} learnable parameters")


SCNN(
  (conv_net): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): GroupNorm(8, 32, eps=1e-05, affine=True)
    (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): GroupNorm(8, 64, eps=1e-05, affine=True)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): GroupNorm(8, 128, eps=1e-05, affine=True)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=4608, out_features=1024, bias=True)
    (13): ReLU()
    (14): Linear(in_features=1024, out_features=10, bias=True)
  )
)
Model has 4857866 learnable parameters


# Training

## Todo

-> Transforms for normalizing the input image <br />
-> Saving checkpoints with all the stats <br />
-> Visualization of images and plots for eval and train loss <br />
-> Early stopping criterion <br />
-> Accuracy above 85% by modifying parameters <br />

## Parameters for training

In [21]:
# Hyperparameters read by the optimizer setup and the training loop.
LR: float = 3e-4       # learning rate handed to the optimizer
EPOCHS: int = 30       # number of passes over the training loader
EVAL_FREQ: int = 10    # validation runs every EVAL_FREQ epochs

In [22]:
# Prefer the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's parameters and buffers onto the selected device.
cnn = cnn.to(device)

In [23]:
# Cross-entropy loss placed on the compute device.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)

In [24]:
@torch.no_grad()
def eval_model(model, loader=None, loss_fn=None) -> Tuple[float, float]:
    """Evaluate ``model`` on a data loader without tracking gradients.

    The model is switched to evaluation mode for the duration of the call and
    its previous train/eval mode is restored afterwards, even on error.

    Args:
        model: Module to evaluate; called as ``model(images)`` and expected to
            return per-class scores with classes along dimension 1.
        loader: Iterable of ``(images, labels)`` batches. Defaults to the
            notebook-level ``val_loader``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor. Defaults to the notebook-level ``criterion``.

    Returns:
        ``(accuracy, loss)`` where ``accuracy`` is the percentage of correctly
        classified samples and ``loss`` is the mean of the per-batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    if loader is None:
        loader = val_loader
    if loss_fn is None:
        loss_fn = criterion

    # Send batches to wherever the model lives; fall back to the notebook-level
    # ``device`` for models that have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    correct = 0
    total = 0
    loss_list = []

    was_training = model.training
    model.eval()  # BatchNorm/Dropout must run in inference mode while scoring
    try:
        for images, labels in loader:
            images = images.to(target_device)
            labels = labels.to(target_device)

            # Forward pass only to get logits/output
            outputs = model(images)
            loss_list.append(loss_fn(outputs, labels).item())

            # Predicted class is the index of the largest score
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += len(labels)
    finally:
        model.train(was_training)

    if total == 0:
        raise ValueError("eval_model received no samples to evaluate")

    accuracy = correct / total * 100
    loss = np.mean(loss_list)
    return accuracy, loss

In [25]:
# Main optimisation loop: one pass over train_loader per epoch, with a
# validation run every EVAL_FREQ epochs.
for epoch in range(EPOCHS):
    # Make sure layers such as BatchNorm/Dropout are in training mode,
    # regardless of what ran before this epoch.
    cnn.train()

    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, (images, labels) in progress_bar:

        x = images.to(device)
        y_train = labels.to(device)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = cnn(x)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, y_train)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()
        progress_bar.set_description(f"Epoch {epoch+1} Iter {i+1}: loss {loss.item():.5f}. ")

    if (epoch + 1) % EVAL_FREQ == 0:
        accuracy, valid_loss = eval_model(cnn)
        # Report the 1-based epoch so it matches the progress bar and the
        # evaluation schedule above.
        print(f"Accuracy at epoch {epoch+1}: {round(accuracy, 2)}% (validation loss {valid_loss:.5f})")


Epoch 1 Iter 352: loss 0.87814. : 100%|██████████| 352/352 [00:04<00:00, 72.39it/s]
Epoch 2 Iter 352: loss 0.63094. : 100%|██████████| 352/352 [00:04<00:00, 83.56it/s]
Epoch 3 Iter 352: loss 0.51102. : 100%|██████████| 352/352 [00:04<00:00, 83.92it/s]
Epoch 4 Iter 208: loss 0.29660. :  59%|█████▉    | 208/352 [00:02<00:01, 82.06it/s]


KeyboardInterrupt: 