# Import

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

# Dataloader

In [2]:
# Seed for the train/validation split so that Restart & Run All always
# produces the same partition of the training images.
SPLIT_SEED = 42

# CIFAR-10 train and test sets, stored under ./data (downloaded when missing).
# ToTensor() converts every image to a tensor.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Hold out 10% of the training images for validation. The validation size is
# computed as the remainder so the two lengths always sum to the dataset size
# (rounding both fractions independently can miss by one and make
# random_split raise).
num_train = round(0.9 * len(train_dataset))
num_val = len(train_dataset) - num_train
train_dataset, validation_dataset = random_split(
    train_dataset,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
BATCH_SIZE = 128
# Shuffling applies to the training loader only.
SHUFFLE = True
# Number of worker processes used by every loader.
NUM_WORKERS = 4

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=SHUFFLE)
# Evaluation loaders keep the dataset order: shuffling does not change
# aggregate metrics and makes predictions impossible to align with dataset
# indices across runs.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
val_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)


## Visualizing Data

# Model

In [4]:
class SCNN(nn.Module):
    def __init__(self) -> None:
        super().__init__()

        #Conv Block 1
        #(Nx3x32x32) -> (Nx16x32x32) 
        conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,padding=2)
        relu1 = nn.ReLU()

        #Conv Block 2
        #(Nx16x32x32) -> (Nx32x28x28)
        conv2 = nn.Conv2d(in_channels=16, out_channels=32,
                          kernel_size=5, stride = 1)
        relu2 = nn.ReLU()
        #(Nx32x28x28) -> (Nx32x14x14)
        pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        #Conv Block 3
        #(Nx32x14x14) -> (Nx64x12x12)
        conv3 = nn.Conv2d(in_channels=32, out_channels=64,
                          kernel_size=3, padding=0,stride=1)
        relu3 = nn.ReLU()

        #Conv Block 4
        #(Nx64x12x12) -> (Nx128x8x8)
        conv4 = nn.Conv2d(in_channels=64, out_channels=128,
                          kernel_size=5, stride = 1,padding=0)
        relu4 = nn.ReLU()
        #(Nx128x8x8) -> (Nx128x4x4)
        pool4 = nn.MaxPool2d(kernel_size=2, stride=2)


        self.layer1 = nn.Sequential(conv1, relu1)
        self.layer2 = nn.Sequential(conv2, relu2, pool2)
        self.layer3 = nn.Sequential(conv3,relu3)
        self.layer4 = nn.Sequential(conv4, relu4, pool4)
        
        in_dim = 128*4*4
        fc1 = nn.Linear(in_dim,512)
        fc2 = nn.Linear(512,10)

        self.fc_layer = nn.Sequential(nn.Flatten(start_dim=1),fc1,nn.ReLU(),fc2)


    def forward(self, x):
        
        out1 = self.layer1(x)
        
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        y = self.fc_layer(out4)

        return y

def count_model_params(model):
    """Return the total number of elements across the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted; frozen ones are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [5]:
# Build the network and show its layer-by-layer structure.
cnn = SCNN()
print(cnn)

# Report how many parameters will be updated during training.
params = count_model_params(cnn)
print(f"Model has {params} learnable parameters")


SCNN(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=2048, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=10, bias=True)
  )
)
Model has 1291690 learnable parameters


# Training

## Parameters for training

In [6]:
# Learning rate handed to the Adam optimizer.
LR = 3e-4
# Number of full passes over the training loader.
EPOCHS = 100

In [7]:
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move the model's parameters to the selected device.
cnn = cnn.to(device)

In [8]:
# Adam over all model parameters, using the configured learning rate.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
# Cross-entropy loss computed on the network's raw outputs.
criterion = nn.CrossEntropyLoss().to(device)

In [9]:
# Train for EPOCHS passes over train_loader, taking one optimizer step per
# mini-batch and showing the most recent batch loss in the progress bar.
for epoch in range(EPOCHS):
    # Put the model in training mode at the start of every epoch so this cell
    # still behaves correctly when re-run after an evaluation cell has called
    # cnn.eval().
    cnn.train()

    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i,(images,labels) in progress_bar:

        # Move the batch to the same device as the model.
        x = images.to(device)
        y_train = labels.to(device)

        # Clear gradients w.r.t. parameters (they accumulate across backward calls)
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = cnn(x)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, y_train)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()
        progress_bar.set_description(f"Epoch {epoch+1} Iter {i+1}: loss {loss.item():.5f}. ")


Epoch 1 Iter 352: loss 1.60483. : 100%|██████████| 352/352 [00:03<00:00, 107.37it/s]
Epoch 2 Iter 352: loss 1.34973. : 100%|██████████| 352/352 [00:02<00:00, 120.57it/s]
Epoch 3 Iter 352: loss 1.21989. : 100%|██████████| 352/352 [00:02<00:00, 121.38it/s]
Epoch 4 Iter 352: loss 1.14208. : 100%|██████████| 352/352 [00:02<00:00, 122.80it/s]
Epoch 5 Iter 352: loss 0.97450. : 100%|██████████| 352/352 [00:02<00:00, 121.05it/s]
Epoch 6 Iter 352: loss 1.12144. : 100%|██████████| 352/352 [00:02<00:00, 120.61it/s]
Epoch 7 Iter 352: loss 1.13205. : 100%|██████████| 352/352 [00:02<00:00, 121.80it/s]
Epoch 8 Iter 352: loss 1.21765. : 100%|██████████| 352/352 [00:02<00:00, 120.90it/s]
Epoch 9 Iter 352: loss 0.94093. : 100%|██████████| 352/352 [00:03<00:00, 116.76it/s]
Epoch 10 Iter 352: loss 0.89889. : 100%|██████████| 352/352 [00:02<00:00, 120.36it/s]
Epoch 11 Iter 352: loss 0.58987. : 100%|██████████| 352/352 [00:02<00:00, 120.53it/s]
Epoch 12 Iter 352: loss 0.63147. : 100%|██████████| 352/352 [00