<div style='background: cyan; font-size: 74px; font-weight: bold'>CNN BASICS</div>

In [1]:
import torch
from tqdm.auto import tqdm
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.datasets as datasets

<div style='background: cyan; font-size: 74px; font-weight: bold'>DATA INGESTION</div>

In [2]:
transforms = transforms.Compose([
    transforms.Resize((24, 24)),
    transforms.Grayscale(), # Convert an RGB to a grey channel (blank and white)
    transforms.ToTensor()
])

In [3]:
img_dataset = datasets.ImageFolder(root='../datasets/processed/', transform=transforms)
loader = DataLoader(dataset=img_dataset, batch_size=32, shuffle=True)

<div style='background: cyan; font-size: 74px; font-weight: bold'>MODEL DEFINITION (MLP)</div>

In [4]:
class Mlp(nn.Module):
    """Two-layer perceptron that classifies flattened images.

    Args:
        input_size: Number of features per sample once the input is flattened
            (defaults to 24*24).
        hidden_size: Width of the hidden layer.
        output_size: Number of logits produced per sample (one per class).

    The forward pass returns raw logits of shape [batch_size, output_size];
    no softmax is applied inside the model.
    """

    def __init__(self, input_size=24*24, hidden_size=128, output_size=38):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu_1 = nn.ReLU()
        self.layer_2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Flatten every sample in the batch and return its class logits."""
        # reshape behaves like view for contiguous tensors but also accepts
        # non-contiguous inputs (e.g. transposed images), where view would raise.
        x = x.reshape(x.size(0), -1)
        x = self.relu_1(self.layer_1(x))
        x = self.layer_2(x)
        return x

<div style='background: cyan; font-size: 74px; font-weight: bold'>TRAINING LOOP</div>

In [5]:
# Loss: CrossEntropyLoss takes the raw logits (softmax is handled internally)
criterion = nn.CrossEntropyLoss()
# Model with its default layer sizes
simple_mlp = Mlp()
# Adam updates the model's parameters with a learning rate of 1e-3
optimizer = torch.optim.Adam(params=simple_mlp.parameters(), lr=1e-3)

In [6]:
# Training loop for the MLP
num_epochs = 10

for epoch in range(num_epochs):
    # Training mode; only matters for layers such as batch-norm, which Mlp does not use
    simple_mlp.train()
    running_loss = 0.0

    # Wrap the loader so every epoch shows its own progress bar
    progress_bar = tqdm(loader, ncols=1000, desc=f"Epoch {epoch+1}/{num_epochs}")
    for imgs, labels in progress_bar:
        # Gradients accumulate across backward() calls, so reset them for every batch
        optimizer.zero_grad()

        outputs = simple_mlp(imgs)  # Forward pass
        loss = criterion(outputs, labels)  # Loss for this batch
        loss.backward()  # Backpropagation
        optimizer.step()  # Weight update

        # Read the scalar once and reuse it for the bar and the running total
        batch_loss = loss.item()
        running_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    # Mean of the per-batch losses seen during this epoch
    epoch_loss = running_loss / len(loader)
    print(f"   Epoch {epoch+1}, - Avg Loss: {epoch_loss:.4f}")

Epoch 1/10:   0%|                                                                                             …

   Epoch 1, - Avg Loss: 3.1668


Epoch 2/10:   0%|                                                                                             …

   Epoch 2, - Avg Loss: 2.8790


Epoch 3/10:   0%|                                                                                             …

   Epoch 3, - Avg Loss: 2.7249


Epoch 4/10:   0%|                                                                                             …

   Epoch 4, - Avg Loss: 2.6015


Epoch 5/10:   0%|                                                                                             …

   Epoch 5, - Avg Loss: 2.5371


Epoch 6/10:   0%|                                                                                             …

   Epoch 6, - Avg Loss: 2.4580


Epoch 7/10:   0%|                                                                                             …

   Epoch 7, - Avg Loss: 2.4018


Epoch 8/10:   0%|                                                                                             …

   Epoch 8, - Avg Loss: 2.3594


Epoch 9/10:   0%|                                                                                             …

   Epoch 9, - Avg Loss: 2.3135


Epoch 10/10:   0%|                                                                                            …

   Epoch 10, - Avg Loss: 2.2776


In [7]:
# logit = torch.tensor([[0.9, 1.2, 3.5, 2.6],
#                       [0.01, 0.78, 2.3, 8.1],
#                       [1.9, 2.81, 5.5, 3.01]]
#                       )
# #apply softmax
# softmax = nn.Softmax(dim=1)
# logit_proba = softmax(logit)
# pred_args = torch.argmax(logit, dim=1)
# ground_truth_labels = torch.tensor([[2, 1, 2]])
# pred = (ground_truth_labels==pred_args)
# pred

<div style='background: cyan; font-size: 74px; font-weight: bold'>MODEL DEFINITION (CNN)</div>

In [8]:
# Re-import so that `transforms` refers to the torchvision module inside this
# cell, regardless of any earlier rebinding of that name.
from torchvision import transforms

# Colour pipeline: same 24x24 resize as before, but without the grayscale step.
rgb_steps = [
    transforms.Resize((24, 24)),
    transforms.ToTensor(),
]
transforms_rgb = transforms.Compose(rgb_steps)

In [None]:
"""
IMPORTANT!!!!
For a JxK image and an MxM filter/kernel
Conv2d's output is calculated as (J-M+1) X (K-M+1) for zero padding
for padding applied, the output is calculated as ((J+2P-M)+1) x ((K+2P-M)+1)

MaxPool2d layer's output is calculated as (..., (J-M+1)/2, (K-M+1)/2) if there's zero padding
"""
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 24x24 images.

    Layout: two 3x3 convolutions (each followed by ReLU), one 2x2 max-pool,
    then a two-layer fully connected head that produces the class logits.

    Args:
        input_size: Accepted for signature compatibility but not used by this
            implementation; the first linear layer is fixed at 10*10*10 inputs,
            which corresponds to 24x24 input images.
        hidden_size: Width of the hidden fully connected layer.
        output_size: Number of logits produced per sample (one per class).
    """

    def __init__(self, input_size=24*24, hidden_size=64, output_size=38):
        super().__init__()
        # Shapes below are per sample, for a (3, 24, 24) input.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3)  # -> (5, 22, 22)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3)  # -> (10, 20, 20)
        self.relu2 = nn.ReLU()

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # -> (10, 10, 10)

        self.layer_1 = nn.Linear(in_features=10*10*10, out_features=hidden_size)
        self.relu3 = nn.ReLU()
        self.layer_2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return class logits of shape [batch_size, output_size] for a batch of images."""
        x = self.relu1(self.conv1(x))
        x = self.relu2(self.conv2(x))

        x = self.pool(x)
        # reshape instead of view: the pooled activations are not guaranteed to be
        # contiguous (e.g. for channels_last inputs), and view raises in that case.
        x = x.reshape(x.size(0), -1)
        x = self.relu3(self.layer_1(x))
        x = self.layer_2(x)
        return x


# Data prep: same image folder as the MLP run, loaded through the colour
# pipeline (no grayscale step) because the CNN's first conv layer expects
# 3 input channels.
img_dataset_rgb = datasets.ImageFolder(root='../datasets/processed/', transform=transforms_rgb)
loader_rgb = DataLoader(dataset=img_dataset_rgb, batch_size=32, shuffle=True)

# cnn model instance, with its own loss and optimizer
cnn = CNN()
criterion1 = nn.CrossEntropyLoss() # Also handles softmax internally
optimizer1 = torch.optim.Adam(cnn.parameters(), lr=0.001) # Update model params

# Training Loop
num_epochs = 10

for epoch in range(num_epochs):
    # Put the model being trained (the CNN, not the earlier MLP) in training mode
    cnn.train()
    running_loss = 0.0

    progress_bar = tqdm(loader_rgb, ncols=1000, desc=f"Epoch {epoch+1}/{num_epochs}")
    for imgs, labels in progress_bar:
        outputs = cnn(imgs) # Forward pass
        loss = criterion1(outputs, labels) # Loss for this batch

        optimizer1.zero_grad() # Gradients accumulate, so clear them for every batch
        loss.backward() # Backpropagation
        optimizer1.step() # Update weights

        # Read the scalar once and reuse it for the bar and the running total
        batch_loss = loss.item()
        progress_bar.set_postfix(loss=batch_loss)

        running_loss += batch_loss
    # Average over the batches of the loader that was actually iterated
    epoch_loss = running_loss / len(loader_rgb)
    print(f"   Epoch {epoch+1}, - Avg Loss: {epoch_loss:.4f}")

Epoch 1/10:   0%|                                                                                             …