In [1]:
# Imports
import numpy as np 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from tqdm.notebook import tqdm, trange

In [2]:
class MNIST_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # To figure out size of matrix: [(W−K+2P)/S]+1 = Z --> [Features|Channels, Z, Z]
            # W input size | K kernel size | P padding | S Stride
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1) # P1:[32, 28, 28] P2:[32, 30, 30]
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1) #P1:[32, 28, 28] P2:[32, 32, 32]
        
        self.pool1 = nn.MaxPool2d(kernel_size=2) # Stride=kernel_size (loss of data occurs when stride>kernelsize
            #P1:[32, 14, 14] P2:[32, 16, 16]
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1) #P1:[64, 14, 14] P2:[64, 18, 18]
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1) #P1:[64, 14, 14] P2:[64, 20, 20]
        
        self.pool2 = nn.MaxPool2d(kernel_size=2) # pool_kernelSize determines how spatial dimensions are downsampled or reduced in size (kernal_size=2 from 28x28 image = 14x14) since pooling occured twice=7x7
            #P1:[64, 7, 7] P2:[64, 10, 10]
        
        # Flatten
        self.fc1 = nn.Linear(7*7*64, 256) # spatial dimension x feature maps or num of channels in output tensor after the convolutional layers
        self.fc2 = nn.Linear(256, 10) # Flattening is necessary cuz fully connected layers expect a 1D vector of input features
            # Flatten: Why 2 fully connected layers are necessary: If single connected layer, the model would essentially be a linear classifier on top of the convolutional features, which would severely limit the model's ability to learn complex patterns and decision boundaries. By introducing an intermediate fully connected layer, we alow the model to learn a non-linear transformation of the flattened convolutional features, which can capture more complex relationships and patterns in the data.

    def forward(self, x):
        # conv layer 1
        x = self.conv1(x)
        x = F.relu(x)
       
        # conv layer 2
        x = self.conv2(x)
        x = F.relu(x)
        
        # max pool 1
            # x = F.max_pool2d(x, kernel_size=2) same as below
        x = self.pool1(x)
        
        # conv layer 3
        x = self.conv3(x)
        x = F.relu(x)
        
        # conv layer 4
        x = self.conv4(x)
        x = F.relu(x)
        
        # max pool 1
            # x = F.max_pool2d(x, kernel_size=2) same as below
        x = self.pool2(x)
        
        # Flatten
        x = x.view(-1, 7*7*64)
        
        # fc layer 1
        x = self.fc1(x)
        x = F.relu(x)
        
        # fc layer 2
        x = self.fc2(x)
        return x       

In [2]:
class MNIST_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # To figure out size of matrix: [(W−K+2P)/S]+1 = Z --> [Features|Channels, Z, Z] ** Convolution layer
            # W input size | K kernel size | P padding | S Stride
        
        # To figure out size of matrix: [(W−K+2P)/S] = Z --> [Features|Channels, Z, Z] ** Pool layer
            # W input size | K kernel size | P padding | S Stride
        
        # Total parameters = Sum(Convolutional layer) --> [K*K*(previousLayer_num_filters) + stride]*num_filters
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=2) # P1:[32, 28, 28] P2:[32, 30, 30]
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=2) #P1:[32, 28, 28] P2:[32, 32, 32]
        
        self.pool1 = nn.MaxPool2d(kernel_size=2) # Stride=kernel_size (loss of data occurs when stride>kernelsize
            #P1:[32, 14, 14] P2:[32, 16, 16]
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=2) #P1:[64, 14, 14] P2:[64, 18, 18]
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=2) #P1:[64, 14, 14] P2:[64, 20, 20]
        
        self.pool2 = nn.MaxPool2d(kernel_size=2) # pool_kernelSize determines how spatial dimensions are downsampled or reduced in size (kernal_size=2 from 28x28 image = 14x14) since pooling occured twice=7x7
            #P1:[64, 7, 7] P2:[64, 10, 10]
        
        # Flatten
        self.fc1 = nn.Linear(10*10*64, 256) # spatial dimension x feature maps or num of channels in output tensor after the convolutional layers
        self.fc2 = nn.Linear(256, 10) # Flattening is necessary cuz fully connected layers expect a 1D vector of input features
            # Flatten: Why 2 fully connected layers are necessary: If single connected layer, the model would essentially be a linear classifier on top of the convolutional features, which would severely limit the model's ability to learn complex patterns and decision boundaries. By introducing an intermediate fully connected layer, we alow the model to learn a non-linear transformation of the flattened convolutional features, which can capture more complex relationships and patterns in the data.

    def forward(self, x):
        # conv layer 1
        x = self.conv1(x)
        x = F.relu(x)
       
        # conv layer 2
        x = self.conv2(x)
        x = F.relu(x)
        
        # max pool 1
            # x = F.max_pool2d(x, kernel_size=2) same as below
        x = self.pool1(x)
        
        # conv layer 3
        x = self.conv3(x)
        x = F.relu(x)
        
        # conv layer 4
        x = self.conv4(x)
        x = F.relu(x)
        
        # max pool 1
            # x = F.max_pool2d(x, kernel_size=2) same as below
        x = self.pool2(x)
        
        # Flatten
        x = x.view(-1, 10*10*64)
        
        # fc layer 1
        x = self.fc1(x)
        x = F.relu(x)
        
        # fc layer 2
        x = self.fc2(x)
        return x       

In [3]:
# Load the data
mnist_train = datasets.MNIST(root="./datasets", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root="./datasets", train=False, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False)

In [4]:
## Training
# Instantiate model
model = MNIST_CNN() # <---- Change to any model you want here we use MNIST_CNN

In [5]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # <-- Change optimizer of lr if you want

In [6]:
# Iterate through train set minibatchs
for epoch in trange(3): # <--- Change here
    for images, labels in tqdm(train_loader):
        # Zero out the gradients
        optimizer.zero_grad()
        
        # Forward Pass
        x = images # <--- Change here
        y = model(x)
        loss = criterion(y, labels)
        
        # Backward Pass
        loss.backward()
        optimizer.step()

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/600 [00:00<?, ?it/s]

  0%|          | 0/600 [00:00<?, ?it/s]

  0%|          | 0/600 [00:00<?, ?it/s]

In [7]:
## Testing 
correct = 0
total = len(mnist_test)

with torch.no_grad():
    # Iterate through test set minibatchs
    for images, labels in tqdm(test_loader):
        # Forward Pass
        x = images # <--- Change here
        y = model(x)
        
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())
        
print('Test accuracy: {}'.format(correct / total))

  0%|          | 0/100 [00:00<?, ?it/s]

Test accuracy: 0.9905999898910522


In [9]:
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)

870634


In [ ]:
# 0.989300012588501 #Padding of 1
# 0.9882000088691711 #Padding of 2