### 5.3 Programming Task: Digit recognition using CNNs

In [2]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


import matplotlib.pyplot as plt
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): none of the cells visible here move the model or the batches
# to `device` - confirm whether training on the CPU is intended.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

i. Complete the code for the ConvNet class given below, using the network description from the supplement PDF.

In [8]:
class ConvNet(nn.Module):
    """Small convolutional network for 28x28 single-channel digit images.

    Layers, in order:
        Conv2d(1 -> 20 channels, 5x5 kernel, stride 1) + ReLU
        MaxPool2d(2x2, stride 2)
        flatten
        Linear(20 * 12 * 12 -> 100) + ReLU
        Linear(100 -> 10)

    The network returns raw class scores (logits), not probabilities: no
    softmax is applied in forward().
    """

    # (channels, height, width) of the pooled feature map that fc1 is sized for
    _POOLED_SHAPE = (20, 12, 12)

    def __init__(self):
        # Initialize the base class nn.Module
        super(ConvNet, self).__init__()

        # Initialize all the layers with their parameters
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 28x28 input -> 24x24 after the 5x5 convolution -> 12x12 after pooling
        self.fc1 = nn.Linear(20 * 12 * 12, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Compute the class scores for a batch of images.

        Args:
            x: image tensor whose last three dimensions are (1, 28, 28).

        Returns:
            Tensor with 10 logits per input image (one row per image).

        Raises:
            RuntimeError: if the pooled feature map does not have the shape
                the first fully connected layer was built for.
        """
        # Convolutional layer with ReLU activation
        x = F.relu(self.conv1(x))

        # Pooling layer
        x = self.pool(x)

        # Guard the flatten step: view(-1, ...) on its own would silently
        # regroup elements across samples whenever the total element count
        # happens to be divisible by 20 * 12 * 12.
        if tuple(x.shape[-3:]) != self._POOLED_SHAPE:
            raise RuntimeError(
                f'ConvNet expects 1x28x28 inputs; got a pooled feature map of shape '
                f'{tuple(x.shape[-3:])} instead of {self._POOLED_SHAPE}'
            )

        # Flatten layer
        x = x.view(-1, 20 * 12 * 12)

        # Fully connected layer with ReLU activation
        x = F.relu(self.fc1(x))

        # Final fully connected layer: returns one logit per class
        return self.fc2(x)
    

Show the net.

In [9]:
# Instantiate the network and print its layer structure
net = ConvNet()
print(net)

ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2880, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance compared to the feed-forward
network from task 5.2.

In [10]:
# Set hyper parameters.
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# Fix the RNG seed so that weight initialisation and batch shuffling are
# repeatable when the notebook is restarted and run from the top.
seed = 42
torch.manual_seed(seed)

# Define a transform to normalize the data:
# convert each image to a tensor, then subtract 0.5 and divide by 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [13]:
# Load the MNIST data set.
# Both splits are stored under the same root directory
data_root = '~/.pytorch/MNIST_data/'

# Download and load the training data; batches are reshuffled every epoch
trainset = datasets.MNIST(data_root, download=True, train=True, transform=transform)
trainloader = Data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)

# Download and load the test data.
# Accuracy does not depend on the sample order, so the test set is not shuffled
testset = datasets.MNIST(data_root, download=True, train=False, transform=transform)
testloader = Data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

In [15]:
# Build the loss function, the network and the optimizer

# Cross-entropy loss: the usual choice for multi-class classification
criterion = nn.CrossEntropyLoss()

# The network to be trained
model = ConvNet()

# Adam optimizer over all model parameters, using the configured learning rate
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [16]:
# Run the main training loop
log_every = 200  # print the average loss once per this many mini-batches

# Put the model in training mode explicitly, so that re-running this cell after
# the evaluation cell (which calls model.eval()) still trains in the right mode
model.train()

for epoch in range(num_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass: compute the predicted outputs by passing inputs to the model
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # Perform a single optimization step (parameter update)
        optimizer.step()

        # Print statistics: average loss over the last `log_every` mini-batches
        running_loss += loss.item()
        if (i + 1) % log_every == 0:
            print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

print('Finished Training')


[1, 200] loss: 0.527
[1, 400] loss: 0.166
[1, 600] loss: 0.119
[1, 800] loss: 0.106
[2, 200] loss: 0.069
[2, 400] loss: 0.071
[2, 600] loss: 0.066
[2, 800] loss: 0.059
[3, 200] loss: 0.047
[3, 400] loss: 0.053
[3, 600] loss: 0.048
[3, 800] loss: 0.044
[4, 200] loss: 0.032
[4, 400] loss: 0.032
[4, 600] loss: 0.036
[4, 800] loss: 0.037
[5, 200] loss: 0.024
[5, 400] loss: 0.027
[5, 600] loss: 0.033
[5, 800] loss: 0.034
[6, 200] loss: 0.020
[6, 400] loss: 0.021
[6, 600] loss: 0.030
[6, 800] loss: 0.026
[7, 200] loss: 0.016
[7, 400] loss: 0.020
[7, 600] loss: 0.021
[7, 800] loss: 0.020
[8, 200] loss: 0.016
[8, 400] loss: 0.012
[8, 600] loss: 0.018
[8, 800] loss: 0.014
[9, 200] loss: 0.011
[9, 400] loss: 0.013
[9, 600] loss: 0.013
[9, 800] loss: 0.021
[10, 200] loss: 0.010
[10, 400] loss: 0.014
[10, 600] loss: 0.012
[10, 800] loss: 0.010
Finished Training


In [17]:
# Measure the classification accuracy on the test set

# Put the model in evaluation mode
model.eval()

total = 0
correct = 0

# Evaluation needs no gradients, so gradient tracking is switched off
with torch.no_grad():
    for images, labels in testloader:
        # Forward pass to get the class scores
        outputs = model(images)

        # The prediction is the index of the highest score in each row
        predicted = torch.max(outputs, dim=1).indices

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# Accuracy as a percentage of all test samples seen
accuracy = 100 * correct / total
print(f'Accuracy of the network on the 10000 test images: {accuracy:.2f}%')


Accuracy of the network on the 10000 test images: 98.81%


iii. Calculate the number of learnable parameters and the output shape of each layer. Verify your
answers with the model summary. (Refer to the last cell of the tutorial notebook.)

In [22]:
# Hand-computed number of learnable parameters, layer by layer
conv2d_params = (5 * 5 * 1 * 20) + 20  # 5x5 kernel * 1 input channel * 20 filters, plus 20 biases
maxpool_params = 0 #non trainable
flatten = 0 #non trainable
flatten_size = 20 * 12 * 12  # 20 channels of 12x12 after pooling
fullyconn1_params = (flatten_size * 100) + 100  # weights plus 100 biases
fullyconn2_params = 100 * 10 + 10  # weights plus 10 biases

params = conv2d_params + maxpool_params + flatten + fullyconn1_params + fullyconn2_params

print(params)

# Use a separate instance for the summary so that the trained `model` is not
# replaced by an untrained network.
summary_model = ConvNet()

# Cross-check the hand calculation against the parameters registered on the network
assert params == sum(p.numel() for p in summary_model.parameters())

summary(summary_model, input_size=(1, 1, 28, 28))


289630


Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [1, 10]                   --
├─Conv2d: 1-1                            [1, 20, 24, 24]           520
├─MaxPool2d: 1-2                         [1, 20, 12, 12]           --
├─Linear: 1-3                            [1, 100]                  288,100
├─Linear: 1-4                            [1, 10]                   1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.59
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 1.16
Estimated Total Size (MB): 1.25