### 5.3 Programming Task: Digit recognition using CNNs

In [1]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the `ConvNet` class given below, using the network description from the supplement PDF.

In [2]:
class ConvNet(nn.Module):
    """Small CNN classifier: conv(5x5, 20 maps) -> ReLU -> 2x2 max-pool -> FC(100) -> ReLU -> FC(10).

    Args:
        input_shape: Sequence ``(channels, height, width)`` describing one input
            sample. ``input_shape[0]`` sets the number of input channels of the
            convolution; height and width determine the size of the flattened
            feature vector fed to the first fully connected layer. If only the
            channel count is supplied, a 28x28 image is assumed (which yields the
            20*12*12 feature size).

    Raises:
        ValueError: If the spatial size is too small to survive the 5x5
            convolution followed by the 2x2 pooling.
    """

    def __init__(self,input_shape):
        super(ConvNet, self).__init__()
        out_channels = 20
        kernel_size = 5
        pool_size = 2

        # Fall back to 28x28 when the caller passes only the channel count.
        if len(input_shape) >= 3:
            height, width = input_shape[1], input_shape[2]
        else:
            height, width = 28, 28

        # Unpadded stride-1 convolution shrinks each side by (kernel_size - 1);
        # the pooling (kernel 2, stride 2, floor mode) then halves it, rounding down.
        pooled_height = (height - kernel_size + 1) // pool_size
        pooled_width = (width - kernel_size + 1) // pool_size
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_shape {tuple(input_shape)} is too small for a "
                f"{kernel_size}x{kernel_size} convolution followed by "
                f"{pool_size}x{pool_size} max pooling"
            )

        # Plain int (not a parameter/buffer): length of the flattened feature vector.
        self.flat_features = out_channels * pooled_height * pooled_width

        # Layers are created in the same order as before so that seeded weight
        # initialisation and the module repr stay unchanged.
        self.conv1 = nn.Conv2d(input_shape[0], out_channels, (kernel_size, kernel_size))
        self.maxpool = nn.MaxPool2d(pool_size, pool_size)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(self.flat_features, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the 10 raw class scores per sample.

        No softmax is applied; the output of the last linear layer is returned as is.
        """
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        # view(-1, n) rather than view(batch, -1): this also flattens a single
        # unbatched sample into one row.
        x = x.view(-1, self.flat_features)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

Show the net.

In [3]:
# Build the network for single-channel 28x28 inputs and print it.
# Printing the bound `parameters` method shows the module's layer listing
# wrapped in "<bound method Module.parameters of ...>".
mnist_input_shape = (1, 28, 28)
net = ConvNet(mnist_input_shape)
print(net.parameters)

<bound method Module.parameters of ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (fc1): Linear(in_features=2880, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)>


ii. Train the CNN and observe the difference in performance compared to the feed-forward
network from task 5.2.

In [51]:
# Hyper-parameters shared by the data-loading, optimiser and training cells.
batch_Size = 64
num_Classes = 10
learning_Rate = 0.01
epochs = 10

# Training pipeline: tensor conversion, normalisation with mean 0 / std 1,
# then a RandomAffine(5) augmentation.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.,), (1.,)),
    transforms.RandomAffine(5),
])

# Test pipeline: same conversion and normalisation, no augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.,), (1.,)),
])

In [52]:
# Load the MNIST data set (downloaded into the current directory if missing).
torch.manual_seed(0)
train_data = datasets.MNIST('.', download=True, train=True, transform=train_transform)
test_data = datasets.MNIST('.', download=True, train=False, transform=test_transform)

# Shuffle only the training data; evaluation does not depend on sample order,
# so the test loader iterates in a fixed order.
train_loader = Data.DataLoader(train_data, batch_size=batch_Size, shuffle=True)
test_loader = Data.DataLoader(test_data, batch_size=batch_Size, shuffle=False)

In [53]:
# Loss function and optimiser: cross-entropy on the network outputs,
# minimised with SGD (momentum 0.9) over all parameters of `net`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),
    lr=learning_Rate,
    momentum=0.9,
)

In [54]:
# Main training loop: `epochs` passes over `train_loader`, recording the mean
# per-batch loss of every epoch in `training_loss`.
# NOTE(review): `net` and `optimizer` are not re-created in this cell, so
# re-running it continues training from the current weights - restart the
# kernel and run all cells for a from-scratch result.
torch.manual_seed(0)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
net.to(device)

training_loss = []
for epoch in range(epochs):
    net.train()
    print(f"Starting epoch {epoch+1}...")
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step, then forward/backward/update.
        optimizer.zero_grad()
        predictions = net(images)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average over the number of batches (not the number of samples).
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}')
    epoch_loss = running_loss / len(train_loader)
    training_loss.append(epoch_loss)
            


Starting epoch 1...
Epoch [1/10], Loss: 0.0668
Starting epoch 2...
Epoch [2/10], Loss: 0.0348
Starting epoch 3...
Epoch [3/10], Loss: 0.0257
Starting epoch 4...
Epoch [4/10], Loss: 0.0193
Starting epoch 5...
Epoch [5/10], Loss: 0.0151
Starting epoch 6...
Epoch [6/10], Loss: 0.0130
Starting epoch 7...
Epoch [7/10], Loss: 0.0106
Starting epoch 8...
Epoch [8/10], Loss: 0.0090
Starting epoch 9...
Epoch [9/10], Loss: 0.0078
Starting epoch 10...
Epoch [10/10], Loss: 0.0060


In [55]:
# Evaluation: count how many test images get the correct class.
torch.manual_seed(0)
net.eval()

correct = 0
total = 0
with torch.no_grad():  # no gradients needed for inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = net(images)
        # Predicted class = index of the largest score along the class dimension.
        predicted = outputs.max(dim=1).indices

        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test set: {accuracy:.2f}%')

Accuracy on test set: 98.87%


iii. Calculate the number of learnable parameters and the output shape of each layer. Verify your
answers with the model summary. (Refer to the last cell of the tutorial notebook.)

Number of parameters (convolutional layer) $= N_{out} \times (F \times F \times C) + N_{out}$


Number of parameters (fully connected layer) $= (N_{in} \times N_{out}) + N_{out}$


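Layer_1 (Conv2d) $= 20 \times (5 \times 5 \times 1) + 20 = 520$; output shape $20 \times 24 \times 24$ (since $28 - 5 + 1 = 24$)

Layer_2 (MaxPool2d) $= 0$ (max pooling has no learnable parameters); output shape $20 \times 12 \times 12$

Layer_3 (Linear) $= ((20 \times 12 \times 12) \times 100) + 100 = 288{,}100$; output shape $100$

Layer_4 (Linear) $= (100 \times 10) + 10 = 1{,}010$; output shape $10$

Total parameters $= 520 + 288{,}100 + 1{,}010 = 289{,}630$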

In [4]:

# Layer-by-layer summary to verify the hand-computed parameter counts.
# The shape is passed without a batch dimension, so the convolution and pooling
# rows of the table are reported without one as well.
summary(net, input_size=(1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [1, 10]                   --
├─Conv2d: 1-1                            [20, 24, 24]              520
├─ReLU: 1-2                              [20, 24, 24]              --
├─MaxPool2d: 1-3                         [20, 12, 12]              --
├─Linear: 1-4                            [1, 100]                  288,100
├─ReLU: 1-5                              [1, 100]                  --
├─Linear: 1-6                            [1, 10]                   1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (M): 0.54
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 1.16
Estimated Total Size (MB): 1.25