### 5.3 Programming Task: Digit recognition using CNNs

In [41]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary
%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from supplement pdf.

In [42]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv = nn.Conv2d(1, 20, 5, stride=1)
        self.maxpool = nn.MaxPool2d(2, stride=2)
        self.fc1 = nn.Linear(12 * 12 * 20, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        x = F.relu(self.conv(x))
        x = self.maxpool(x).flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x

Show the net.

In [43]:
net = ConvNet()
print(net)

ConvNet(
  (conv): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2880, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from the task 5.2.
- Both show good performance (above 97% accuracy)
- However, surprisingly, the fully connected network (5.2) has a slightly better performance with equal hyperparameters, loss and optimizer (98% vs. 97.5%)

In [44]:
batch_size=200
learning_rate=0.01
epochs=10

In [45]:
train_loader = Data.DataLoader(datasets.MNIST('../data', train=True, download=True,
                        transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True)

test_loader = Data.DataLoader(datasets.MNIST('../data', train=False,
                        transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True)

In [46]:
loss_func = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)

In [47]:
for epoch in range(epochs):
    total_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        net_out = net(data)
        loss = loss_func(net_out, target)
        total_loss += loss.data
        loss.backward()
        optimizer.step()

    print('Train Epoch: {} Avg. Training Loss: {:.6f}'.format(epoch+1, total_loss/len(train_loader.dataset)))

Train Epoch: 1 Avg. Training Loss: 0.003758
Train Epoch: 2 Avg. Training Loss: 0.001512
Train Epoch: 3 Avg. Training Loss: 0.001228
Train Epoch: 4 Avg. Training Loss: 0.001035
Train Epoch: 5 Avg. Training Loss: 0.000882
Train Epoch: 6 Avg. Training Loss: 0.000763
Train Epoch: 7 Avg. Training Loss: 0.000668
Train Epoch: 8 Avg. Training Loss: 0.000594
Train Epoch: 9 Avg. Training Loss: 0.000531
Train Epoch: 10 Avg. Training Loss: 0.000483


In [48]:
test_loss = 0.0
correct = 0.0

for data, target in test_loader:
    net_out = net(data)
    # sum up batch loss
    test_loss += loss_func(net_out, target).data
    pred = net_out.data.max(dim=1)[1]  # get the index of the max log probability. Here [1] returns the argmax. 
    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}/{} ({:.2%})\n'.format(
    test_loss, correct, len(test_loader.dataset), correct / len(test_loader.dataset)))


Test set: Average loss: 0.0004, Accuracy: 9746/10000 (97.46%)



iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with model summary. (Refer last cell of the tutorial notebook)
- Convolution: $\#\mathrm{TrainableParameters} = 5^2 \times 20 + 20 = \bf{520}$
- MaxPool: $\#\mathrm{TrainableParameters} = 0$
- Fully Connected 1: $\#\mathrm{TrainableParameters} = 12^2 \times 20 \times 100 + 100 = \bf{288,100}$
- Fully Connected 2: $\#\mathrm{TrainableParameters} = 100\times 10 + 10 = \bf{1,010}$
- Total: $\sum \#\mathrm{TrainableParameters} = \bf{289,630}$

In [65]:
summary(net, input_size=(1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [1, 10]                   --
├─Conv2d: 1-1                            [1, 20, 24, 24]           520
├─MaxPool2d: 1-2                         [1, 20, 12, 12]           --
├─Linear: 1-3                            [1, 100]                  288,100
├─Linear: 1-4                            [1, 10]                   1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (M): 0.59
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 1.16
Estimated Total Size (MB): 1.25