### 5.3 Programming Task: Digit recognition using CNNs

In [None]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from the supplement PDF.

In [None]:
class ConvNet(nn.Module):
    """Small convolutional classifier mapping 1 x 28 x 28 images to 10 logits.

    Layer stack: 5x5 convolution (1 -> 20 channels, 'same' padding) -> ReLU ->
    2x2 max-pooling -> flatten -> Linear(3920, 100) -> ReLU -> Linear(100, 10).
    """

    def __init__(self):
        # Zero-argument super(): `super(self.__class__, self)` resolves to the
        # subclass when ConvNet is inherited from and then recurses forever.
        super().__init__()
        self.model = nn.Sequential(
            # 1 x 28 x 28 -> 20 x 28 x 28
            nn.Conv2d(1, 20, 5, padding='same'),
            nn.ReLU(),
            # 20 x 28 x 28 -> 20 x 14 x 14
            nn.MaxPool2d(2, 2),
            # 20 x 14 x 14 -> 3920
            nn.Flatten(),
            # 3920 -> 100
            nn.Linear(3920, 100),
            nn.ReLU(),
            # 100 -> 10 (raw class scores; no softmax is applied here)
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Run a batch through the network and return the class logits.

        Args:
            x: input tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding one raw score per class.
        """
        return self.model(x)
    

Show the net.

In [None]:
# Build an instance of the network and print its layer structure for inspection.
net = ConvNet()
print(net)

ConvNet(
  (model): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=3920, out_features=100, bias=True)
    (5): ReLU()
    (6): Linear(in_features=100, out_features=10, bias=True)
  )
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from task 5.2.

In [None]:
# Set hyper parameters.
seed = 0          # RNG seed; fixes weight initialisation and data shuffling
batch_size = 200  # samples per mini-batch (used for both training and testing)
lr = 1e-3         # optimizer learning rate
n_epochs = 10     # number of full passes over the training set

# Seed PyTorch's global RNG so that a Restart & Run All is repeatable on the
# same setup (some GPU kernels may still be non-deterministic).
torch.manual_seed(seed)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the MNIST data set.
# Both splits share one preprocessing pipeline: convert the image to a tensor,
# then normalise it with a fixed mean / standard deviation.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True)

# Evaluation gains nothing from shuffling; a fixed order keeps the test pass
# deterministic. download=True makes this loader independent of the one above.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False)

In [None]:
# Set the loss function and the optimization criteria
# A fresh, untrained network, moved to the selected device.
model = ConvNet().to(device)
# Adam updates every parameter of the model using learning rate `lr`.
opt = torch.optim.Adam(params=model.parameters(), lr=lr)
# Cross-entropy computed on the network's raw logits is the objective.
criterion = F.cross_entropy

In [None]:
# Run the main training loop
model.train()  # put the module in training mode
for epoch in range(n_epochs):
    ep_train_loss = []  # per-batch losses of the current epoch
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Start every step from clean gradients.
        opt.zero_grad()
        # Call the module itself instead of .forward() so that any registered
        # hooks are executed as well.
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        opt.step()
        ep_train_loss.append(loss.item())
    # Report the mean of the per-batch losses for this epoch.
    mean_ep_train_loss = sum(ep_train_loss) / len(ep_train_loss)
    print(f'Train Epoch: {epoch + 1} of {n_epochs}, loss: {mean_ep_train_loss:.6f}')

Train Epoch: 1 of 10, loss: 0.222977
Train Epoch: 2 of 10, loss: 0.066667
Train Epoch: 3 of 10, loss: 0.048316
Train Epoch: 4 of 10, loss: 0.037190
Train Epoch: 5 of 10, loss: 0.028477
Train Epoch: 6 of 10, loss: 0.022841
Train Epoch: 7 of 10, loss: 0.019759
Train Epoch: 8 of 10, loss: 0.014732
Train Epoch: 9 of 10, loss: 0.011986
Train Epoch: 10 of 10, loss: 0.011634


In [None]:
# Run the testing loop
model.eval()  # put the module in evaluation mode
correct = 0        # number of correctly classified test samples (plain int)
total_loss = 0.0   # sum of per-sample losses over the whole test set
with torch.no_grad():  # no gradients are needed for evaluation
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        logits = model(X_batch)
        # criterion (F.cross_entropy with its default reduction) yields the
        # batch mean; weight it by the batch size so that a smaller final
        # batch cannot skew the average.
        total_loss += criterion(logits, y_batch).item() * y_batch.size(0)
        # The predicted class is the index of the largest logit.
        pred = logits.argmax(dim=1)
        correct += (pred == y_batch).sum().item()

test_loss = total_loss / len(test_loader.dataset)
print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct:.0f}/{len(test_loader.dataset)} ({(correct / len(test_loader.dataset)):.2%})')

Test set: Average loss: 0.0433, Accuracy: 9869/10000 (98.69%)


**The performance of the CNN is better than that of the FCNN (98.69% vs. 97.69% test accuracy).**

iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with the model summary. (Refer to the last cell of the tutorial notebook.)

In [None]:
def _conv_params(in_channels, out_channels, kernel):
    """Weights (in * k * k) plus one bias, per output channel."""
    return (in_channels * kernel * kernel + 1) * out_channels


def _linear_params(in_features, out_features):
    """Weights (in) plus one bias, per output unit."""
    return (in_features + 1) * out_features


# Hand-computed learnable parameter counts for the three trainable layers.
Conv2d = _conv_params(1, 20, 5)
Embedding = _linear_params(3920, 100)
Clf = _linear_params(100, 10)
print(f'{Conv2d=}\n{Embedding=}\n{Clf=}\nTotal={Conv2d + Embedding + Clf}')

Conv2d=520
Embedding=392100
Clf=1010
Total=393630


In [None]:
# Display torchinfo's per-layer output shapes and parameter counts for an
# input of one batch of 1 x 28 x 28 images.
summary(model, input_size=(batch_size, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [200, 10]                 --
├─Sequential: 1-1                        [200, 10]                 --
│    └─Conv2d: 2-1                       [200, 20, 28, 28]         520
│    └─ReLU: 2-2                         [200, 20, 28, 28]         --
│    └─MaxPool2d: 2-3                    [200, 20, 14, 14]         --
│    └─Flatten: 2-4                      [200, 3920]               --
│    └─Linear: 2-5                       [200, 100]                392,100
│    └─ReLU: 2-6                         [200, 100]                --
│    └─Linear: 2-7                       [200, 10]                 1,010
Total params: 393,630
Trainable params: 393,630
Non-trainable params: 0
Total mult-adds (M): 160.16
Input size (MB): 0.63
Forward/backward pass size (MB): 25.26
Params size (MB): 1.57
Estimated Total Size (MB): 27.47

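**Everything is correct: the hand-calculated parameter counts (520, 392,100 and 1,010; 393,630 in total) match the model summary.**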