**Question 3 -**
Train a Pure CNN with less than 10000 trainable parameters using the MNIST
Dataset having minimum validation accuracy of 99.40%

**Note -**

1. Code comments should be given for proper code understanding.
2. Implement in both PyTorch and Tensorflow respectively

## Using Tensorflow

In [1]:
#import libraries
import numpy as np
import tensorflow as tf
import keras
from keras.datasets import mnist
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Add, Dense, Flatten, Dropout
from keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import BatchNormalization, Activation

In [2]:
# Loading the dataset and perform splitting
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Peforming reshaping operation
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

In [4]:
# Normalization
X_train = X_train / 255
X_test = X_test / 255

In [5]:
# One Hot Encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [6]:
# build the model object
model = Sequential()

model.add(Conv2D(8,(3, 3), activation ='relu',input_shape = (28,28,1))) # Channel dimension = 26*26*10 , receptive field 3*3
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16,(3, 3), activation ='relu'))                      # Channel dimension = 24*24*16 , receptive field 5*5
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(8,(1,1), activation ='relu'))                        # Channel dimension = 24*24*10 , receptive field 5*5
model.add(MaxPooling2D(pool_size=(2,2)))                              # Channel dimension = 12*12*10 , receptive field 10*10

model.add(Conv2D(10,(3,3), activation ='relu'))                       # Channel dimension = 10*10*16 , receptive field 12*12
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(10,(3,3), activation ='relu'))                       # Channel dimension = 8*8*16 , receptive field 14*14
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16,(3,3), activation ='relu'))                       # Channel dimension = 6*6*16 , receptive field 16*16
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(16,(3,3), activation ='relu'))                       # Channel dimension = 4*4*16 , receptive field 18*18
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(10,4,4))                                             # Channel dimension 4*4 to see the complete image 

model.add(Flatten())

model.add(Activation('softmax'))

In [7]:
# Print the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 8)         80        
                                                                 
 batch_normalization (BatchN  (None, 26, 26, 8)        32        
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 26, 26, 8)         0         
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 16)        1168      
                                                                 
 batch_normalization_1 (Batc  (None, 24, 24, 16)       64        
 hNormalization)                                                 
                                                                 
 dropout_1 (Dropout)         (None, 24, 24, 16)        0

In [8]:
# Compile the model
model.compile(loss =keras.metrics.categorical_crossentropy,optimizer=keras.optimizers.Adam(),metrics=['accuracy'])

In [9]:
# Train the model
history = model.fit(X_train,y_train,batch_size=64, epochs=30, validation_data=(X_test,y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

Test accuracy: 0.9933000206947327


## Using Torch

In [10]:
#import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

In [11]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
# Load and normalize dataset
transform = transforms.Compose([transforms.ToTensor(),
  transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True,download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False,download=True, transform=transform)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 267869203.75it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 123607850.84it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 126862173.65it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz



100%|██████████| 4542/4542 [00:00<00:00, 6968006.13it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [13]:
# Create data loaders
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
# Check the size of the loaded data
print("Training data size:", len(train_dataset))
print("Testing data size:", len(test_dataset))

Training data size: 60000
Testing data size: 10000


In [15]:
# Hyperparameters
num_epochs = 30
batch_size = 64
learning_rate = 0.001

In [16]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 8, 3)  # input_channel=1, output_channel=8, kernel_size=3x3
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.dropout1 = nn.Dropout(0.1)
        
        self.conv2 = nn.Conv2d(8, 16, 3)  # input_channel=8, output_channel=16, kernel_size=3x3
        self.batchnorm2 = nn.BatchNorm2d(16)
        self.dropout2 = nn.Dropout(0.1)
        
        self.conv3 = nn.Conv2d(16, 8, 1)  # input_channel=16, output_channel=8, kernel_size=1x1
        self.maxpool = nn.MaxPool2d(2)  # Max pooling with kernel_size=2x2
        
        self.conv4 = nn.Conv2d(8, 10, 3)  # input_channel=8, output_channel=10, kernel_size=3x3
        self.batchnorm3 = nn.BatchNorm2d(10)
        self.dropout3 = nn.Dropout(0.1)
        
        self.conv5 = nn.Conv2d(10, 10, 3)  # input_channel=10, output_channel=10, kernel_size=3x3
        self.batchnorm4 = nn.BatchNorm2d(10)
        self.dropout4 = nn.Dropout(0.1)
        
        self.conv6 = nn.Conv2d(10, 16, 3)  # input_channel=10, output_channel=16, kernel_size=3x3
        self.batchnorm5 = nn.BatchNorm2d(16)
        self.dropout5 = nn.Dropout(0.1)
        
        self.conv7 = nn.Conv2d(16, 16, 3)  # input_channel=16, output_channel=16, kernel_size=3x3
        self.batchnorm6 = nn.BatchNorm2d(16)
        self.dropout6 = nn.Dropout(0.1)
        
        self.conv8 = nn.Conv2d(16, 10, 4)  # input_channel=16, output_channel=10, kernel_size=4x4
        
        self.flatten = nn.Flatten()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = nn.ReLU()(self.conv1(x))
        x = self.batchnorm1(x)
        x = self.dropout1(x)
        
        x = nn.ReLU()(self.conv2(x))
        x = self.batchnorm2(x)
        x = self.dropout2(x)
        
        x = nn.ReLU()(self.conv3(x))
        x = self.maxpool(x)
        
        x = nn.ReLU()(self.conv4(x))
        x = self.batchnorm3(x)
        x = self.dropout3(x)
        
        x = nn.ReLU()(self.conv5(x))
        x = self.batchnorm4(x)
        x = self.dropout4(x)
        
        x = nn.ReLU()(self.conv6(x))
        x = self.batchnorm5(x)
        x = self.dropout5(x)
        
        x = nn.ReLU()(self.conv7(x))
        x = self.batchnorm6(x)
        x = self.dropout6(x)
        
        x = self.conv8(x)
        
        x = self.flatten(x)
        x = self.softmax(x)

        return x


In [17]:
# Create the model
model = Model().to(device)

In [18]:
# Print the model summary
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
           Dropout-3            [-1, 8, 26, 26]               0
            Conv2d-4           [-1, 16, 24, 24]           1,168
       BatchNorm2d-5           [-1, 16, 24, 24]              32
           Dropout-6           [-1, 16, 24, 24]               0
            Conv2d-7            [-1, 8, 24, 24]             136
         MaxPool2d-8            [-1, 8, 12, 12]               0
            Conv2d-9           [-1, 10, 10, 10]             730
      BatchNorm2d-10           [-1, 10, 10, 10]              20
          Dropout-11           [-1, 10, 10, 10]               0
           Conv2d-12             [-1, 10, 8, 8]             910
      BatchNorm2d-13             [-1, 10, 8, 8]              20
          Dropout-14             [-1, 1

In [19]:
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [20]:
#Model Training
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # Move inputs and labels to the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

Finished Training


In [21]:
# Evaluation
model.eval()  # Set the model to evaluation mode

correct = 0
total = 0

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')


Test Accuracy: 99.09%
