### Importing Important Modules 

In [1]:
# Display the value of every top-level expression in a cell (not just the last one),
# and autosave the notebook every 20 seconds.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
%autosave 20

Autosaving every 20 seconds


In [2]:
from __future__ import print_function
import torch                                 # Core PyTorch package (tensors, devices, seeding).
import torch.nn as nn                        # Module/layer classes used to build the network.
import torch.nn.functional as F              # Functional ops: activations, losses, log_softmax, etc.
import torch.optim as optim                  # Optimizers (SGD is used for training below).
from torchvision import datasets, transforms # MNIST dataset and the input transforms.

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Importing a helper module I wrote to calculate the receptive field. It has a couple of bugs but works fine here.
from  rf_calc import receptive_field

In [4]:
torch.__version__  # Check the installed Torch version

'1.12.1'

In [5]:
# Report whether torch.cuda.is_available() finds a usable CUDA GPU
print(f"Is cuda GPU avalable: {torch.cuda.is_available()}")

Is cuda GPU avalable: True


In [7]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Fix the global torch RNG seed so weight initialization is repeatable.
torch.manual_seed(1)

# Mini-batch size shared by the train and test loaders.
batch_size = 128

# Extra DataLoader options that are only passed when running on the GPU.
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else {}

<torch._C.Generator at 0x7fa8b81e8b30>

In [8]:
### Defining train loader and test loader

# Preprocessing shared by both splits: convert each image to a tensor, then
# normalize with mean 0.1307 and std 0.3081 (the MNIST dataset statistics).
# Defining it once keeps the train and test pipelines from drifting apart.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: downloaded into ../data if missing, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Test split: evaluation only sums the loss and the number of correct
# predictions, so sample order is irrelevant and shuffling is switched off.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)

### Training Function

from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: Network to optimize; switched to training mode here.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer whose ``zero_grad``/``step`` apply the updates.
        epoch: Accepted for the caller's bookkeeping; not read in this function.
    """
    model.train()
    progress = tqdm(train_loader)

    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        log_probs = model(inputs)
        loss = F.nll_loss(log_probs, labels)
        loss.backward()
        optimizer.step()

        # Show the latest batch loss and index on the progress bar.
        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

### Testing Function

def test(model, device, test_loader):
    model.eval()  #Model set to evaluation mode.
    test_loss = 0  
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)   #predicting the test data
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

### Model Class Definition

In [11]:
class Net(nn.Module):
    """All-convolutional MNIST classifier whose channel widths can be scaled down.

    The network is seven 3x3 convolutions with two 2x2 max-pools in between.
    The last convolution emits 10 channels, which ``forward`` reshapes to
    ``(-1, 10)`` and turns into log-probabilities.

    Args:
        x_times: Integer divisor applied with floor division to every hidden
            channel count (32, 64, 128, 256, 512, 1024). ``1`` keeps the full
            widths. It must be non-zero because it is used as a divisor.
    """

    def __init__(self, x_times=1):
        super().__init__()

        def width(base):
            # Hidden channel count after dividing the base width by x_times.
            return base // x_times

        # Spatial sizes below are for a 28x28 input. Receptive fields (RF) follow
        # RF_out = RF_in + (kernel - 1) * jump, where jump doubles after each pool.
        self.conv1 = nn.Conv2d(1, width(32), 3, padding=1)             # 28x28 -> 28x28, RF 3x3
        self.conv2 = nn.Conv2d(width(32), width(64), 3, padding=1)     # 28x28 -> 28x28, RF 5x5
        self.pool1 = nn.MaxPool2d(2, 2)                                # 28x28 -> 14x14, RF 6x6
        self.conv3 = nn.Conv2d(width(64), width(128), 3, padding=1)    # 14x14 -> 14x14, RF 10x10
        self.conv4 = nn.Conv2d(width(128), width(256), 3, padding=1)   # 14x14 -> 14x14, RF 14x14
        self.pool2 = nn.MaxPool2d(2, 2)                                # 14x14 -> 7x7,   RF 16x16
        self.conv5 = nn.Conv2d(width(256), width(512), 3)              # 7x7   -> 5x5,   RF 24x24
        self.conv6 = nn.Conv2d(width(512), width(1024), 3)             # 5x5   -> 3x3,   RF 32x32
        self.conv7 = nn.Conv2d(width(1024), 10, 3)                     # 3x3   -> 1x1,   RF 40x40

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(-1, 10)`` for input ``x``."""
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))  # conv1 -> relu -> conv2 -> relu -> maxpool
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))  # conv3 -> relu -> conv4 -> relu -> maxpool
        x = F.relu(self.conv6(F.relu(self.conv5(x))))              # conv5 -> relu -> conv6 -> relu
        x = self.conv7(x)                                          # no activation before the softmax
        x = x.view(-1, 10)                                         # (N, 10, 1, 1) -> (N, 10) for 28x28 inputs
        return F.log_softmax(x, -1)

### Base model 

In [15]:
from torchsummary import summary

# Build the full-width baseline network (x_times=1) and move it to the selected device.
model = Net(x_times=1)
model = model.to(device)

# Print the layer-by-layer output shapes and parameter counts for a 1x28x28 input.
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------

### Let's calculate the Receptive field

In [16]:
# Run the receptive-field helper from rf_calc on the base model with input_image=28
# and keep its result in rf_df. NOTE(review): the helper's source is not part of this
# notebook; the rendered table below suggests it reports kernel, padding, stride,
# sizes and receptive field per layer -- confirm against rf_calc.
rf_df = receptive_field(model_obj=model,input_image=28)

|    | Kernel_size   | Padding   |   Stride | Input_Img_size   | Output_Img_size   | Receptive_field   |
|---:|:--------------|:----------|---------:|:-----------------|:------------------|:------------------|
|  0 | 3*3           | 1         |        1 | 28*28            | 28*28             | 3*3               |
|  1 | 3*3           | 1         |        1 | 28*28            | 28*28             | 5*5               |
|  2 | 2*2           | NO        |        2 | 28*28            | 14*14             | 6*6               |
|  3 | 3*3           | 1         |        1 | 14*14            | 14*14             | 10*10             |
|  4 | 3*3           | 1         |        1 | 14*14            | 14*14             | 14*14             |
|  5 | 2*2           | NO        |        2 | 14*14            | 7*7               | 16*16             |
|  6 | 3*3           | NO        |        1 | 7*7              | 5*5               | 24*24             |
|  7 | 3*3           | NO        |        1 | 5*5      

In [17]:
# Width sweep: for each divisor 1..6 build a fresh Net, print its summary,
# then train it for one epoch with SGD and evaluate it on the test set.
for i in range(1, 7):
    print (f"Model Reduced the parameter {i} times the base model")

    # New model whose hidden channel counts are floor-divided by i.
    model = Net(x_times=i)
    model = model.to(device)
    summary(model, input_size=(1, 28, 28))

    # Fresh optimizer bound to this model's parameters.
    optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

    for epoch in range(1, 2):  # a single epoch per model
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

Model Reduced the parameter 1 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Siz

loss=0.10171332210302353 batch_id=468: 100%|███████████████████████████| 469/469 [00:08<00:00, 58.60it/s]



Test set: Average loss: 0.0603, Accuracy: 9812/10000 (98.1200%)

Model Reduced the parameter 2 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
            Conv2d-2           [-1, 32, 28, 28]           4,640
         MaxPool2d-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 64, 14, 14]          18,496
            Conv2d-5          [-1, 128, 14, 14]          73,856
         MaxPool2d-6            [-1, 128, 7, 7]               0
            Conv2d-7            [-1, 256, 5, 5]         295,168
            Conv2d-8            [-1, 512, 3, 3]       1,180,160
            Conv2d-9             [-1, 10, 1, 1]          46,090
Total params: 1,618,570
Trainable params: 1,618,570
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backwar

loss=0.05827442929148674 batch_id=468: 100%|███████████████████████████| 469/469 [00:05<00:00, 80.62it/s]



Test set: Average loss: 0.0639, Accuracy: 9785/10000 (97.8500%)

Model Reduced the parameter 3 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
            Conv2d-2           [-1, 21, 28, 28]           1,911
         MaxPool2d-3           [-1, 21, 14, 14]               0
            Conv2d-4           [-1, 42, 14, 14]           7,980
            Conv2d-5           [-1, 85, 14, 14]          32,215
         MaxPool2d-6             [-1, 85, 7, 7]               0
            Conv2d-7            [-1, 170, 5, 5]         130,220
            Conv2d-8            [-1, 341, 3, 3]         522,071
            Conv2d-9             [-1, 10, 1, 1]          30,700
Total params: 725,197
Trainable params: 725,197
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pa

loss=0.02670036070048809 batch_id=468: 100%|███████████████████████████| 469/469 [00:05<00:00, 80.51it/s]



Test set: Average loss: 0.0641, Accuracy: 9785/10000 (97.8500%)

Model Reduced the parameter 4 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
            Conv2d-2           [-1, 16, 28, 28]           1,168
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]           4,640
            Conv2d-5           [-1, 64, 14, 14]          18,496
         MaxPool2d-6             [-1, 64, 7, 7]               0
            Conv2d-7            [-1, 128, 5, 5]          73,856
            Conv2d-8            [-1, 256, 3, 3]         295,168
            Conv2d-9             [-1, 10, 1, 1]          23,050
Total params: 416,458
Trainable params: 416,458
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pa

loss=0.09123865514993668 batch_id=468: 100%|███████████████████████████| 469/469 [00:05<00:00, 80.83it/s]



Test set: Average loss: 0.0617, Accuracy: 9802/10000 (98.0200%)

Model Reduced the parameter 5 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]              60
            Conv2d-2           [-1, 12, 28, 28]             660
         MaxPool2d-3           [-1, 12, 14, 14]               0
            Conv2d-4           [-1, 25, 14, 14]           2,725
            Conv2d-5           [-1, 51, 14, 14]          11,526
         MaxPool2d-6             [-1, 51, 7, 7]               0
            Conv2d-7            [-1, 102, 5, 5]          46,920
            Conv2d-8            [-1, 204, 3, 3]         187,476
            Conv2d-9             [-1, 10, 1, 1]          18,370
Total params: 267,737
Trainable params: 267,737
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pa

loss=0.09373003989458084 batch_id=468: 100%|███████████████████████████| 469/469 [00:05<00:00, 79.88it/s]



Test set: Average loss: 0.0797, Accuracy: 9734/10000 (97.3400%)

Model Reduced the parameter 6 times the base model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 5, 28, 28]              50
            Conv2d-2           [-1, 10, 28, 28]             460
         MaxPool2d-3           [-1, 10, 14, 14]               0
            Conv2d-4           [-1, 21, 14, 14]           1,911
            Conv2d-5           [-1, 42, 14, 14]           7,980
         MaxPool2d-6             [-1, 42, 7, 7]               0
            Conv2d-7             [-1, 85, 5, 5]          32,215
            Conv2d-8            [-1, 170, 3, 3]         130,220
            Conv2d-9             [-1, 10, 1, 1]          15,310
Total params: 188,146
Trainable params: 188,146
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pa

loss=0.13210918009281158 batch_id=468: 100%|███████████████████████████| 469/469 [00:05<00:00, 79.85it/s]



Test set: Average loss: 0.1070, Accuracy: 9671/10000 (96.7100%)



In [None]:
print(device)  # Show which torch device the notebook is using