## LeNet Implementation

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    Expects a batched ``(N, 1, 32, 32)`` input. The spatial size shrinks as
    32 -> 28 (conv1) -> 14 (pool) -> 10 (conv2) -> 5 (pool), so the feature
    map entering ``fc1`` is ``16 x 5 x 5 = 400`` values per sample.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 channels x 5 x 5 spatial positions
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the ``(N, 10)`` output of ``fc3`` for a ``(N, 1, 32, 32)`` batch.

        Raises:
            RuntimeError: if the input size does not yield 400 features per
                sample (raised by ``fc1``).
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten every dimension except the batch one. Unlike view(-1, 400),
        # this can never regroup values across samples when the input size is
        # wrong; fc1 reports a shape mismatch instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Build the network on the CPU, show its module tree, then print the
# per-layer torchsummary report for a single-channel 32x32 input.
lenet = LeNet()
lenet = lenet.to("cpu")
print(lenet)
summary(lenet, input_size=(1, 32, 32), device="cpu")

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
            Conv2d-2           [-1, 16, 10, 10]           2,416
            Linear-3                  [-1, 120]          48,120
            Linear-4                   [-1, 84]          10,164
            Linear-5                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.05
Params size (MB): 0.24
Estimated Total Size (

### A manual calculation was done and matches this result for LeNet

## AlexNet as per your slide 13, ch2.
### However, I couldn't get the 61M params mentioned on the slide with this version.  I tried

In [2]:
class AlexNet(nn.Module):
    """AlexNet variant with 96/256/384/384/256 conv filters on a 1-channel input.

    Expects a batched ``(N, 1, 224, 224)`` input. The spatial size shrinks as
    224 -> 54 (conv1) -> 26 (pool1) -> 26 (conv2) -> 12 (pool2) -> 12
    (conv3-5) -> 5 (pool3), so ``fc1`` receives ``256 x 5 x 5 = 6400``
    features per sample.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 96, 11, stride=4)
        self.pool1 = nn.MaxPool2d(3, stride=2)
        self.conv2 = nn.Conv2d(96, 256, 5, padding=2)
        self.pool2 = nn.MaxPool2d(3, stride=2)

        self.conv3 = nn.Conv2d(256, 384, 3, padding=1)
        self.conv4 = nn.Conv2d(384, 384, 3, padding=1)
        self.conv5 = nn.Conv2d(384, 256, 3, padding=1)
        self.pool3 = nn.MaxPool2d(3, stride=2)

        self.fc1 = nn.Linear(256 * 5 * 5, 4096)  # 256 channels x 5 x 5 spatial positions
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def forward(self, x):
        """Return the ``(N, 1000)`` output of ``fc3`` for a ``(N, 1, 224, 224)`` batch."""
        # Every convolution is followed by a ReLU (conv2 and conv3 previously
        # had none, leaving conv2 -> pool2 -> conv3 -> conv4 without a ReLU).
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        # p=0.5 belongs to dropout (it used to be passed as relu's `inplace`
        # flag), and training=self.training turns dropout off under eval().
        x = F.relu(F.dropout(self.fc1(x), p=0.5, training=self.training))
        x = F.relu(F.dropout(self.fc2(x), p=0.5, training=self.training))
        x = self.fc3(x)
        return x

# Build the slide-style AlexNet on the CPU, show its module tree, then print
# the per-layer torchsummary report for a single-channel 224x224 input.
alexnet = AlexNet()
alexnet = alexnet.to("cpu")
print(alexnet)
summary(alexnet, input_size=(1, 224, 224), device="cpu")

AlexNet(
  (conv1): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=6400, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=1000, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-

### A manual calculation was done and matches this result for AlexNet.

I also modified the network above to match the torchvision AlexNet at https://pytorch.org/vision/main/_modules/torchvision/models/alexnet.html
That version contains an extra AdaptiveAvgPool2d layer.

And got 61M Params !! Yay!

In [3]:

class AlexNet(nn.Module):
    """AlexNet with 64/192/384/256/256 conv filters and an adaptive 6x6 average pool.

    Expects a batched ``(N, 3, H, W)`` input. For 224x224 images the spatial
    size shrinks as 224 -> 55 (conv1) -> 27 (pool1) -> 27 (conv2) -> 13
    (pool2) -> 13 (conv3-5) -> 6 (pool3). ``avgpool`` then fixes the map at
    6x6, so ``fc1`` always receives ``256 x 6 x 6 = 9216`` features per sample.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 11, stride=4, padding=2)
        self.pool1 = nn.MaxPool2d(3, stride=2)
        self.conv2 = nn.Conv2d(64, 192, 5, padding=2)
        self.pool2 = nn.MaxPool2d(3, stride=2)

        self.conv3 = nn.Conv2d(192, 384, 3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, 3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, 3, padding=1)
        self.pool3 = nn.MaxPool2d(3, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))  # not part of the network on the slide

        self.fc1 = nn.Linear(256 * 6 * 6, 4096)  # 256 channels x 6 x 6 from avgpool
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def forward(self, x):
        """Return the ``(N, 1000)`` output of ``fc3`` for a ``(N, 3, H, W)`` batch."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        # p=0.5 belongs to dropout (it used to be passed as relu's `inplace`
        # flag), and training=self.training turns dropout off under eval().
        x = F.relu(F.dropout(self.fc1(x), p=0.5, training=self.training))
        x = F.relu(F.dropout(self.fc2(x), p=0.5, training=self.training))
        x = self.fc3(x)
        return x


# Build the torchvision-style AlexNet on the CPU, show its module tree, then
# print the per-layer torchsummary report for a 3-channel 224x224 input.
alexnet = AlexNet()
alexnet = alexnet.to("cpu")
print(alexnet)
summary(alexnet, input_size=(3, 224, 224), device="cpu")

AlexNet(
  (conv1): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (fc1): Linear(in_features=9216, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=1000, bias=True)
)
----------------------------------------------------------------
        Layer (type)          

## Now let's look at ZFNet

In [4]:

class ZFNet(nn.Module):
    """ZFNet-style variant of the AlexNet defined above in this notebook.

    Differences from that AlexNet: conv1 uses a 7x7 kernel with stride 2
    (instead of 11x11, stride 4), and conv3/conv4/conv5 use 512/1024/512
    filters. Expects a batched ``(N, 3, H, W)`` input; ``avgpool`` fixes the
    final map at 6x6, so ``fc1`` always receives ``512 x 6 x 6 = 18432``
    features per sample.
    """

    def __init__(self):
        super(ZFNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 7, stride=2, padding=2)  # CONV1: 7x7 stride 2 instead of 11x11 stride 4
        self.pool1 = nn.MaxPool2d(3, stride=2)  # same as AlexNet
        self.conv2 = nn.Conv2d(64, 192, 5, padding=2)
        self.pool2 = nn.MaxPool2d(3, stride=2)  # same as AlexNet

        self.conv3 = nn.Conv2d(192, 512, 3, padding=1)  # CONV3: 512 filters instead of 384
        self.conv4 = nn.Conv2d(512, 1024, 3, padding=1)  # CONV4: 1024 filters
        self.conv5 = nn.Conv2d(1024, 512, 3, padding=1)  # CONV5: 512 filters
        self.pool3 = nn.MaxPool2d(3, stride=2)  # same as AlexNet
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))  # not part of the network on the slide

        self.fc1 = nn.Linear(512 * 6 * 6, 4096)  # 512 channels x 6 x 6 from avgpool
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def forward(self, x):
        """Return the ``(N, 1000)`` output of ``fc3`` for a ``(N, 3, H, W)`` batch."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        # p=0.5 belongs to dropout (it used to be passed as relu's `inplace`
        # flag), and training=self.training turns dropout off under eval().
        x = F.relu(F.dropout(self.fc1(x), p=0.5, training=self.training))
        x = F.relu(F.dropout(self.fc2(x), p=0.5, training=self.training))
        x = self.fc3(x)
        return x


# Build ZFNet on the CPU, show its module tree, then print the per-layer
# torchsummary report for a 3-channel 224x224 input.
zFNet = ZFNet()
zFNet = zFNet.to("cpu")
print(zFNet)
summary(zFNet, input_size=(3, 224, 224), device="cpu")

ZFNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(192, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (fc1): Linear(in_features=18432, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=1000, bias=True)
)
----------------------------------------------------------------
        Layer (type)           

### Compare the number of parameters.  
AlexNet: 61,100,840 ---   ZFNet: 107,020,712 params;  
    