In [2]:
# Export this notebook to Markdown next to the .ipynb file on Google Drive.
# NOTE(review): the path lives under /content/drive, so Drive must already be mounted when this runs.
!jupyter nbconvert --to markdown /content/drive/MyDrive/Legend13/Self_Implement/psm_VGGnet.ipynb

[NbConvertApp] Converting notebook /content/drive/MyDrive/Legend13/Self_Implement/psm_VGGnet.ipynb to markdown
[NbConvertApp] Writing 11733 bytes to /content/drive/MyDrive/Legend13/Self_Implement/psm_VGGnet.md


In [1]:
import sys

from google.colab import drive

# Mount Google Drive into the Colab runtime.
drive.mount('/content/drive')

# Put the project folder on the import path.
sys.path.append("/content/drive/MyDrive/Legend13")

Mounted at /content/drive


In [None]:
import torch 
import torch.nn as nn
!pip install torchinfo
from torchinfo import summary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


## VGGnet Architecture

<p align="center"><img src="https://github.com/SuminizZ/Physics/assets/92680829/d5bc27bc-b8e0-41fa-8d88-ffa72bd46f5d" width="600"></p>


<p align="center"><img src="https://github.com/SuminizZ/Physics/assets/92680829/5057622a-4e31-43e1-a997-8708bcf3a3cd" width="570px"></p>



1. LRN (Local Response Normalization) -> doesn't really contribute to improving performance
2. repeat 3x3 convolution 
    1. deepest-possible networks with locational focus: using the smallest receptive field that can still capture all directions (left/right/up/down), which prevents the representational bottleneck that might occur due to extreme compression with large receptive fields
    2. increase non-linearity: every 3x3 conv layer is followed by its own ReLU, so a stack of small convs applies several non-linearities where a single large conv would apply only one -> can build more complex and non-linear predicting functions 
    3. save computational resources : can reduce the number of parameters by factorizing a large filter into a stack of smaller filters while maintaining the size of the receptive field, e.g. three stacked 3x3 convs cover a 7x7 receptive field with 27C^2 weights instead of 49C^2 for C channels. (share parameters between adjacent pixels)

In [None]:
# Convolutional-stack layouts, keyed by configuration letter.
#   int       -> 3x3 convolution with that many output channels
#   (int, k)  -> k x k convolution with that many output channels
#   'M'       -> 2x2 max-pooling
# Each row below is one stage, ending at its max-pool.
configs = {
    'A': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'B': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'C': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, (256, 1), 'M',
        512, 512, (512, 1), 'M',
        512, 512, (512, 1), 'M',
    ],
    'D': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'E': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

In [None]:
# input_shape = (N, 3, 224, 224)

class VGGnet(nn.Module):
    """VGG-style image classifier assembled from a layer configuration list.

    Data flow: ``features`` (conv / pool stack from ``build_layers``) ->
    adaptive average pooling to 7x7 -> two hidden fully connected layers, each
    followed by ReLU and dropout -> final linear ``classifier``.

    Args:
        config: Iterable describing the convolutional stack. Each entry is
            ``'M'`` (2x2 max-pool), an ``int`` (3x3 conv, padding 1, that many
            output channels) or a ``(channels, kernel_size)`` pair (conv with
            the given kernel size and no padding, e.g. ``(256, 1)`` for a 1x1
            conv).
        bn: If true, a ``BatchNorm2d`` follows every convolution.
        num_classes: Number of output logits.
        init_weights: If true, conv, linear and batch-norm parameters are
            re-initialised by ``_initialize_weights``.
        p: Dropout probability used in the fully connected head.
        hidden_dim: Width of the two hidden fully connected layers.
    """

    def __init__(self, config, bn, num_classes, init_weights=True, p=0.5, hidden_dim=4096):
        super().__init__()
        layers = self.build_layers(config, bn)
        self.features = nn.Sequential(*layers)
        # Force a 7x7 spatial grid regardless of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # The head's input width follows the last convolution's channel count
        # instead of being hard-coded; falls back to the 3 input channels when
        # the configuration contains no convolution at all.
        feature_channels = 3
        for layer in layers:
            if isinstance(layer, nn.Conv2d):
                feature_channels = layer.out_channels

        # No activation after the final Dropout: its input is already
        # non-negative, so another ReLU there would be a no-op.
        self.fc = nn.Sequential(
            nn.Linear(feature_channels * 7 * 7, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p),
        )
        self.classifier = nn.Linear(hidden_dim, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every conv, linear and batch-norm layer in the model."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 1e-2)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the ``(N, num_classes)`` logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.fc(x)
        out = self.classifier(x)
        return out

    def build_layers(self, config, bn):
        """Translate ``config`` into a flat list of modules for ``features``.

        Args:
            config: Iterable of ``'M'``, ``int`` or ``(channels, kernel_size)``
                entries (see the class docstring).
            bn: If true, append ``BatchNorm2d`` after each convolution.

        Returns:
            list of ``nn.Module`` in execution order; the first convolution
            expects 3 input channels.
        """
        layers = []
        in_channel = 3

        for v in config:
            if v == 'M':
                layers.append(nn.MaxPool2d(2))
                continue

            if isinstance(v, int):
                # Plain entry: 3x3 convolution that preserves the spatial size.
                out_channel = v
                conv = nn.Conv2d(in_channel, out_channel, 3, padding=1)
            else:
                # Pair entry: explicit kernel size, no padding.
                out_channel, size = v
                conv = nn.Conv2d(in_channel, out_channel, size)

            layers.append(conv)
            if bn:
                layers.append(nn.BatchNorm2d(out_channel))
            layers.append(nn.ReLU())
            in_channel = out_channel

        return layers
                    
            

## Model Summary

In [None]:
# Build configuration E with batch normalisation and 1000 output classes, then show
# a per-layer summary for a batch of two 3x224x224 inputs, evaluated on the CPU.
model = VGGnet(configs['E'], bn=True, num_classes=1000)
summary(model, input_size=(2, 3, 224, 224), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
VGGnet                                   [2, 1000]                 --
├─Sequential: 1-1                        [2, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [2, 64, 224, 224]         1,792
│    └─BatchNorm2d: 2-2                  [2, 64, 224, 224]         128
│    └─ReLU: 2-3                         [2, 64, 224, 224]         --
│    └─Conv2d: 2-4                       [2, 64, 224, 224]         36,928
│    └─BatchNorm2d: 2-5                  [2, 64, 224, 224]         128
│    └─ReLU: 2-6                         [2, 64, 224, 224]         --
│    └─MaxPool2d: 2-7                    [2, 64, 112, 112]         --
│    └─Conv2d: 2-8                       [2, 128, 112, 112]        73,856
│    └─BatchNorm2d: 2-9                  [2, 128, 112, 112]        256
│    └─ReLU: 2-10                        [2, 128, 112, 112]        --
│    └─Conv2d: 2-11                      [2, 128, 112, 112]        147,

## Parameters
<img width="500" alt="image" src="https://github.com/SuminizZ/Physics/assets/92680829/1240600d-b2cf-4ef3-ba5f-4e18f14154bd">

In [None]:
# Print the trainable-parameter count of every configuration (batch norm on, 1000 classes).
for key, val in configs.items():
    tmp_model = VGGnet(val, True, 1000)
    n_trainable = sum(p.numel() for p in tmp_model.parameters() if p.requires_grad)
    print(f"ConvNet Configuration {key} Parameters : {n_trainable}")

ConvNet Configuration A Parameters : 132868840
ConvNet Configuration B Parameters : 133053736
ConvNet Configuration C Parameters : 133647400
ConvNet Configuration D Parameters : 138365992
ConvNet Configuration E Parameters : 143678248


In [None]:
# Sanity check: adaptive average pooling maps an 18x18 input to the requested 7x7 grid.
x = torch.randn((2, 3, 18, 18))
avgpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))
avgpool(x).shape

torch.Size([2, 3, 7, 7])

## Forward Pass


In [None]:
# Push a random batch of two 3x224x224 inputs through the model built above.
x = torch.randn((2, 3, 224, 224))
out = model(x)

In [None]:
# Show the logits' shape, then the tensor itself as the cell result.
print(out.size())
out

torch.Size([2, 1000])


tensor([[-0.0761, -0.1180,  1.7652,  ...,  1.2305, -1.1635,  0.3651],
        [-0.4145,  0.1778,  0.8768,  ...,  0.8948, -0.0290,  0.2008]],
       grad_fn=<AddmmBackward0>)