# VGGNet

In [42]:
import torch
from torch import nn

In [44]:
# Feature-extractor layouts keyed by configuration letter.
# VGG.make_layers reads each entry in order: an integer is the output-channel
# count of a 3x3 convolution, and "M" marks a 2x2 max-pooling layer.
# Each row below is one stage, ending at its pooling layer.
cfgs = {
    # 8 convolutions
    "A": [64, "M",
          128, "M",
          256, 256, "M",
          512, 512, "M",
          512, 512, "M"],
    # 10 convolutions
    "B": [64, 64, "M",
          128, 128, "M",
          256, 256, "M",
          512, 512, "M",
          512, 512, "M"],
    # 13 convolutions
    "D": [64, 64, "M",
          128, 128, "M",
          256, 256, 256, "M",
          512, 512, 512, "M",
          512, 512, 512, "M"],
    # 16 convolutions
    "E": [64, 64, "M",
          128, 128, "M",
          256, 256, 256, 256, "M",
          512, 512, 512, 512, "M",
          512, 512, 512, 512, "M"],
}

In [46]:
class VGG(nn.Module):
    """VGG-style image classifier assembled from a layer configuration list.

    The network is a convolutional feature extractor built by ``make_layers``,
    an adaptive average pool to a fixed 7x7 grid, and a three-layer fully
    connected classifier.

    Args:
        cfg: Sequence describing the feature extractor. Each integer adds a
            3x3 convolution with that many output channels; each ``"M"`` adds
            a 2x2 max-pooling layer. The last convolution must output 512
            channels, because the classifier's first layer expects
            ``512 * 7 * 7`` input features.
        batch_norm: If true, insert ``BatchNorm2d`` after every convolution.
        num_classes: Size of the final linear layer's output.
        init_weights: If true, re-initialise convolution and linear layers
            (see the loop at the end of ``__init__``).
        drop_p: Dropout probability used in the classifier.
        in_channels: Channel count of the input images (3 by default).

    Raises:
        ValueError: If ``cfg`` contains an entry that is neither an integer
            nor ``"M"``.
    """

    def __init__(self, cfg, batch_norm, num_classes = 1000, init_weights = True, drop_p = 0.5,
                 in_channels = 3):
        super().__init__()

        self.features = self.make_layers(cfg, batch_norm, in_channels)
        # Pool to a fixed 7x7 grid, so input images do not need to be resized
        # to one particular resolution before being fed to the network.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                        nn.ReLU(),
                                        nn.Dropout(p = drop_p),
                                        nn.Linear(4096, 4096),
                                        nn.ReLU(),
                                        nn.Dropout(p = drop_p),
                                        nn.Linear(4096, num_classes))

        if init_weights:
            # self.modules() walks every nested submodule, so this reaches the
            # layers inside both self.features and self.classifier.
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode = "fan_out", nonlinearity = "relu")
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a batch ``x`` of shape (N, in_channels, H, W)."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x

    def make_layers(self, cfg, batch_norm = False, in_channels = 3):
        """Build the convolutional feature extractor described by ``cfg``.

        Args:
            cfg: Sequence of integers (3x3 convolution, padding 1, with that
                many output channels, followed by ReLU) and ``"M"`` markers
                (2x2 max-pooling).
            batch_norm: If true, place ``BatchNorm2d`` between each
                convolution and its ReLU.
            in_channels: Channel count expected by the first convolution.

        Returns:
            An ``nn.Sequential`` holding the layers in ``cfg`` order.

        Raises:
            ValueError: If an entry is neither an integer nor ``"M"``. A typo
                in the configuration is reported instead of silently becoming
                a pooling layer.
        """
        layers = []
        for v in cfg:
            # bool is a subclass of int; reject it so True/False never become a channel count.
            if isinstance(v, int) and not isinstance(v, bool):
                layers.append(nn.Conv2d(in_channels, v, 3, padding = 1))
                if batch_norm:
                    layers.append(nn.BatchNorm2d(v))
                layers.append(nn.ReLU())
                in_channels = v  # the next convolution consumes this layer's output
            elif v == "M":
                layers.append(nn.MaxPool2d(2))
            else:
                raise ValueError(f"unsupported cfg entry {v!r}: expected an int channel count or 'M'")

        return nn.Sequential(*layers)  # unpack the list into positional modules

In [48]:
# torchinfo is only needed for the layer-by-layer report printed by this cell.
from torchinfo import summary

# Instantiate configuration "E" with batch normalisation after every convolution.
model = VGG(cfgs["E"], batch_norm = True)

# Trace a dummy batch of two 3x224x224 images on the CPU and display the report.
summary(model, input_size = (2, 3, 224, 224), device = "cpu")

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [2, 1000]                 --
├─Sequential: 1-1                        [2, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [2, 64, 224, 224]         1,792
│    └─BatchNorm2d: 2-2                  [2, 64, 224, 224]         128
│    └─ReLU: 2-3                         [2, 64, 224, 224]         --
│    └─Conv2d: 2-4                       [2, 64, 224, 224]         36,928
│    └─BatchNorm2d: 2-5                  [2, 64, 224, 224]         128
│    └─ReLU: 2-6                         [2, 64, 224, 224]         --
│    └─MaxPool2d: 2-7                    [2, 64, 112, 112]         --
│    └─Conv2d: 2-8                       [2, 128, 112, 112]        73,856
│    └─BatchNorm2d: 2-9                  [2, 128, 112, 112]        256
│    └─ReLU: 2-10                        [2, 128, 112, 112]        --
│    └─Conv2d: 2-11                      [2, 128, 112, 112]        147,

In [50]:
# A small nested container used to show what nn.Module.modules() iterates over.
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.ReLU(),
    nn.Sequential(
        nn.Linear(2, 3),
        nn.ReLU(),
        nn.Linear(3, 3),
        nn.ReLU(),
    ),
    nn.Linear(3, 10),
)
# modules() yields the outer container first, then each submodule, descending
# into the inner Sequential before moving on to the final Linear.
list(model.modules())

[Sequential(
   (0): Linear(in_features=2, out_features=2, bias=True)
   (1): ReLU()
   (2): Sequential(
     (0): Linear(in_features=2, out_features=3, bias=True)
     (1): ReLU()
     (2): Linear(in_features=3, out_features=3, bias=True)
     (3): ReLU()
   )
   (3): Linear(in_features=3, out_features=10, bias=True)
 ),
 Linear(in_features=2, out_features=2, bias=True),
 ReLU(),
 Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
   (2): Linear(in_features=3, out_features=3, bias=True)
   (3): ReLU()
 ),
 Linear(in_features=2, out_features=3, bias=True),
 ReLU(),
 Linear(in_features=3, out_features=3, bias=True),
 ReLU(),
 Linear(in_features=3, out_features=10, bias=True)]

In [52]:
# Adaptive pooling always produces the requested 4x4 grid, whatever the input size.
avgpool = nn.AdaptiveAvgPool2d(output_size = (4, 4))

# A 32x32 input is reduced to 4x4.
print(avgpool(torch.randn(2, 3, 32, 32)).shape)

# A 2x2 input is smaller than the target, so it is expanded to 4x4 instead.
x = torch.randn(2, 3, 2, 2)
print(avgpool(x))

torch.Size([2, 3, 4, 4])
tensor([[[[-0.1141, -0.1141, -0.9957, -0.9957],
          [-0.1141, -0.1141, -0.9957, -0.9957],
          [ 0.7179,  0.7179, -1.0782, -1.0782],
          [ 0.7179,  0.7179, -1.0782, -1.0782]],

         [[ 0.7366,  0.7366,  2.6254,  2.6254],
          [ 0.7366,  0.7366,  2.6254,  2.6254],
          [ 1.3240,  1.3240, -0.9673, -0.9673],
          [ 1.3240,  1.3240, -0.9673, -0.9673]],

         [[ 1.2987,  1.2987, -1.2106, -1.2106],
          [ 1.2987,  1.2987, -1.2106, -1.2106],
          [ 0.0782,  0.0782,  1.0739,  1.0739],
          [ 0.0782,  0.0782,  1.0739,  1.0739]]],


        [[[-2.0857, -2.0857,  0.0269,  0.0269],
          [-2.0857, -2.0857,  0.0269,  0.0269],
          [-0.7073, -0.7073, -0.2146, -0.2146],
          [-0.7073, -0.7073, -0.2146, -0.2146]],

         [[ 0.2618,  0.2618, -0.7347, -0.7347],
          [ 0.2618,  0.2618, -0.7347, -0.7347],
          [-0.8477, -0.8477,  0.7621,  0.7621],
          [-0.8477, -0.8477,  0.7621,  0.7621]],

   