In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np

In [23]:
# Layer specifications for the supported VGG variants.
#   int x -> 3x3 convolution (stride 1, padding 1) producing x output channels
#   'M'   -> 2x2 max-pooling layer with stride 2
# Each row below is one stage: the convolutions leading up to a pooling step.
vgg_architectures = dict(
    VGG11=[
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    VGG13=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    VGG16=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    VGG19=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
)
# The convolutional stack is followed by two fully connected layers with 4096
# output features each, then one more fully connected layer with `num_classes`
# outputs.

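We assume the input image has size 224×224×3. The network applies 5 max-pooling layers, each of which halves the height and width, so the final feature map has spatial size 224 / 2**5 = 7, i.e. 7×7 with 512 channels. This is why the first fully connected layer takes 512*7*7 input features.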

In [24]:
class VGG_net(nn.Module):
    """VGG-style CNN: conv/batch-norm/ReLU stages followed by three FC layers.

    Args:
        in_channels: Number of channels of the input images (e.g. 3 for RGB).
        num_classes: Number of outputs of the final fully connected layer.
        architecture: Which network to build. Either a key of
            ``vgg_architectures`` (``"VGG11"``, ``"VGG13"``, ``"VGG16"``,
            ``"VGG19"``) or an explicit layer spec: an iterable in which an
            ``int`` adds a 3x3 convolution with that many output channels and
            ``'M'`` adds a 2x2 max-pool. Defaults to ``"VGG19"``, the variant
            this class always built before the parameter existed.

    Raises:
        ValueError: If ``architecture`` is an unknown name, or the layer spec
            contains an entry that is neither an ``int`` nor ``'M'``.
    """

    # Spatial size the feature map is pooled to before the classifier.
    _POOLED_SIZE = (7, 7)

    def __init__(self, in_channels, num_classes, architecture="VGG19"):
        super().__init__()
        self.in_channels = in_channels

        if isinstance(architecture, str):
            if architecture not in vgg_architectures:
                raise ValueError(
                    f"Unknown architecture {architecture!r}; "
                    f"expected one of {sorted(vgg_architectures)} or an explicit layer spec"
                )
            architecture = vgg_architectures[architecture]

        self.conv_layers = self.create_conv_layers(architecture=architecture)

        # Derive the classifier input width from the last convolution instead of
        # hard-coding 512, so custom layer specs get a matching first FC layer.
        final_channels = in_channels
        for module in self.conv_layers:
            if isinstance(module, nn.Conv2d):
                final_channels = module.out_channels
        pooled_h, pooled_w = self._POOLED_SIZE

        self.fcs = nn.Sequential(
            nn.Linear(final_channels * pooled_h * pooled_w, 4096),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Map a batch of images (N, in_channels, H, W) to (N, num_classes) scores."""
        x = self.conv_layers(x)
        # For 224x224 inputs to the stock architectures the map is already 7x7
        # and this pooling is an identity; for other input sizes it resizes the
        # map so the fixed-size classifier below still applies.
        x = F.adaptive_avg_pool2d(x, self._POOLED_SIZE)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        """Build the convolutional feature extractor described by ``architecture``.

        Args:
            architecture: Iterable of layer specs. An ``int`` becomes
                Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU with
                that many output channels; ``'M'`` becomes MaxPool2d(2x2,
                stride 2).

        Returns:
            nn.Sequential containing the layers in spec order.

        Raises:
            ValueError: If a spec entry is neither an ``int`` nor ``'M'``.
        """
        layers = []
        in_channels = self.in_channels

        for layer in architecture:
            # bool is a subclass of int; it is never a valid channel count.
            if isinstance(layer, int) and not isinstance(layer, bool):
                layers.append(nn.Conv2d(in_channels=in_channels, out_channels=layer, kernel_size=(3,3), stride=(1,1), padding=(1,1)))
                layers.append(nn.BatchNorm2d(layer))
                layers.append(nn.ReLU())
                in_channels = layer
            elif layer == 'M':
                layers.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)))
            else:
                # Previously unknown entries were skipped silently, which hid
                # typos in the layer spec.
                raise ValueError(
                    f"Unsupported layer spec {layer!r}; expected an int (conv output channels) or 'M' (max-pool)"
                )
        return nn.Sequential(*layers)


In [25]:
# Instantiate the network for 3-channel input images and a 1000-way output layer.
model = VGG_net(in_channels=3, num_classes=1000)

In [26]:
# Smoke test: push a single random 3x224x224 input (batch of 1) through the model
# and check the shape of what comes out.
x = torch.rand(1, 3, 224, 224)
print(x)  # NOTE(review): dumps a long tensor repr into the output; x.shape would likely suffice
out = model(x)
print(out)  # values produced by the model's final linear layer
out.shape  # last expression of the cell, so it is displayed: (batch, num_classes)

tensor([[[[0.7404, 0.7446, 0.5722,  ..., 0.3621, 0.7758, 0.0145],
          [0.1077, 0.2033, 0.1104,  ..., 0.3526, 0.1353, 0.7454],
          [0.4315, 0.8110, 0.0501,  ..., 0.6120, 0.2502, 0.2000],
          ...,
          [0.5065, 0.1249, 0.2004,  ..., 0.6928, 0.5515, 0.9457],
          [0.5673, 0.2860, 0.3603,  ..., 0.4233, 0.6968, 0.3537],
          [0.3192, 0.9678, 0.9926,  ..., 0.0022, 0.9868, 0.4007]],

         [[0.8353, 0.6604, 0.2384,  ..., 0.6226, 0.5684, 0.1758],
          [0.2424, 0.0235, 0.4532,  ..., 0.2668, 0.0093, 0.0958],
          [0.9790, 0.1176, 0.5613,  ..., 0.6039, 0.7945, 0.4986],
          ...,
          [0.9858, 0.6709, 0.1189,  ..., 0.2535, 0.6149, 0.7216],
          [0.1719, 0.3134, 0.2002,  ..., 0.3452, 0.2907, 0.5142],
          [0.8188, 0.3959, 0.6512,  ..., 0.2836, 0.9491, 0.1297]],

         [[0.2018, 0.1291, 0.6709,  ..., 0.0064, 0.6560, 0.7417],
          [0.9440, 0.2020, 0.6231,  ..., 0.0758, 0.7913, 0.8484],
          [0.2075, 0.7852, 0.3158,  ..., 0

torch.Size([1, 1000])