# VGG Networks

An image classifier architecture designed by the Visual Geometry Group at Oxford. The VGG pattern uses multiple convolutional layers to refine features before passing to a three layer, fully connected, deep learning network.

Unlike Tensor flow, we need to calculate the padding.  What Tensorflow calls *same* padding is also called *half* padding and results in output the same size as the input. Given: $w_o$: width of the output axis, $w_i$: width of the input axis, $k$: kernel size, $s$: stride and $p$ padding, the calculation is $\lfloor\frac{k}{2}\rfloor$ based on the following:

*For any* $w$ *and for* $k$ *odd:* $k = 2n + 1,\ n \in \mathbb{N},\ s = 1$ *and* $p = \lfloor\frac{k}{2}\rfloor,$

$o = w + 2\lfloor\frac{k}{2}\rfloor - (k - 1)$

$\ \ = w + 2n - 2n$

$\ \ = w$

Since $k=3$ in all instances of this model, $\lfloor\frac{3}{2}\rfloor = 1$ so we use a padding value of 1

Then there's the question of what does the pooling do to the size of the data?  There's no padding involved so the relationship is:

$w_o = \lfloor\frac{w_i-k}{s}\rfloor+1$

All of our MaxPool2d calls use $k=2$ and $s=2$:

$w_o = \lfloor\frac{w_i-2}{2}\rfloor+1 = \lfloor\frac{w_i}{2}\rfloor$

*Ref: [http://arxiv.org/abs/1603.07285](http://arxiv.org/abs/1603.07285)*

## Sequential Approach

Implemented just like the Tensorflow version from the text.

In [1]:
from torch import nn

# This model uses 224 x 224 color images.

sequential_model = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(3, 256, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 256, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 256, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.Conv2d(3, 512, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Linear(4096,1000),
    nn.Softmax()
)

print(sequential_model)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (1): ReLU()
  (2): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (3): ReLU()
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (6): ReLU()
  (7): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (8): ReLU()
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (11): ReLU()
  (12): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (13): ReLU()
  (14): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
  (15): ReLU()
  (16): MaxPool2d(kernel_size=2, stride=2, 

## Class Approach

Every Pytorch VGG example I found also includes a [Batch Normalization](https://arxiv.org/abs/1502.03167) layer in each convolutional block as an additional optimization.  I've done that here.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as activation

# This model uses 224 x 224 color images.

class ConvNet(nn.Module):
    
    def __init__(self):
        # Call the parent constructor.
        super().__init__()
        
        # Define the layers.
        # First the convolutional blocks
        self.block64 = self.conv_block(2, 64)
        self.block128 = self.conv_block(2, 128)
        self.block512a = self.conv_block(3, 512)
        self.block512b = self.conv_block(3, 512)
        # Followed by fully connected layers to analyze and classify
        # the features found by the block stack.
        self.hidden1 = nn.Linear(4096, 4096)
        self.hidden2 = nn.Linear(4096, 4096)
        self.outputLayer = nn.Linear(4096,1000)
        
    def forward(self, x):
        # This defines a forward pass for this forward feed network.
        x = self.block64(x)
        x = self.block128(x)
        x = self.block512a(x)
        x = self.block512b
        # Reshape the data to a single dimension
        x = torch.flatten(x)
        x = activation.relu(self.hidden1(x))
        x = activation.relu(self.hidden2(x))
        x = activation.softmax(self.outputlayer(x))
        return x
    
    def conv_block(self, n_layers, n_filters):
        """
        Builds a convolutional block for the VGG architecture.
        
        Each block consists of multiple convolutional layers followed by a MaxPool layer
        ======================
        Convolutional network
        Batch normalization
        ReLU activation
        ----------------------
        Convolutional network
        Batch normalization
        ReLU activation
        ----------------------
        MaxPool
        ======================
        
        Parameters
        ----------
        n_layers: integer
             The number of convolutional layers.

        n_filters: integer
            The number of filters.
        """
        layers = []
        for n in range(n_layers):
            layers += [
                nn.Conv2d(3, n_filters, kernel_size=3, stride=1, padding=1, padding_mode='replicate'),
                nn.BatchNorm2d(n_filters),
                nn.ReLU(),
            ]
        layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        return nn.Sequential(*layers)
    
    
# Instantiate the model
our_model = ConvNet()

print(our_model)

ConvNet(
  (block64): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block128): Sequential(
    (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, st