In [1]:
import torch.nn as nn
import torch.optim as optim
import torch

### VGG16 Model

Detailed description of the architecture.

1. **First block:**
   - 1.1 Convolutional layer: 3 input channels (RGB), 64 output channels, 3×3 kernel size, stride=1, and padding of 1 pixel (to maintain spatial dimensions)
   - 1.2 Convolutional layer: 64 input channels, 64 output channels, 3×3 kernel size, stride=1, and padding of 1 pixel (to maintain spatial dimensions)
   - 1.3 MaxPool layer: kernel size of 2x2, stride=2

2. **Second Block:**
   - 2.1 Convolutional layer: 64 input channels, 128 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 2.2 Convolutional layer: 128 input channels, 128 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 2.3 MaxPool layer: kernel size of 2x2, stride=2

3. **Third Block:**
   - 3.1 Convolutional layer: 128 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.2 Convolutional layer: 256 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.3 Convolutional layer: 256 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.4 MaxPool layer: kernel size of 2x2, stride=2

4. **Fourth Block:**
   - 4.1 Convolutional layer: 256 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.2 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.3 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.4 MaxPool layer: kernel size of 2x2, stride=2

5. **Fifth Block:**
   - 5.1 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.2 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.3 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.4 MaxPool layer: kernel size of 2x2, stride=2




In [None]:

class VGG16(nn.Module):
    """Convolutional feature-extraction stack of VGG16.

    Builds five sequential blocks (``block1`` .. ``block5``). Every block is a
    run of 3x3 convolutions (stride 1, padding 1, so height/width are kept)
    followed by a 2x2 max-pool with stride 2 (which halves height/width).
    The shape comments below assume a 3 x 224 x 224 input image.

    NOTE(review): there is no activation between the convolutions; the
    reference VGG16 applies a ReLU after every conv layer -- confirm whether
    the omission is intentional.
    """

    def __init__(self):
        super().__init__()

        # First block: 3 -> 64 channels
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # input: 3 x 224 x 224
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)  # output: 64 x 112 x 112
        )

        # Second block: 64 -> 128 channels
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)  # output: 128 x 56 x 56
        )

        # Third block: 128 -> 256 channels
        self.block3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)   # output: 256 x 28 x 28
        )

        # Fourth block: 256 -> 512 channels
        self.block4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)   # output: 512 x 14 x 14
        )

        # Fifth block: 512 -> 512 channels.
        # Stored as block5 (it previously overwrote block4) and fed with the
        # 512 channels that block4 produces, so the blocks can be chained.
        self.block5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2)   # output: 512 x 7 x 7
        )


