In [2]:
import torch.nn as nn
import torch.optim as optim
import torch
import math

### VGG16 Model

Detailed description of the architecture.

1. **First block:**
   - 1.1 Convolutional layer: 3 input channels (RGB), 64 output channels, 3×3 kernel size, stride=1, and padding of 1 pixel (to maintain spatial dimensions)
   - 1.2 Convolutional layer: 64 input channels, 64 output channels, 3×3 kernel size, stride=1, and padding of 1 pixel (to maintain spatial dimensions)
   - 1.3 MaxPool layer: kernel size of 2x2, stride=2

2. **Second Block:**
   - 2.1 Convolutional layer: 64 input channels, 128 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 2.2 Convolutional layer: 128 input channels, 128 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 2.3 MaxPool layer: kernel size of 2x2, stride=2

3. **Third Block:**
   - 3.1 Convolutional layer: 128 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.2 Convolutional layer: 256 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.3 Convolutional layer: 256 input channels, 256 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 3.4 MaxPool layer: kernel size of 2x2, stride=2

4. **Fourth Block:**
   - 4.1 Convolutional layer: 256 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.2 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.3 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 4.4 MaxPool layer: kernel size of 2x2, stride=2

5. **Fifth Block:**
   - 5.1 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.2 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.3 Convolutional layer: 512 input channels, 512 output channels, 3x3 kernel size, stride=1, and padding of 1 pixel
   - 5.4 MaxPool layer: kernel size of 2x2, stride=2

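6. **Sixth Block (dense layers):**
   - 6.1 Flatten layer: turns the 512x7x7 feature map into a vector of 25088 values
   - 6.2 Fully connected layer: 25088 inputs, 4096 outputs, followed by ReLU and Dropout (p=0.5)
   - 6.3 Fully connected layer: 4096 inputs, 4096 outputs, followed by ReLU and Dropout (p=0.5)
   - 6.4 Fully connected layer: 4096 inputs, one output per class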


In [3]:

class VGG16(nn.Module):
    """VGG16 convolutional network for image classification.

    The network is five convolutional blocks (3x3 convolutions, each followed
    by ReLU, with a 2x2 max-pool closing every block) and a three-layer fully
    connected classifier. The first classifier layer is sized for a
    512 x 7 x 7 feature map, so the model expects input of shape
    (N, 3, 224, 224).

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Feature extractor. Every block halves the spatial size:
        # 224 -> 112 -> 56 -> 28 -> 14 -> 7.
        self.block1 = self._conv_block(3, 64, num_convs=2)     # output: 64 x 112 x 112
        self.block2 = self._conv_block(64, 128, num_convs=2)   # output: 128 x 56 x 56
        self.block3 = self._conv_block(128, 256, num_convs=3)  # output: 256 x 28 x 28
        self.block4 = self._conv_block(256, 512, num_convs=3)  # output: 512 x 14 x 14
        self.block5 = self._conv_block(512, 512, num_convs=3)  # output: 512 x 7 x 7

        # Sixth block (dense layers)
        self.block6 = nn.Sequential(
            nn.Flatten(),   # Flatten the feature map (512 x 7 x 7) into a vector
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build `num_convs` (3x3 conv + ReLU) pairs followed by a 2x2 max-pool.

        Only the first convolution changes the channel count; the remaining
        ones map `out_channels` to `out_channels`. Padding of 1 keeps the
        spatial size through the convolutions, so only the pool shrinks it.
        """
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU())
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, 224, 224).

        Returns:
            Tensor of shape (N, num_classes). No softmax is applied.
        """
        out = self.block1(x)
        out = self.block2(out)
        out = self.block3(out)
        out = self.block4(out)
        out = self.block5(out)
        # block6 starts with nn.Flatten, so no manual reshape is needed here.
        return self.block6(out)



### Load Data

We will use the CIFAR10 dataset, which has 10 classes.

In [4]:
BATCH_SIZE = 64  # number of images per mini-batch

In [5]:
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split

# Set seed for reproducibility (random_split and the shuffling train loader
# both draw from torch's global RNG)
torch.manual_seed(42)

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Upscale the images to the 224 x 224 input VGG16 expects
    transforms.ToTensor(),
    # Per-channel normalization (these are the standard ImageNet channel statistics)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Split into train, validation, and test sets
full_train_set = CIFAR10("dataset/", train=True, transform=transform, download=True)
val_size = math.ceil(len(full_train_set) * 0.1)  # hold out 10% of the training data
train_size = len(full_train_set) - val_size
train_set, val_set = random_split(full_train_set, [train_size, val_size])
test_set = CIFAR10("dataset/", train=False, transform=transform, download=True)

# Use dataloaders. Only the training data is shuffled: evaluation metrics do
# not depend on sample order, and shuffling there would needlessly consume
# random numbers from the global generator.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)


### Training loop

In [6]:
# Instantiate the network with 10 output classes
vgg16_model = VGG16(num_classes=10)

In [7]:
# Set parameters
NUM_EPOCH = 5
LEARNING_RATE = 0.0001

# Set loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16_model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(NUM_EPOCH):
    # Training mode enables dropout; the validation pass below turns it off,
    # so it has to be switched back on at the start of every epoch.
    vgg16_model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = vgg16_model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()   # clear previous gradients
        loss.backward()
        optimizer.step()

    # The loss reported is that of the epoch's last mini-batch, not an average.
    print(f"Epoch: {epoch+1}, Step:{i+1}, Loss: {loss.item()}")

    # Validation: eval mode disables dropout so the accuracy is not measured
    # on randomly masked activations.
    vgg16_model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in val_loader:
            outputs = vgg16_model(images)
            predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated rather than a fixed count.
    print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total))

torch.Size([64, 10])
torch.Size([64, 10])


KeyboardInterrupt: 

### Testing Loop

In [9]:
correct = 0
total = 0

# Eval mode disables dropout; without it the test accuracy would be computed
# on randomly masked activations whenever the model is still in training mode.
vgg16_model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        outputs = vgg16_model(images)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# Report the number of images actually evaluated rather than a fixed count.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

KeyboardInterrupt: 