<a href="https://colab.research.google.com/github/AnuruddhaPaul/VGGNET_From_Scratch/blob/main/VGG16_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# 1. Device selection: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# 2. Training hyperparameters.
# The epoch count is deliberately small for a quick demo; raise it for
# better accuracy.
num_epochs, batch_size, learning_rate = 5, 64, 0.001

# 3. MNIST input pipeline.
# The network below halves the spatial size five times, which 28x28 inputs
# cannot survive, so images are resized to 32x32 first:
# 32 -> 16 -> 8 -> 4 -> 2 -> 1.
_preprocessing_steps = [
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    # Standardize with the MNIST mean and standard deviation.
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
transform = transforms.Compose(_preprocessing_steps)

# Both splits share the same storage root, preprocessing and download flag.
_mnist_kwargs = dict(root='./data', transform=transform, download=True)
train_dataset = torchvision.datasets.MNIST(train=True, **_mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **_mnist_kwargs)

# Only the training split is shuffled; the test split keeps its order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 4. VGG-16 Architecture Implementation
class VGG16(nn.Module):
    """VGG-16 style classifier with batch normalization after every convolution.

    The feature extractor consists of 13 3x3 convolutions (each followed by
    BatchNorm and ReLU) grouped into five stages, every stage ending in a
    2x2 max-pool that halves the spatial size. The features are then pooled
    to 1x1 and fed to a three-layer fully connected classifier.

    Args:
        num_classes: Number of output logits produced by the classifier.
        in_channels: Number of channels of the input images. Defaults to 1
            (grayscale), which is what the original implementation assumed.

    Input height and width must each be at least 32 so that the five
    halvings never reach size zero. For exactly 32x32 inputs the adaptive
    pooling is an identity, so results match a network without it.
    """

    # Output channels of each convolution, in order; 'M' marks a max-pool.
    _LAYOUT = (
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    )

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()

        # Layers are generated in the same order as a hand-written list, so
        # the Sequential indices (and therefore state_dict keys such as
        # "features.0.weight") are the same as before.
        layers = []
        channels = in_channels
        for entry in self._LAYOUT:
            if entry == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.extend([
                nn.Conv2d(channels, entry, kernel_size=3, padding=1),
                nn.BatchNorm2d(entry),
                nn.ReLU(inplace=True),
            ])
            channels = entry
        self.features = nn.Sequential(*layers)

        # Parameter-free pooling to 1x1 so the classifier input size does
        # not depend on the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Linear(channels * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch x."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # Keep the batch dimension, flatten the rest.
        x = self.classifier(x)
        return x

# 5. Model, Loss and Optimizer
model = VGG16(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

# 6. Training Loop
total_step = len(train_loader)
print("Starting Training...")

# Select training mode explicitly so Dropout and BatchNorm behave correctly
# even if the model was previously switched to eval mode.
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current mini-batch every 100 steps.
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')

# 7. Testing / Evaluation
model.eval() # Set model to evaluation mode (affects Dropout and BatchNorm)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Under no_grad the outputs carry no autograd history, so the legacy
        # `.data` accessor is unnecessary.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated rather than a constant.
    print(f'Accuracy of the model on the {total} test images: {100 * correct / total} %')

# Save the model checkpoint (weights only)
torch.save(model.state_dict(), 'vgg16_mnist.pth')

Using device: cuda
Starting Training...
Epoch [1/5], Step [100/938], Loss: 0.2897
Epoch [1/5], Step [200/938], Loss: 0.0979
Epoch [1/5], Step [300/938], Loss: 0.0463
Epoch [1/5], Step [400/938], Loss: 0.0300
Epoch [1/5], Step [500/938], Loss: 0.0929
Epoch [1/5], Step [600/938], Loss: 0.0765
Epoch [1/5], Step [700/938], Loss: 0.1114
Epoch [1/5], Step [800/938], Loss: 0.0256
Epoch [1/5], Step [900/938], Loss: 0.0409
Epoch [2/5], Step [100/938], Loss: 0.0110
Epoch [2/5], Step [200/938], Loss: 0.0659
Epoch [2/5], Step [300/938], Loss: 0.0516
Epoch [2/5], Step [400/938], Loss: 0.0016
Epoch [2/5], Step [500/938], Loss: 0.0159
Epoch [2/5], Step [600/938], Loss: 0.0103
Epoch [2/5], Step [700/938], Loss: 0.1499
Epoch [2/5], Step [800/938], Loss: 0.0975
Epoch [2/5], Step [900/938], Loss: 0.0477
Epoch [3/5], Step [100/938], Loss: 0.0016
Epoch [3/5], Step [200/938], Loss: 0.0176
Epoch [3/5], Step [300/938], Loss: 0.0040
Epoch [3/5], Step [400/938], Loss: 0.0436
Epoch [3/5], Step [500/938], Loss: 0