# Implement VGG-Net

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms


#test
class LeNet(nn.Module):
  """LeNet-style CNN: two conv/pool stages followed by three linear layers.

  The first linear layer takes 400 features (16 channels x 5 x 5), which is
  what a single-channel 32x32 input produces after the two conv/pool stages.
  The output has 10 values per sample.
  """

  def __init__(self):
    super().__init__()
    # Stage 1: 1 -> 6 channels, 5x5 kernel, then 2x2 max-pool (subsampling).
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.pool1 = nn.MaxPool2d(kernel_size=2)
    # Stage 2: 6 -> 16 channels, 5x5 kernel, then 2x2 max-pool.
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    self.pool2 = nn.MaxPool2d(kernel_size=2)
    # Classifier head: 400 -> 120 -> 84 -> 10.
    self.fc1 = nn.Linear(in_features=400, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Return the 10 output scores for a batch `x`."""
    feats = self.pool1(F.relu(self.conv1(x)))
    feats = self.pool2(F.relu(self.conv2(feats)))
    # Keep the batch dimension, collapse everything else.
    flat = torch.flatten(feats, start_dim=1)
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return self.fc3(hidden)

  

# Smoke test: push one random single-channel 32x32 image through LeNet
# and display the shape of the result.
input_mock = torch.rand((1, 1, 32, 32))
net = LeNet()
net(input_mock).shape

torch.Size([1, 10])

In [19]:
# Shape check: chain a reduced VGG-style stack (one conv per stage, no
# activations) by hand to confirm that a 224x224 RGB image ends up as a
# 7x7x512 feature map, which is the input size of the first linear layer.
input_mock = torch.rand(1,3,224,224)
conv = nn.Conv2d(3,64,3,padding=1)
conv2 = nn.Conv2d(64,128,3,padding=1)
conv3 = nn.Conv2d(128,256,3,padding=1)
conv4 = nn.Conv2d(256,512,3,padding=1)
conv5 = nn.Conv2d(512,512,3,padding=1)
fc1 = nn.Linear(7*7*512,4096)
fc2 = nn.Linear(4096,4096)
fc3 = nn.Linear(4096,1000)

p = nn.MaxPool2d(2)

# Only shapes matter here, so skip building an autograd graph.
with torch.no_grad():
    x = conv(input_mock)
    x = conv2(x)
    x = p(x)   # 224 -> 112
    x = conv3(x)
    x = p(x)   # 112 -> 56
    x = conv4(x)
    x = p(x)   # 56 -> 28
    x = conv5(x)
    x = p(x)   # 28 -> 14
    x = p(x)   # 14 -> 7
    x = torch.flatten(x,start_dim=1)
    x = fc1(x)
    x = fc2(x)
    x = fc3(x)
print(x.shape)

torch.Size([1, 1000])


In [43]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms

class VGG16(nn.Module):
    """VGG-16 image classifier: 13 3x3 convolutions and 3 linear layers.

    The convolutional part (`conv_features`) consists of five stages of
    3x3 conv + ReLU layers, each stage closed by a 2x2 max-pool, followed by
    an adaptive average pool that always yields a 7x7 map. Thanks to that
    last pool the network accepts both 224x224 and smaller (e.g. 32x32)
    inputs.

    The classifier (`fc_classifier`) returns raw, unnormalized class scores
    (logits). No ReLU or softmax is applied to the final layer: a ReLU would
    clamp every negative score to zero, and `nn.CrossEntropyLoss` applies
    log-softmax itself. Use `torch.softmax(out, dim=1)` on the output when
    probabilities are needed.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output scores per sample.
    """

    # Output channels of each 3x3 convolution; 'M' marks a 2x2 max-pool.
    _CONV_CFG = (
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    )

    def __init__(self, in_channels: int = 3, num_classes: int = 1000):
        super().__init__()

        layers = []
        channels = in_channels
        for spec in self._CONV_CFG:
            if spec == 'M':
                layers.append(nn.MaxPool2d(2))
            else:
                # kernel 3 with padding 1 keeps the spatial size unchanged
                layers.append(nn.Conv2d(channels, spec, 3, padding=1))
                layers.append(nn.ReLU())
                channels = spec
        # Force a 7x7 map so inputs smaller than 224x224 still fit the
        # first linear layer.
        layers.append(nn.AdaptiveAvgPool2d((7, 7)))
        self.conv_features = nn.Sequential(*layers)

        self.fc_classifier = nn.Sequential(
            nn.Linear(7 * 7 * 512, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images `x`."""
        x = self.conv_features(x)
        x = torch.flatten(x, start_dim=1)
        return self.fc_classifier(x)

# input_mock = torch.rand(1,3,224,224)
# net = VGG16()
# net(input_mock).shape


# Forward Pass Debug
If it can process random data, then you're mostly alright :D

In [44]:
import numpy as np

net = VGG16()
# numel() counts the elements of each parameter tensor directly.
num_params = sum(p.numel() for p in net.parameters())
print(f"Number of parameters : {num_params}")
print('-'*50)

# test on Imagenet-like shaped data (224x224)

X = torch.rand((8, 3, 224, 224))
# Only the output shape is inspected, so do not record an autograd graph.
with torch.no_grad():
    imgnet_out = net(X)
print('output shape for imgnet', imgnet_out.shape)


Number of parameters : 138357544
--------------------------------------------------
output shape for imgnet torch.Size([8, 1000])


  input = module(input)


In [45]:
# test on CIFAR-like shaped data (32x32)

X = torch.rand((8, 3, 32, 32))
# Only the output shape is inspected, so do not record an autograd graph.
with torch.no_grad():
    cifar_out = net(X)
print('output shape for cifar', cifar_out.shape)

output shape for cifar torch.Size([8, 1000])


# OPTIONAL: Let's train on CIFAR-10

Let's load the dataset.

In [None]:
import torchvision
import torchvision.transforms as transforms

# Per-channel statistics used to normalize the images; `imshow` below
# undoes this normalization with the same values.
mean = (0.4913997551666284, 0.48215855929893703, 0.4465309133731618)
std  = (0.24703225141799082, 0.24348516474564, 0.26158783926049628)

# Choose the appropriate transforms for the problem at hand
# see https://pytorch.org/docs/stable/torchvision/transforms.html
transform = transforms.Compose(
    [transforms.ToTensor(),               # PIL image -> float tensor in [0, 1]
     transforms.Normalize(mean, std),     # per-channel (x - mean) / std
    ])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Let's plot some sample images.

In [None]:
import matplotlib.pyplot as plt

# functions to show an image
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    `img` is a channel-first tensor (C, H, W) that was normalized with the
    module-level `mean` and `std`; the normalization is undone before
    plotting.
    """
    # Convert first so the arithmetic below is plain NumPy instead of a
    # mix of torch tensors and NumPy arrays.
    npimg = img.detach().cpu().numpy()
    npimg = npimg * np.array(std)[:, None, None] + np.array(mean)[:, None, None]  # unnormalize
    # Rounding can push values marginally outside [0, 1], which matplotlib
    # would clip with a warning.
    npimg = np.clip(npimg, 0.0, 1.0)
    # matplotlib expects channel-last (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(trainloader)
# Use the builtin next(): DataLoader iterators in current PyTorch releases
# no longer provide a `.next()` method.
images, labels = next(dataiter)
images, labels = images[:4], labels[:4]

# show images
imshow(torchvision.utils.make_grid(images))
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [None]:
assert torch.cuda.is_available(), "Notebook is not configured properly!"
print('Congrats, you\'re running this code on a', torch.cuda.get_device_name(), 'gpu')
device = 'cuda:0'

# initialize VGG16 for this specific classification problem:
# RGB input images and one output per entry in `classes`, on the GPU.
net = VGG16(in_channels=3, num_classes=len(classes)).to(device)

# Nothing works w/o Batch Norm or Proper Initialization
def initialize_weights(model):
    """Re-initialize, in place, every Conv2d, BatchNorm2d and Linear in `model`.

    - Conv2d: Kaiming-normal weights (fan_out, ReLU gain), zero bias.
    - BatchNorm2d: weight 1, bias 0.
    - Linear: weights drawn from N(0, 0.01), zero bias.

    Parameters that are absent (`bias=False`, `affine=False`) are skipped.
    """
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            # weight and bias are None when the layer is built with affine=False
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            # bias is None when the layer is built with bias=False
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
initialize_weights(net)

# define here the Pytorch objects needed for training
# nn.CrossEntropyLoss takes raw class scores plus integer class labels and
# applies log-softmax itself.
crit = nn.CrossEntropyLoss() # loss criterion
# SGD with momentum and a small weight decay.
opt = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) # optimizer
epochs = 10

Training loop

In [None]:
from datetime import datetime
from IPython.display import Image
from tqdm.notebook import tqdm
from IPython.display import clear_output
from time import sleep
import matplotlib.pyplot as plt

for e in range(epochs):
    # magic progress bar printer
    pbar = tqdm(total=len(trainloader), desc=f'Epoch {e} - 0%')

    # training loop
    net.train()
    for i, (x, y) in enumerate(trainloader):
        # the batch must live on the same device as the model
        x, y = x.to(device), y.to(device)

        # forward pass, loss, backward pass and parameter update
        opt.zero_grad()
        out = net(x)
        loss = crit(out, y)
        loss.backward()
        opt.step()

        # logging functions
        pbar.update(1)
        # (i + 1) so the last batch of the epoch reports 100%
        pbar.set_description(f'Epoch {e} - {round((i + 1)/len(trainloader) * 100)}% -- loss {loss.item():.2f}')
    pbar.close()

    # evaluation loop
    net.eval()
    corr = 0
    with torch.no_grad():
        for x, y in testloader:
            x, y = x.to(device), y.to(device)
            # predicted class = index of the highest score
            preds = net(x).argmax(dim=1)
            corr += (preds == y).sum().item()
    print(f"Accuracy for epoch {e}:{corr / len(testset)}")