# Exercise 4. Convolutional networks

## Part 2. VGG-style network

In the second part you need to train a convolutional neural network with an architecture inspired by a VGG-network.

In [None]:
# Global switch read by later cells: when True, the device is forced to CPU,
# the training loop stops after a single mini-batch, and the model weights are
# loaded from file instead of being saved.
skip_training = False  # Set this flag to True before validation and submission

In [None]:
# During evaluation, this cell sets skip_training to True
# skip_training = True

In [None]:
# Select data directory
# Known locations are tried in order: '/coursedata' first, then '../data'
# (relative to the current working directory). If neither exists, the
# else-branch has to be filled in by hand before the notebook can run.
import os
if os.path.isdir('/coursedata'):
    course_data_dir = '/coursedata'
elif os.path.isdir('../data'):
    course_data_dir = '../data'
else:
    # Specify course_data_dir on your machine
    # course_data_dir = ...
    # YOUR CODE HERE
    raise NotImplementedError()

print('The data directory is %s' % course_data_dir)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Select the device for training (use GPU if you have one)
# To train on the first CUDA GPU, enable the 'cuda:0' line and disable the 'cpu' line.
#device = torch.device('cuda:0')
device = torch.device('cpu')

In [None]:
# Override the previously selected device when training is skipped.
if skip_training:
    # The models are always evaluated on CPU
    device = torch.device("cpu")

## FashionMNIST dataset

Let us use the FashionMNIST dataset. It consists of 60,000 training images of 10 classes: 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'.

In [None]:
# Preprocessing applied to every image: convert to a tensor, then shift and
# scale with mean 0.5 and std 0.5 (maps values in [0, 1] to [-1, 1]).
to_tensor = transforms.ToTensor()
rescale = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([to_tensor, rescale])
del to_tensor, rescale  # keep the notebook namespace unchanged

# FashionMNIST is stored in (and, if missing, downloaded to) a sub-directory
# of the course data directory.
data_dir = os.path.join(course_data_dir, 'fashion_mnist')
print('Data stored in %s' % data_dir)

# Training split first, then the test split, both with the same preprocessing.
trainset, testset = (
    torchvision.datasets.FashionMNIST(
        root=data_dir,
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# Human-readable class names, indexed by label
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Shuffled mini-batches of 32 for training; ordered mini-batches of 5 for testing.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=5, shuffle=False)

In [None]:
# Classification accuracy of a network over all batches of a data loader
def compute_accuracy(net, testloader):
    """Return the fraction of samples in `testloader` that `net` classifies correctly.

    The network is switched to evaluation mode and is left in that mode on
    return. Every batch is moved to the module-level `device` before the
    forward pass, and no gradients are tracked.

    Args:
      net: Model mapping a batch of images to per-class scores.
      testloader: Iterable yielding (images, labels) batches.

    Returns:
      Number of correct predictions divided by the number of samples seen.
    """
    net.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = net(batch_images)
            # torch.max over dim 1 returns (values, indices); the indices are the predicted classes
            predicted = torch.max(scores.data, 1)[1]
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
    return n_correct / n_seen

# VGG-style network

Let us now define a convolutional neural network with an architecture inspired by the [VGG-net](https://arxiv.org/abs/1409.1556):

<img src="vgg-style.png" width=600 style="float: left;">

The architecture:
* A block of three convolutional layers with:
    * 3x3 kernel
    * 16 output channels
    * one pixel zero-padding on both sides
    * 2d batch normalization after each convolutional layer
    * ReLU nonlinearity after each 2d batch normalization layer
* Max pooling layer with 2x2 kernel and stride 2.
* A block of three convolutional layers with:
    * 3x3 kernel
    * 32 output channels
    * one pixel zero-padding on both sides
    * 2d batch normalization after each convolutional layer
    * ReLU nonlinearity after each 2d batch normalization layer
* Max pooling layer with 2x2 kernel and stride 2.
* One convolutional layer with:
    * 3x3 kernel
    * 48 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* One convolutional layer with:
    * 1x1 kernel
    * 32 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* One convolutional layer with:
    * 1x1 kernel
    * 16 output channels
    * *no padding*
    * 2d batch normalization after the convolutional layer
    * ReLU nonlinearity after the 2d batch normalization layer
* Global average pooling:
    * 5x5 kernel (the input of the layer should be 5x5)
* A fully-connected layer with 10 outputs (no nonlinearity)

**Note: Batch normalization is expected to be right after a convolutional layer, before nonlinearity.**

In [None]:
class VGGNet(nn.Module):
    """VGG-style convolutional classifier for single-channel 28x28 images.

    Layout (every convolution is followed by 2d batch normalization and ReLU):
      * three 3x3 convolutions, padding 1, n_channels outputs
      * 2x2 max pooling with stride 2
      * three 3x3 convolutions, padding 1, 2*n_channels outputs
      * 2x2 max pooling with stride 2
      * one 3x3 convolution, no padding, 3*n_channels outputs
      * one 1x1 convolution, 2*n_channels outputs
      * one 1x1 convolution, n_channels outputs
      * 5x5 average pooling (global for 28x28 inputs)
      * fully-connected layer with 10 outputs (no nonlinearity)
    """

    def __init__(self, n_channels=16):
        """
        Args:
          n_channels (int): Number of channels in the first convolutional layer. The numbers of channels in
                             the following layers are multiples of n_channels.
        """
        super(VGGNet, self).__init__()

        def conv_bn_relu(in_channels, out_channels, kernel_size, padding):
            # Convolution -> batch normalization -> ReLU, in the order required by the exercise
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            )

        n = n_channels
        self.block1 = nn.Sequential(
            conv_bn_relu(1, n, 3, 1),
            conv_bn_relu(n, n, 3, 1),
            conv_bn_relu(n, n, 3, 1),
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.block2 = nn.Sequential(
            conv_bn_relu(n, 2 * n, 3, 1),
            conv_bn_relu(2 * n, 2 * n, 3, 1),
            conv_bn_relu(2 * n, 2 * n, 3, 1),
        )
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.block3 = nn.Sequential(
            conv_bn_relu(2 * n, 3 * n, 3, 0),  # unpadded 3x3: 7x7 -> 5x5 for 28x28 inputs
            conv_bn_relu(3 * n, 2 * n, 1, 0),
            conv_bn_relu(2 * n, n, 1, 0),
        )
        self.avgpool = nn.AvgPool2d(kernel_size=5)
        self.fc = nn.Linear(n, 10)

    def forward(self, x, verbose=False):
        """Compute class scores for a batch of images.

        Args:
          x: Tensor of shape (batch_size, 1, 28, 28). Other spatial sizes do not reduce to 1x1 before
             the fully-connected layer and therefore fail there.
          verbose (bool): If True, print the shape of the tensor after every stage.

        Returns:
          Tensor of shape (batch_size, 10) with unnormalized class scores.
        """
        if verbose:
            print('input:', x.shape)

        stages = (
            ('block1', self.block1),
            ('pool1', self.pool1),
            ('block2', self.block2),
            ('pool2', self.pool2),
            ('block3', self.block3),
            ('avgpool', self.avgpool),
        )
        for name, stage in stages:
            x = stage(x)
            if verbose:
                print('%s:' % name, x.shape)

        # Drop the 1x1 spatial dimensions before the fully-connected layer
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        if verbose:
            print('fc:', x.shape)
        return x

In [None]:
# Let's test the shapes of the tensors
net = VGGNet()
net.to(device)

# Feed a batch of images from the training data to test the network
with torch.no_grad():
    dataiter = iter(trainloader)
    # Use the built-in next(): the Python-2-style .next() method is not available
    # on DataLoader iterators in recent PyTorch releases.
    images, labels = next(dataiter)
    images = images.to(device)
    print('Shape of the input tensor:', images.shape)

    y = net(images, verbose=True)
    assert y.shape == torch.Size([32, 10]), "Bad shape of y: y.shape={}".format(y.shape)

print('The shapes seem to be ok.')

In [None]:
# This is a cell used for grading

In [None]:
# This is a cell used for grading

In [None]:
# Now let us train the network using the same training loop
# Start from a freshly initialised model placed on the selected device
# (nn.Module.to returns the module itself, so the calls can be chained).
net = VGGNet().to(device)

# Classification loss used by the training loop
criterion = nn.CrossEntropyLoss()

In [None]:
# Adam optimizer over all parameters of the network, with learning rate 0.01
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [None]:
# Number of passes over the training data performed by the training loop
n_epochs = 10

In [None]:
# Train for n_epochs passes over the training data, reporting the average loss
# every `print_every` mini-batches and the test accuracy after every epoch.
# With skip_training=True only a single mini-batch of a single epoch is run.
print_every = 200  # mini-batches
for epoch in range(n_epochs):
    # compute_accuracy() leaves the network in evaluation mode, so training mode
    # must be re-enabled at the start of every epoch. Otherwise mode-dependent
    # layers such as batch normalization would run in evaluation mode from the
    # second epoch onwards.
    net.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader, 0):
        # Transfer to GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i % print_every) == (print_every-1):
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/print_every))
            running_loss = 0.0
        if skip_training:
            break

    # Print accuracy after every epoch
    accuracy = compute_accuracy(net, testloader)
    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * accuracy))

    if skip_training:
        break

print('Finished Training')

You should get a test accuracy slightly better than the accuracy of the simple conv net from part 1.

In [None]:
# Save the network to a file, submit this file together with your notebook
# With skip_training=False the user is asked whether to save the trained weights;
# with skip_training=True a fresh VGGNet is created and its weights are loaded from the file.
filename = '4_vgg_net.pth'
if not skip_training:
    # Only the prompt itself is guarded, so a failure of torch.save() is reported
    # as such instead of being replaced by the message below. Catching Exception
    # (not a bare except) lets KeyboardInterrupt and SystemExit propagate.
    # NOTE(review): presumably input() fails when the notebook is executed
    # non-interactively (validation) -- confirm.
    try:
        do_save = input('Do you want to save the model (type yes to confirm)? ').lower()
    except Exception as err:
        raise Exception('The notebook should be run or validated with skip_training=True.') from err

    if do_save == 'yes':
        torch.save(net.state_dict(), filename)
        print('Model saved to %s' % filename)
    else:
        print('Model not saved')
else:
    net = VGGNet()
    # The identity map_location keeps every loaded storage where it was deserialized (CPU),
    # so weights saved from a GPU can be loaded on a CPU-only machine.
    net.load_state_dict(torch.load(filename, map_location=lambda storage, loc: storage))
    net.to(device)
    print('Model loaded from %s' % filename)

In [None]:
# Let us compute the accuracy on the test set
# `net` is either the network trained above or, with skip_training=True, the one loaded from file.
accuracy = compute_accuracy(net, testloader)
print('Accuracy of the VGG net on the test images: %.3f' % accuracy)