# Week 3 lab: Convolutional networks

This week we will get some hands-on experience with convolutional networks on 2D images.

In [0]:
#@title
from __future__ import division, print_function, unicode_literals
from google.colab import files

# Common imports
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import seaborn as sns

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize']  = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "tensorflow"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
    files.download(PROJECT_ROOT_DIR+'/images/'+CHAPTER_ID+'/'+fig_id + ".png") 

Now you need to define the network properties. Fill in the missing structure to implement a multi-layer linear, dense network for `model_dense` and a multi-layer convolutional network for `model`. For the convnet, remember that the output of every `Conv2d` and `MaxPool2d` layer is a 3D tensor of shape *(height, width, channels)*. The *width* and *height* dimensions tend to shrink as we go deeper in the network. The number of channels is controlled by the first argument passed to the Conv2D layers (e.g. 32 or 64). Remember to check the impact of padding parameters and maxpooling output dimensions when structuring the dimensions of the conv2d layers.

The next step would be to feed our last output tensor (of shape (X, Y, N)) into a densely-connected classifier network like those you are already familiar with: a stack of Dense layers. These classifiers process vectors, which are 1D, whereas our current output is a 3D tensor. So first, we will have to flatten our 3D outputs to 1D, and then add a few Dense layers after that. We are going to do 10-way classification, so use a final layer with 10 outputs and a `nn.Softmax` activation.

In [18]:
import collections

model_dense = nn.Sequential(collections.OrderedDict(
    [("flatten1", nn.Flatten()),
     ("dense1", nn.Linear(10, 10)),
     ("dense2", nn.Linear(10, 10)),
     ("dense3", nn.Linear(10, 10)),
     ("softmax1", nn.Softmax(dim=1))]))

model = nn.Sequential(collections.OrderedDict(
    [("conv1", nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))),
     ("relu1", nn.ReLU()),
     ("maxpool1", nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)),
     ("conv2", nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))),
     ("relu2", nn.ReLU()),
     ("maxpool2", nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)),
     ("conv3", nn.Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1))),
     ("relu3", nn.ReLU()),
     ("faltten1", nn.Flatten()),
     ("fc1", nn.Linear(in_features=144, out_features=64, bias=True)),
     ("relu4", nn.ReLU()),
     ("fc2", nn.Linear(in_features=64, out_features=10, bias=True)),
     ("softmax1", nn.Softmax(dim=1))]))

print(model)

Sequential(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1))
  (relu3): ReLU()
  (faltten1): Flatten()
  (fc1): Linear(in_features=144, out_features=64, bias=True)
  (relu4): ReLU()
  (fc2): Linear(in_features=64, out_features=10, bias=True)
  (softmax1): Softmax(dim=1)
)


Now here's what our network looks like:

In [0]:
print(model)

Sequential(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1))
  (relu3): ReLU()
  (Flatten1): Flatten()
  (fc1): Linear(in_features=144, out_features=64, bias=True)
  (relu4): ReLU()
  (fc2): Linear(in_features=64, out_features=10, bias=True)
  (softmax1): Softmax(dim=1)
)



As you can see, our (3, 3, 64) outputs were flattened into vectors of shape (576,), before going through two Dense layers.

Now, let's train our convnet on the Fashion MNIST digits. You can[ learn more about the Fashion-MNIST data set](https://github.com/zalandoresearch/fashion-mnist).

We specify the root directory to store the dataset, download the training data, if not present on the local machine, and then apply the transforms. ToTensor to turn images into Tensor so we can directly use it with our network. The dataset is stored in the dataset class named `train_set`.

In [15]:
import torchvision
import torchvision.transforms as transforms

# Use standard FashionMNIST dataset
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

test_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = False,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)
print(train_set, test_set)

0it [00:00, ?it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


26427392it [00:01, 13821291.15it/s]                             


Extracting ./data/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw


0it [00:00, ?it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 95158.31it/s]                            
0it [00:00, ?it/s]

Extracting ./data/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


4423680it [00:01, 3900392.08it/s]                             
0it [00:00, ?it/s]

Extracting ./data/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 30766.46it/s]            

Extracting ./data/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw
Processing...
Done!
Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ./data/FashionMNIST
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           ) Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ./data/FashionMNIST
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )





In [0]:
import datetime
epoch_print_gap = 1

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
            
        if epoch == 1 or epoch % epoch_print_gap == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch, float(loss_train)))

def test_loop(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            #data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


In [0]:
lr = 0.01

loader = torch.utils.data.DataLoader(train_set, batch_size = 32)
test_loader = torch.utils.data.DataLoader(test_set)
#optimizer = optim.Adam(model.parameters(), lr=lr)
optimizer = optim.SGD(model.parameters(), lr=lr)
optimizer_dense = optim.SGD(model_dense.parameters(), lr=lr)

images, labels = next(iter(loader))
grid = torchvision.utils.make_grid(images)


In [20]:
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 10, 
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = loader,
)
training_loop(                                                      
    n_epochs = 10, 
    optimizer = optimizer_dense,
    model = model_dense,
    loss_fn = loss_fn,
    train_loader = loader,
)

2020-02-01 18:46:57.322560 Epoch 1, Training loss 4316.635025262833
2020-02-01 18:47:42.858768 Epoch 2, Training loss 4314.668133497238
2020-02-01 18:48:28.131931 Epoch 3, Training loss 4305.92552113533
2020-02-01 18:49:14.231814 Epoch 4, Training loss 3898.890065073967
2020-02-01 18:49:59.616177 Epoch 5, Training loss 3568.1254167556763
2020-02-01 18:50:45.356261 Epoch 6, Training loss 3510.6457827091217
2020-02-01 18:51:30.742218 Epoch 7, Training loss 3489.7183204889297
2020-02-01 18:52:18.065407 Epoch 8, Training loss 3471.068589091301
2020-02-01 18:53:03.908198 Epoch 9, Training loss 3450.9914383888245
2020-02-01 18:53:49.410030 Epoch 10, Training loss 3436.8643670082092


RuntimeError: ignored

Now run the test data through to see how the models got on. Compare the perfomance of Linear dense models and conv2D models. Experiment with different numbers and sizes of layers and kernel sizes.

In [0]:
test_loop(model=model, test_loader = test_loader)
test_loop(model=model_dense, test_loader = test_loader)