
# Lab exercises 3

For this lab exercises, please submit 2 notebooks / python script and 2 reports, one for each part. The deadline is 22 december.

It is important the you read the documentation to understand how to use Pytorch functions, what kind of transformation they apply etc. You have to take time to read it carefully to understand what you are doing.

    https://pytorch.org/docs/stable/nn.html
    https://pytorch.org/docs/stable/torch.html


# 1. Part one: MNIST classification with Pytorch

The goal of the first part is to learn how to use Pytorch and to observe the impact of regularization during training. You should test different network architectures, e.g. with hidden layers of size 128-128, 128-64-32-16, 256-128-64-32-16, 512-256-128-64-32-16, 800-800, and different activation functions (tanh, relu, sigmoid).

Remember that Pytorch expects data in a different format than in the previous lab exercise: the first dimension is always the batch dimension.

In [0]:
import os
import torch
import dataset_loader
import numpy as np

In [2]:
# Download mnist dataset 
if("mnist.pkl.gz" not in os.listdir(".")):
    !wget http://deeplearning.net/data/mnist/mnist.pkl.gz

# if you have it somewhere else, you can comment the lines above
# and overwrite the path below
mnist_path = "./mnist.pkl.gz"

--2019-11-28 14:46:24--  http://deeplearning.net/data/mnist/mnist.pkl.gz
Resolving deeplearning.net (deeplearning.net)... 132.204.26.28
Connecting to deeplearning.net (deeplearning.net)|132.204.26.28|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16168813 (15M) [application/x-gzip]
Saving to: ‘mnist.pkl.gz’


2019-11-28 14:46:33 (4.25 MB/s) - ‘mnist.pkl.gz’ saved [16168813/16168813]



In [0]:
# load the 3 splits
train_data, dev_data, test_data = dataset_loader.load_mnist(mnist_path)

In [4]:
image = torch.from_numpy(train_data[0][0])
print(image.shape) # flat image of dim (784,)

# reshape the tensor so it is represented as a batch containing a single image
# -1 means "all remaining elements", here it would be equivalent to image.reshape(1, 784)
image = image.reshape(1, -1)
print(image.shape) # flat image of dim (1, 784)

torch.Size([784])
torch.Size([1, 784])


In [0]:
def create_batch(data, index, batch_size):
  if index + batch_size > len(data[0]): batch_size = len(data[0]) - index - 1
  return (torch.cat(
      [
          # we reshape the image tensor so it has dimension (1, 784)
          torch.from_numpy(image).reshape(1, -1)
          for image in data[0][index:index + batch_size]
      ],
      # we want to concatenate on the batch dimension
      dim=0),

      torch.tensor(data[1][index:index + batch_size])
  )

In [93]:
create_batch(dev_data, 1230, 1)[1]

tensor([7])

## 1.2. Layer initialization

By default, Pytorch will apply Kaiming initialization to linear layers. However, I recommend you to always explicitly initialize you network by hand in the constructor.

In [0]:
def get_activation(activations, index):
  if activations[index] == 'relu':
    return torch.relu
  elif activations[index] == 'leaky_relu':
    return torch.leaky_relu
  elif activations[index] == 'elu':
    return torch.elu
  elif activations[index] == 'tanh':
    return torch.tanh
  elif activations[index] == 'sigmoid':
    return torch.sigmoid
  else:
    raise Exception('unknown activation')

In [0]:
def get_initialization(activations, index):
  if activations[index] == 'relu':
    return torch.nn.init.kaiming_uniform_
  elif activations[index] == 'tanh':
    return torch.nn.init.xavier_uniform_
  else:
    return torch.nn.init.xavier_uniform_

In [0]:
class Mlp_classifier(torch.nn.Module):
  def __init__(self, input_dim, num_classes, num_hidden_layers, hidden_layers_dim, activations, dropouts):
    assert isinstance(hidden_layers_dim, list)
    assert isinstance(activations, list)
    assert len(hidden_layers_dim) == num_hidden_layers
    assert isinstance(dropouts, list)
    assert len(dropouts) == num_hidden_layers + 1 # dropout is also for the input layer

    self.activations = activations
    self.dropouts = [torch.nn.Dropout(p=d) for d in dropouts]

    super().__init__()

    self.z_projs = torch.nn.ModuleList()

    i = 0
    previous_layer_dim = input_dim
    for hdim in hidden_layers_dim:
      linear = torch.nn.Linear(previous_layer_dim, hdim)

      initialization = get_initialization(self.activations, i)
      initialization(linear.weight.data)
      torch.nn.init.zeros_(linear.bias.data)
      self.z_projs.append(linear)
      
      previous_layer_dim = hdim
      i += 1

    self.output_proj = torch.nn.Linear(previous_layer_dim, num_classes)

  def forward(self, batch):
    z = batch
    i = 0
    for nn in self.z_projs:
      z = self.dropouts[i](z)
      activation = get_activation(self.activations, i)
      z = activation(nn(z))
      i += 1
    out = self.output_proj(z)
    return out

In [0]:
nn = Mlp_classifier(
    784,
    10,
    1,
    [100],
    ['relu'],
    [0,0.2]
)

In [0]:
optimizer = torch.optim.SGD(
    nn.parameters(),
    lr=0.01
)

In [0]:
loss_builder = torch.nn.NLLLoss(reduction='mean')
m = torch.nn.LogSoftmax(dim=1)

In [133]:
batch_size = 64
n_epochs = 10

for epoch in range(n_epochs):
  nn.train()
  for i in range(0,len(train_data[0]),batch_size):
    batch = create_batch(train_data, i, batch_size)
    x = batch[0]
    gold = batch[1]
    y = nn(x)
    loss = loss_builder(m(y),gold)

    nn.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_value_(nn.parameters(), 5.)  # clip gradient if its norm exceed 5
    optimizer.step()

  nn.eval()
  dev_acc = 0
  for i in range(0,len(dev_data[0]),1):
    batch = create_batch(dev_data, i, 1)
    x = batch[0]
    gold = batch[1]
    y = nn(x)

    if np.argmax(m(y).detach().numpy()) == gold: dev_acc += 1

  dev_acc /= dev_data[0].shape[0]
  print(loss.data, dev_acc*100)

tensor(0.6878) 85.92999999999999
tensor(0.4179) 89.32
tensor(0.3148) 90.3
tensor(0.2614) 90.83
tensor(0.2281) 91.34
tensor(0.2061) 91.64999999999999
tensor(0.1914) 92.04
tensor(0.1801) 92.35
tensor(0.1711) 92.58999999999999
tensor(0.1637) 92.72


In [0]:
def create_batch_conv(data, index, batch_size):
  if index + batch_size > len(data[0]): batch_size = len(data[0]) - index - 1
  return (torch.cat(
      [
          # we reshape the image tensor so it has dimension (1, 784)
          torch.from_numpy(image).reshape(1, 1, 28, 28)
          for image in data[0][index:index + batch_size]
      ],
      # we want to concatenate on the batch dimension
      dim=0),

      torch.tensor(data[1][index:index + batch_size])
  )

In [0]:
class CNN_classifier(torch.nn.Module):
  def __init__(self):
    super().__init__()

    self.conv_model = torch.nn.Sequential(
        torch.nn.Conv2d(1,32,kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(2,stride=2)
    )

    self.linear = torch.nn.Linear(32*13*13,10)

  def forward(self, batch):
    z = batch
    z = self.conv_model(z)
    z = z.view(-1, 32*13*13)
    return self.linear(z)

In [0]:
cnn = CNN_classifier()

In [0]:
optimizer = torch.optim.SGD(
    cnn.parameters(),
    lr=0.01
)

In [0]:
loss_builder = torch.nn.NLLLoss(reduction='mean')
m = torch.nn.LogSoftmax(dim=1)

In [152]:
batch_size = 64
n_epochs = 10

for epoch in range(n_epochs):
  cnn.train()
  for i in range(0,len(train_data[0]),batch_size):
    batch = create_batch_conv(train_data, i, batch_size)
    x = batch[0]
    gold = batch[1]
    y = cnn(x)
    loss = loss_builder(m(y),gold)

    cnn.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_value_(cnn.parameters(), 5.)  # clip gradient if its norm exceed 5
    optimizer.step()

  cnn.eval()
  dev_acc = 0
  for i in range(0,len(dev_data[0]),1):
    batch = create_batch_conv(dev_data, i, 1)
    x = batch[0]
    gold = batch[1]
    y = cnn(x)

    if np.argmax(m(y).detach().numpy()) == gold: dev_acc += 1

  dev_acc /= dev_data[0].shape[0]
  print(loss.data, dev_acc*100)

tensor(0.2950) 90.83
tensor(0.2283) 91.9
tensor(0.2001) 92.44
tensor(0.1805) 92.75
tensor(0.1640) 93.24
tensor(0.1495) 93.77
tensor(0.1361) 94.31
tensor(0.1240) 94.71000000000001
tensor(0.1130) 95.22
tensor(0.1031) 95.63000000000001
