# Exercise 1

## Hand-written digit classification with multi-layer perceptron

In this exercise, we will build a multi-layer perceptron with PyTorch for hand-written digit classification. We will still use MNIST as the dataset.

In [None]:
import torch
import torchvision

Set random seed

In [None]:
# Disable cuDNN (presumably to avoid its non-deterministic kernels), then
# seed PyTorch's global random number generator.
torch.backends.cudnn.enabled = False
random_seed = 9999
torch.manual_seed(random_seed)

Load the dataset using the PyTorch API. First we load the training set. The training set contains 60000 images and the test set contains 10000 images.

In [None]:
# Mini-batch sizes used by the training and test data loaders.
batch_size_train = 64
batch_size_test = 1000

# Convert each image to a tensor, then normalize it with mean 0.1307 and
# standard deviation 0.3081.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST training split, downloaded into the current directory if needed.
trainset = torchvision.datasets.MNIST(
    './', train=True, download=True, transform=train_transform)

print(len(trainset))

# Serve the training set in shuffled mini-batches.
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size_train, shuffle=True)

In [None]:
# Same preprocessing as for the training images: tensor conversion followed
# by normalization with mean 0.1307 and standard deviation 0.3081.
test_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST test split, downloaded into the current directory if needed.
testset = torchvision.datasets.MNIST(
    './', train=False, download=True, transform=test_transform)

print(len(testset))

# Serve the test set in shuffled mini-batches.
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size_test, shuffle=True)

Let's show some example images.

In [None]:
# Pull one batch from the test loader to use as example images and labels.
examples = enumerate(test_loader)
batch_idx, example_batch = next(examples)
example_data, example_targets = example_batch

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure()

# Draw the first five example images side by side, without axis ticks.
num_examples = 5
for idx in range(num_examples):
    plt.subplot(1, num_examples, idx + 1)
    plt.tight_layout()
    # Index 0 selects the first channel of the image tensor.
    plt.imshow(example_data[idx][0], cmap='gray', interpolation='none')
    plt.xticks([])
    plt.yticks([])

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

Let's build a multi-layer perceptron. With PyTorch, you can use `nn.Linear` to construct one fully-connected layer. You can refer to https://pytorch.org/tutorials/ if needed.

In [None]:
# your code goes here

class MLP(nn.Module):
    """Multi-layer perceptron skeleton for hand-written digit classification.

    The layer definitions and the forward computation are intentionally left
    to be filled in as part of the exercise.
    """

    def __init__(self):
        super(MLP, self).__init__()
        # TODO: define the fully-connected layers here (e.g. with nn.Linear).

    def forward(self, x):
        """Return log-probabilities computed from ``x``.

        Once the exercise is completed, ``x`` is expected to hold one row of
        class scores per sample before the final log-softmax.
        """
        # TODO: flatten the input images and pass them through the layers.

        # Normalize over dim 1 explicitly: relying on the implicit dimension
        # is deprecated, emits a warning, and picks dim 0 for 1-D/3-D inputs.
        return F.log_softmax(x, dim=1)

In [None]:
# Instantiate the model and print its module structure.
network = MLP()
print(network)

For training the network, we need to specify the optimizer. 

In [None]:
# Training hyperparameters.
n_epochs = 10          # number of passes over the training set
learning_rate = 0.01   # SGD step size
momentum = 0.9         # SGD momentum factor
log_interval = 10      # batches between progress messages

# Stochastic gradient descent with momentum over all network parameters.
optimizer = optim.SGD(
    network.parameters(), lr=learning_rate, momentum=momentum)


In [None]:
# Loss histories; train() and test() each append one value per call.
train_losses, test_losses = [], []

In [None]:
def train(epoch):
    """Train ``network`` for one epoch and record the average training loss.

    Iterates over every batch of ``train_loader``, performs one optimizer
    step per batch, prints a progress line every ``log_interval`` batches,
    and appends the per-sample average loss of the epoch to ``train_losses``.

    Args:
        epoch: Epoch number; used only in the progress messages.
    """
    network.train()

    num_samples = len(train_loader.dataset)
    train_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()

        output = network(data)
        loss = F.nll_loss(output, target)

        # nll_loss returns the batch mean. Weight it by the batch size so
        # that dividing by the dataset size below yields a true per-sample
        # average (also correct when the last batch is smaller), which is
        # on the same scale as the loss reported by test().
        train_loss += loss.item() * len(data)

        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / len(train_loader), loss.item()))

    train_loss /= num_samples
    train_losses.append(train_loss)
        

In [None]:
def test():
    """Evaluate ``network`` on the test set and record the average loss.

    Runs every batch of ``test_loader`` through the network without tracking
    gradients, appends the per-sample average negative log likelihood to
    ``test_losses``, and prints the average loss together with the accuracy.
    """
    network.eval()

    num_samples = len(test_loader.dataset)
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = network(data)
            # Sum the per-sample losses; reduction='sum' replaces the
            # deprecated size_average=False argument.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # The predicted class is the index of the largest output.
            pred = output.argmax(dim=1, keepdim=True)
            # Accumulate as a plain int rather than a tensor.
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= num_samples
    test_losses.append(test_loss)

    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))

In [None]:
# For each epoch (numbered from 1), train once and then evaluate once.
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()

In [None]:
fig = plt.figure()

# Each history holds one value per call of train()/test(), i.e. one per
# epoch, so plot against 1-based epoch numbers and label the axis as such.
train_epochs = range(1, len(train_losses) + 1)
test_epochs = range(1, len(test_losses) + 1)

plt.plot(train_epochs, train_losses, "-*", color='blue')
plt.plot(test_epochs, test_losses, "-^", color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('epoch')
plt.ylabel('negative log likelihood loss')

In [None]:
# Run the example batch through the network without tracking gradients.
with torch.no_grad():
    output = network(example_data)

In [None]:
fig = plt.figure()

# Index of the largest output for every example in the batch.
predictions = output.data.max(1, keepdim=True)[1]

# Draw the first five example images, each titled with its prediction.
num_examples = 5
for idx in range(num_examples):
    plt.subplot(1, num_examples, idx + 1)
    plt.tight_layout()
    plt.imshow(example_data[idx][0], cmap='gray', interpolation='none')
    plt.title("Prediction: {}".format(predictions[idx].item()))
    plt.xticks([])
    plt.yticks([])


1. Change the learning rate to see if you can obtain better results.

2. Change the activation function. 

3. Change the number of hidden layers.