<a href="https://colab.research.google.com/github/CharlesPoletowin/YCBS-273/blob/master/Lecture4_mlp_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%matplotlib inline
from pathlib import Path
import requests
import gzip
import numpy as np

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

from sklearn.metrics import accuracy_score

# End-to-end ML pipeline example with MNIST dataset


## MNIST data setup

MNIST dataset has a training set of 60,000 examples, and a test set of 10,000 examples. Each image is of size 28 x 28.

Original source: http://yann.lecun.com/exdb/mnist/

In [0]:
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

def get_data(url, file_name, offset):
  content = requests.get(url).content
  (PATH / file_name).open("wb").write(content)
  with gzip.open((PATH / file_name).as_posix(), "rb") as f:
    data = np.frombuffer(f.read(), dtype=np.uint8, offset=offset)
  return data

y_train = get_data("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "train-labels", 8).astype('int64')

In [0]:
y_train.shape

(60000,)

In [0]:
x_train = get_data("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "train-images", 16).reshape(len(y_train), 784).astype('float32')
x_train.shape

(60000, 784)

In [0]:
y_test = get_data("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "test-labels", 8).astype('int64')


In [0]:
x_test = get_data("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "test-images", 16).reshape(len(y_test), 784).astype('float32')


In [0]:
x_train, y_train, x_test, y_test = map(
    torch.tensor, (x_train, y_train, x_test, y_test)
)
n = x_train.shape[0]

## Model definition

In [0]:
class Mnist_mlp_classifier(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1 = nn.Linear(784, 200)    
    self.layer2 = nn.Linear(200, 10)

  def forward(self, xb):
    xb = F.relu(self.layer1(xb))
    xb = F.relu(self.layer2(xb))
    return xb.view(-1, xb.size(1))
  
class Mnist_cnn_classifier(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(2, 2), stride=2, padding=0)
    self.pool1 = nn.MaxPool2d(kernel_size=(2, 2), padding=0)
    
    self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=(2, 2), stride=1, padding=0)
    self.pool2 = nn.MaxPool2d(kernel_size=(2, 2), padding=0)
    
    self.fc_layer = nn.Linear(72, 10)

  def forward(self, xb):
    h1 = self.conv1(xb)
    h1 = self.pool1(h1)
    h1 = F.relu(h1)
    
    h2 = self.conv2(h1)
    h2 = self.pool2(h2)
    h2 = F.relu(h2)  # 8 x 3 x 3
    
    # flatten the output from conv layers before feeind it to FC layer
    h2 = h2.view(-1, 72)
    out = self.fc_layer(h2)
    
    return out
    
    

## Training loop

In [0]:
def train_model(model, x_train, y_train, x_test, y_test, epochs=250, batch_size=64, lr=0.01, weight_decay=0):
  # data
  train_dataset = TensorDataset(x_train, y_train)
  train_data_loader = DataLoader(train_dataset, batch_size=batch_size)
  
  # loss function
  loss_func = F.cross_entropy

  # optimizer
  optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

  # training loop
  for epoch in range(epochs):
    for xb, yb in train_data_loader:
      
      pred = model(xb)
      loss = loss_func(pred, yb)

      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

    y_pred = model(x_test)
    acc = accuracy_score(torch.argmax(y_pred, dim=1).detach().numpy(), y_test)
    
    print('Loss at epoch %d : %f, test acc: %f' % (epoch, loss, acc))

In [0]:
mlp_model = Mnist_mlp_classifier()
train_model(mlp_model, x_train, y_train, x_test, y_test)

Loss at epoch 0 : 1.123346, test acc: 0.678600
Loss at epoch 1 : 1.075555, test acc: 0.637400
Loss at epoch 2 : 0.911502, test acc: 0.726000
Loss at epoch 3 : 0.729620, test acc: 0.734200
Loss at epoch 4 : 0.610144, test acc: 0.742400
Loss at epoch 5 : 0.689837, test acc: 0.731700
Loss at epoch 6 : 0.598341, test acc: 0.614200
Loss at epoch 7 : 0.853705, test acc: 0.636700
Loss at epoch 8 : 1.201001, test acc: 0.557400
Loss at epoch 9 : 0.905252, test acc: 0.635600
Loss at epoch 10 : 0.900863, test acc: 0.645800
Loss at epoch 11 : 1.185689, test acc: 0.528100
Loss at epoch 12 : 1.181128, test acc: 0.566600
Loss at epoch 13 : 0.969500, test acc: 0.641200
Loss at epoch 14 : 0.962378, test acc: 0.641100
Loss at epoch 15 : 0.961007, test acc: 0.647900
Loss at epoch 16 : 1.019719, test acc: 0.593000
Loss at epoch 17 : 1.174369, test acc: 0.559700
Loss at epoch 18 : 0.813903, test acc: 0.631900
Loss at epoch 19 : 0.889370, test acc: 0.630100
Loss at epoch 20 : 0.820430, test acc: 0.641600
Lo

KeyboardInterrupt: ignored

In [0]:
cnn_model = Mnist_cnn_classifier()
train_model(cnn_model, x_train.view(-1, 1, 28, 28), y_train, x_test.view(-1, 1, 28, 28), y_test)

Loss at epoch 0 : 2.307692, test acc: 0.102800
Loss at epoch 1 : 2.306761, test acc: 0.113500
Loss at epoch 2 : 2.306543, test acc: 0.113500
Loss at epoch 3 : 2.306474, test acc: 0.113500
Loss at epoch 4 : 2.306448, test acc: 0.113500
Loss at epoch 5 : 2.306437, test acc: 0.113500
Loss at epoch 6 : 2.306433, test acc: 0.113500
Loss at epoch 7 : 2.306432, test acc: 0.113500
Loss at epoch 8 : 2.306432, test acc: 0.113500
Loss at epoch 9 : 2.306430, test acc: 0.113500
Loss at epoch 10 : 2.306430, test acc: 0.113500
Loss at epoch 11 : 2.306430, test acc: 0.113500
Loss at epoch 12 : 2.306431, test acc: 0.113500
Loss at epoch 13 : 2.306431, test acc: 0.113500
Loss at epoch 14 : 2.306431, test acc: 0.113500
Loss at epoch 15 : 2.306431, test acc: 0.113500
Loss at epoch 16 : 2.306431, test acc: 0.113500
Loss at epoch 17 : 2.306431, test acc: 0.113500
Loss at epoch 18 : 2.306431, test acc: 0.113500
Loss at epoch 19 : 2.306431, test acc: 0.113500
Loss at epoch 20 : 2.306431, test acc: 0.113500
Lo