In [116]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

import pickle, gzip, os
from pathlib import Path
from fastai import datasets

In [117]:
MNIST_URL='http://deeplearning.net/data/mnist/mnist.pkl'

In [118]:
path = Path(os.getcwd() + "/dataset/mnist")
path = datasets.download_data(MNIST_URL, path, ext='.gz')

In [119]:
with gzip.open(path, 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

In [120]:
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))

In [121]:
def normalize(x, m, s): return (x-m)/s

def normalize_to(train, valid):
    m, s = train.mean(), train.std()
    return normalize(train, m, s), normalize(valid, m, s)

In [122]:
x_train, x_valid = normalize_to(x_train, x_valid)

In [123]:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func
        
    def forward(self, x): return self.func(x)
    
def flatten(x): return x.view(len(x), -1)

In [124]:
def mnist_resize(x): return x.view(-1, 1, 28, 28)

In [125]:
def get_cnn_model():
    return nn.Sequential(
        Lambda(mnist_resize),
        nn.Conv2d( 1,  8, 5, 2, 2), nn.ReLU(),
        nn.Conv2d( 8, 16, 3, 2, 1), nn.ReLU(),
        nn.Conv2d(16, 32, 3, 2, 1), nn.ReLU(),
        nn.Conv2d(32, 32, 3, 2, 1), nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32, y_train.max().item() + 1)
    )

In [126]:
class Dataset():
    def __init__(self, x, y): self.x, self.y = x, y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i], self.y[i] 

In [127]:
bs = 64
train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

train_dl = DataLoader(train_ds, bs, True)
valid_dl = DataLoader(valid_ds, bs*2)

In [128]:
model = get_cnn_model()
opt = optim.SGD(model.parameters(), .5)

In [129]:
def accuracy(out, yb): return (torch.argmax(out, dim=1)==yb).float().mean()

In [130]:
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for e in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_func(model(xb), yb).backward()
            opt.step()
            opt.zero_grad()
        
        model.eval()
        with torch.no_grad():
            tot_loss, tot_acc = 0., 0.
            for xb, yb in valid_dl:
                pred = model(xb)
                tot_loss += loss_func(pred, yb)
                tot_acc  += accuracy (pred, yb)
        nv = len(valid_dl)
        print("epoch: {}  loss: {}  acc: {}".format(e, tot_loss/nv, tot_acc/nv))
    return tot_loss/nv, tot_acc/nv

In [55]:
# Below result comes from simple linear layers
# nn.Sequential(nn.Linear(x_train.shape[-1], 50), nn.ReLU(), nn.Linear(50, 10))
# saved in weight.pth

# epoch: 0  loss: 0.10733925551176071  acc: 0.969936728477478
# epoch: 1  loss: 0.09393870085477829  acc: 0.974782407283783
# epoch: 2  loss: 0.11095812171697617  acc: 0.9709256291389465
# epoch: 3  loss: 0.16840021312236786  acc: 0.9548061490058899

In [131]:
# Using CNN layers
fit(4, model, F.cross_entropy, opt, train_dl, valid_dl)

epoch: 0  loss: 0.10532914102077484  acc: 0.9673655033111572
epoch: 1  loss: 0.07269898802042007  acc: 0.9800237417221069
epoch: 2  loss: 0.0744730532169342  acc: 0.9773536324501038
epoch: 3  loss: 0.07222169637680054  acc: 0.9802215099334717


(tensor(0.0722), tensor(0.9802))

In [132]:
torch.save(model.state_dict(), "weight_cnn.pth")