In [None]:
# Enable the autoreload extension in mode 2, so that imported modules are
# reloaded before each cell runs and edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torchvision
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [3]:
import scrapbook as sb

In [4]:
import sys
# Add the project-relative modules directory to the import search path.
# The path is relative, so it is resolved against the kernel's working directory.
# NOTE(review): presumably this is where the local `mnist` module imported in
# the next cell lives — confirm.
sys.path.append('../../modules')

In [5]:
import mnist

In [6]:
# Create the MNIST helper from the local `mnist` module, pointing it at '../data'.
# NOTE(review): whether this reads existing files or downloads them is not
# visible from this notebook — check mnist.MNIST.
digits = mnist.MNIST('../data')

In [7]:
# Number of samples requested from the MNIST helper; the resulting dataset is
# later divided into a training and a validation subset.
n_samples = 7000

In [8]:
# Request `n_samples` examples from the MNIST helper.
# NOTE(review): judging by the method name and the 28*28-input model used later,
# each image is presumably flattened to 784 features — confirm in the mnist module.
dataset = digits.flat_train_dataset(n_samples)

In [9]:
# Hold out a fixed number of samples for validation and train on the rest.
# The training size is derived from len(dataset) so that the two sizes always
# sum to the dataset length (random_split raises ValueError otherwise); with a
# 7000-sample dataset this is still the original 6000/1000 split.
n_validation = 1000
n_train = len(dataset) - n_validation
train_dataset, validation_dataset = torch.utils.data.random_split(dataset, (n_train, n_validation))

In [10]:
# Both subsets are served in mini-batches of 100 samples, reshuffled on every pass.
loader_options = {'batch_size': 100, 'shuffle': True}
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
validation_loader = torch.utils.data.DataLoader(validation_dataset, **loader_options)

## The model 

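We will use four fully connected layers with `ReLU` activations in between as our model. The last layer outputs raw class scores (logits): the `softmax` is not a separate layer of the model, it is applied inside the cross-entropy loss used for training. The model can be easily constructed using the PyTorch `nn.Sequential` class: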

In [11]:
# Fully connected classifier 784 -> 1200 -> 600 -> 300 -> 10 with a ReLU after
# every linear layer except the last one, whose outputs are left as raw scores.
model = nn.Sequential(
    nn.Linear(in_features=28 * 28, out_features=1200),
    nn.ReLU(),
    nn.Linear(in_features=1200, out_features=600),
    nn.ReLU(),
    nn.Linear(in_features=600, out_features=300),
    nn.ReLU(),
    nn.Linear(in_features=300, out_features=10),
)

In [12]:
def accuracy(pred, labels):
    """Return the fraction of entries whose arg-max along axis 1 equals the label.

    Args:
        pred: tensor of scores; the predicted class of each row is the index of
            its largest value along axis 1.
        labels: tensor of target class indices, compared element-wise with the
            predicted classes.

    Returns:
        A Python float: the number of matches divided by ``len(labels)``.
    """
    predicted_classes = torch.argmax(pred, axis=1)
    hits = predicted_classes == labels
    n_correct = torch.sum(hits).to(dtype=torch.float32).item()
    return n_correct / len(labels)

In [13]:
def model_accuracy(model, dataset):
    """Score `model` on every sample of `dataset` in a single forward pass.

    The whole dataset is fetched at once with ``dataset[:]``, which must yield
    a ``(features, labels)`` pair. The forward pass runs with gradient tracking
    disabled and the outputs are scored with `accuracy`.

    Returns:
        The value returned by ``accuracy`` for the model outputs and the labels.
    """
    inputs, targets = dataset[:]
    with torch.no_grad():
        outputs = model(inputs)
    return accuracy(outputs, targets)

Before we start training we need the loss function:

In [14]:
# Cross-entropy criterion. It is fed the model's raw scores directly: the
# log-softmax is computed inside the loss, so the network has no softmax layer.
ce_loss = nn.CrossEntropyLoss()

In [15]:
def kaiming_init_uniform(sigma=1):
    """Build a layer initialiser suitable for ``Module.apply``.

    The returned function only acts on linear layers: their weights are drawn
    uniformly from ``[-b, b]`` with ``b = sigma * sqrt(6 / fan_in)``, where
    ``fan_in`` is the second dimension of the weight matrix, and their bias
    (when present) is set to zero. Any other module is left untouched.

    Args:
        sigma: multiplier applied to the uniform bound (1 by default).

    Returns:
        A function taking a single module and initialising it in place.
    """
    def init(layer):
        # Guard clause: anything that is not a linear layer is skipped.
        if not isinstance(layer, torch.nn.Linear):
            return
        n_inputs = layer.weight.size(1)
        bound = sigma * np.sqrt(6 / n_inputs)
        torch.nn.init.uniform_(layer.weight, -bound, bound)
        if layer.bias is not None:
            torch.nn.init.zeros_(layer.bias)
    return init

In [16]:
# Optimizer configuration: `steper` is the name of an optimizer class in
# torch.optim and `steper_parameters` holds the keyword arguments for its
# constructor (here only the learning rate).
steper = 'Adam'
steper_parameters = dict(lr=0.01)

In [17]:
# Record the optimizer name and its parameters with scrapbook under the keys
# 'steper' and 'steper_parameters', so they are stored alongside this run.
sb.glue('steper',steper)
sb.glue('steper_parameters', steper_parameters)

In [18]:
# Re-initialise the model in place: Module.apply calls the initialiser on every
# submodule. apply() returns the model itself, which is why this cell's output
# is the printed model structure.
model.apply(kaiming_init_uniform())

Sequential(
  (0): Linear(in_features=784, out_features=1200, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1200, out_features=600, bias=True)
  (3): ReLU()
  (4): Linear(in_features=600, out_features=300, bias=True)
  (5): ReLU()
  (6): Linear(in_features=300, out_features=10, bias=True)
)

In [19]:
# Look up the optimizer class by name in torch.optim, then build it over the
# model's parameters with the configured keyword arguments.
optimizer_class = getattr(torch.optim, steper)
optim = optimizer_class(model.parameters(), **steper_parameters)

In [20]:
%%time
# Training: 16 passes over train_loader, with one optimizer step per mini-batch.
# NOTE(review): the output saved with this cell lists epochs 0-23, i.e. it was
# produced by a 24-epoch run and not by range(16) — re-run the notebook so the
# code and the recorded results agree.
for e in range(16):
    for features, labels in train_loader:
        optim.zero_grad()  # clear the gradients left over from the previous step
        pred = model(features)
        loss = ce_loss(pred, labels)
        loss.backward()
        optim.step()
    # Prints the loss of the last mini-batch of the epoch only, not an epoch average.
    print(e, loss.item())

0 0.3594667315483093
1 0.19438646733760834
2 0.1215486228466034
3 0.08176902681589127
4 0.3110867440700531
5 0.1188136488199234
6 0.17061206698417664
7 0.028603196144104004
8 0.06071077287197113
9 0.04918994382023811
10 0.1259954273700714
11 0.038997452706098557
12 0.09828086197376251
13 0.018851660192012787
14 0.10056978464126587
15 0.22952263057231903
16 0.006318177096545696
17 0.14536398649215698
18 0.10743352770805359
19 0.08553570508956909
20 0.0255085751414299
21 0.02918856404721737
22 0.0021302709355950356
23 0.049616724252700806
CPU times: user 8min 10s, sys: 22.5 s, total: 8min 32s
Wall time: 49.5 s


In [21]:
# Accuracy of the trained model on the training subset, recorded with
# scrapbook under the key "ac_train".
ac_train = model_accuracy(model, train_dataset)
sb.glue("ac_train", ac_train)

In [22]:
# Accuracy of the trained model on the held-out validation subset, recorded
# with scrapbook under the key "ac_test".
# NOTE(review): despite the "ac_test" name, this is measured on
# validation_dataset, not on a separate test set.
ac_test = model_accuracy(model, validation_dataset)
sb.glue("ac_test", ac_test)