In [1]:
#from exp.helper_methods import *
#from exp.data_loader import *
#from exp.neural import *

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In Command mode press `h` to see all shortcuts

## helper_methods

In [3]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline

In [4]:
#export
import math
from torch import tensor

In [5]:
#export
import operator
def test(a,b,cmp,cname=None):
    if cname is None: cname=cmp.__name__
    assert cmp(a,b),f"{cname}:\n{a}\n{b}"

def test_eq(a,b): test(a,b,operator.eq,'==')

In [6]:
#export
def near(a,b): return torch.allclose(a, b, rtol=1e-3, atol=1e-5)
def test_near(a,b): test(a,b,near)

In [7]:
#export
def test_near_zero(a,tol=1e-3): assert a.abs()<tol, f"Near zero: {a}"

In [8]:
#export
def normalize(x, m, s): return (x-m)/s

In [9]:
#!python notebook2script.py helper_methods.ipynb

## data_loader

In [10]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline

In [11]:
#export
from fastai import datasets
import pickle
import gzip
from torch import tensor

In [12]:
#export
urls = {"MNIST_URL" : 'http://deeplearning.net/data/mnist/mnist.pkl'}

In [13]:
#export
url = urls["MNIST_URL"]
def get_data():
    path = datasets.download_data(url, ext='.gz')
    with gzip.open(path, 'rb') as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

In [14]:
#!python notebook2script.py data_loader.ipynb

## Neural

In [15]:
# %load_ext autoreload
# %autoreload 2
# %matplotlib inline

In [16]:
#export
class Neural():
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)
        return self.out
    
    def forward(self): raise Exception('not implemented')
    def backward(self): self.bwd(self.out, *self.args)

In [17]:
#export
class Relu(Neural):
    def forward(self, inp): return inp.clamp_min(0.)-0.5
    def bwd(self, out, inp): inp.g = (inp>0).float() * out.g

In [18]:
#export
class Lin(Neural):
    def __init__(self, w, b): self.w,self.b = w,b
        
    def forward(self, inp): return inp@self.w + self.b
    
    def bwd(self, out, inp):
        inp.g = out.g @ self.w.t()
        self.w.g = inp.t() @ out.g
        #self.w.g = torch.einsum("bi,bj->ij", inp, out.g)
        self.b.g = out.g.sum(0)

In [19]:
#export
class Mse(Neural):
    def __init__(self):
        self.name = "Mse"
    def forward (self, inp, targ): return (inp.squeeze() - targ).pow(2).mean()
    def bwd(self, out, inp, targ): inp.g = 2*(inp.squeeze()-targ).unsqueeze(-1) / targ.shape[0]

In [20]:
#!python notebook2script.py neural.ipynb

## Model

In [21]:
#export
import math
import torch

In [22]:
x_train,y_train,x_valid,y_valid = get_data()

In [23]:
train_mean,train_std = x_train.mean(),x_train.std()
x_train = normalize(x_train, train_mean, train_std)
# NB: Use training, not validation mean for validation set
x_valid = normalize(x_valid, train_mean, train_std)

In [24]:
n,m = x_train.shape
c = y_train.max()+1
n,m,c

(50000, 784, tensor(10))

In [25]:
# num hidden
nh = 50

In [26]:
# simplified kaiming init / he init
#784, 50
w1 = torch.randn(m,nh)*math.sqrt(2./m)
b1 = torch.zeros(nh)
w2 = torch.randn(nh,1)*math.sqrt(2./nh)
b2 = torch.zeros(1)

In [27]:
class Model():
    def __init__(self):
        self.layers = [Lin(w1,b1), Relu(), Lin(w2,b2)]
        self.loss = Mse()
        
    def __call__(self, x, targ):
        for l in self.layers: x = l(x)
        return self.loss(x, targ)
    
    def backward(self):
        self.loss.backward()
        for l in reversed(self.layers): l.backward()

In [28]:
w1.g,b1.g,w2.g,b2.g = [None]*4
model = Model()

In [29]:
%time loss = model(x_train, y_train); loss

CPU times: user 90.3 ms, sys: 0 ns, total: 90.3 ms
Wall time: 15.2 ms


tensor(40.3340)

In [30]:
%time model.backward()

CPU times: user 211 ms, sys: 23.3 ms, total: 234 ms
Wall time: 39.2 ms


In [35]:
learning_rate = 0.0001
epochs = 15

In [36]:
for epoch in range(epochs):
    parameters = [w1,b1,w2,b2]
    for parameter in parameters:
        parameter-=learning_rate*parameter.g
    loss = model(x_train, y_train)
    print(loss)

tensor(8.5826)
tensor(8.6245)
tensor(8.6761)
tensor(8.7374)
tensor(8.8080)
tensor(8.8877)
tensor(8.9764)
tensor(9.0738)
tensor(9.1800)
tensor(9.2948)
tensor(9.4181)
tensor(9.5497)
tensor(9.6896)
tensor(9.8377)
tensor(9.9939)
