# A basic training loop

## From the last notebook...

In [43]:
import pickle, gzip, torch, math, numpy as np, torch.nn.functional as F
from pathlib import Path
from IPython.core.debugger import set_trace
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader

In [44]:
# Root folder for datasets, and the MNIST sub-folder inside it.
DATA_PATH = Path('data')
PATH = DATA_PATH/'mnist'

# NOTE: unpickling can execute arbitrary code, so only load an archive you trust.
# The pickle holds three entries; the first two are (inputs, labels) pairs and
# the third is discarded here.
with gzip.open(PATH/'mnist.pkl.gz', 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

# Convert the four unpickled objects to torch tensors.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(arr) for arr in (x_train, y_train, x_valid, y_valid))
x_train.min(),x_train.max()

(tensor(0.), tensor(0.9961))

In [45]:
# Mini-batch size for the training loader (the validation loader uses bs*2).
bs=64
# Number of passes over the training data made by fit().
epochs = 2
# Learning rate handed to optim.SGD.
lr=0.2

In [46]:
# Pair inputs with their labels so that indexing a dataset yields an (x, y) tuple.
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

In [82]:
def loss_batch(model, xb, yb, loss_fn, opt=None):
    """Compute the loss for one batch and, if `opt` is given, take one optimizer step.

    Args:
        model: callable applied to `xb` to produce predictions.
        xb, yb: inputs and targets for this batch.
        loss_fn: callable `(predictions, yb) -> loss tensor`.
        opt: optimizer; when `None` the batch is only evaluated.

    Returns:
        `(loss as a Python number, number of items in xb)`.
    """
    predictions = model(xb)
    loss = loss_fn(predictions, yb)
    batch_size = len(xb)

    # Evaluation only: no gradient work at all.
    if opt is None:
        return loss.item(), batch_size

    # Training: backprop, update, then clear gradients for the next batch.
    loss.backward()
    opt.step()
    opt.zero_grad()
    return loss.item(), batch_size

def fit(epochs, model, loss_fn, opt, train_dl, valid_dl):
    """Train `model` for `epochs` epochs, printing the validation loss after each.

    Every epoch runs one optimisation pass over `train_dl`, then evaluates
    `valid_dl` without gradients and prints `epoch` and the mean validation
    loss, weighted by batch size.
    """
    for epoch in range(epochs):
        # Training pass.
        model.train()
        for inputs, targets in train_dl:
            loss_batch(model, inputs, targets, loss_fn, opt)

        # Validation pass: no optimizer is passed, so the weights are not updated.
        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, inputs, targets, loss_fn)
                       for inputs, targets in valid_dl]
        batch_losses, batch_sizes = zip(*results)

        # Weight each batch loss by its size so a short last batch is not over-counted.
        weighted_total = np.sum(np.multiply(batch_losses, batch_sizes))
        val_loss = weighted_total / np.sum(batch_sizes)

        print(epoch, val_loss)

In [48]:
class Lambda(nn.Module):
    """Wrap a plain callable as an `nn.Module` so it can sit inside `nn.Sequential`."""

    def __init__(self, func):
        super().__init__()
        # Kept as a plain attribute and called as-is in forward().
        self.func = func

    def forward(self, x):
        """Apply the wrapped callable to `x` and return its result."""
        result = self.func(x)
        return result

## Simplify nn.Sequential layers

In [49]:
def ResizeBatch(*size):
    """Return a layer that views its input as `(-1, *size)`; the leading dim is inferred."""
    def _resize(x):
        target_shape = (-1,) + size
        return x.view(target_shape)
    return Lambda(_resize)
def Flatten():
    """Return a layer that keeps dim 0 and collapses all remaining dims into one."""
    def _flatten(x):
        leading = x.size(0)
        return x.view((leading, -1))
    return Lambda(_flatten)
def PoolFlatten():
    """Return a head that average-pools to 1x1 spatially and then flattens."""
    pool = nn.AdaptiveAvgPool2d(1)
    flatten = Flatten()
    return nn.Sequential(pool, flatten)

In [50]:
# Small all-convolutional classifier built layer by layer.
# Each 3x3 / stride-2 / padding-1 conv roughly halves the spatial size
# (28 -> 14 -> 7 -> 4); the last conv has 10 channels, which the pooling
# head turns into 10 outputs per example.
model = nn.Sequential(*[
    ResizeBatch(1, 28, 28),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    PoolFlatten(),
])

In [51]:
def get_data(train_ds, valid_ds, bs):
    """Build the training and validation `DataLoader`s.

    The training loader shuffles and uses batch size `bs`; the validation
    loader keeps dataset order and uses `bs*2`.

    Returns:
        `(train_loader, valid_loader)`.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs*2)
    return train_loader, valid_loader

# Loaders for the first training run (raw flat batches, no transforms).
train_dl,valid_dl = get_data(train_ds, valid_ds, bs)

In [52]:
# Loss used by every training run below.
loss_fn = F.cross_entropy

In [53]:
# Plain SGD over all of the model's parameters, using the global learning rate.
opt = optim.SGD(model.parameters(), lr=lr)

In [54]:
# Sanity check before training: loss on the first `bs` validation examples.
# The value shown below is close to ln(10) ~= 2.303, the cross-entropy of a
# uniform guess over 10 classes.
loss_fn(model(x_valid[0:bs]), y_valid[0:bs])

tensor(2.3035, grad_fn=<NllLossBackward>)

In [14]:
# Train for `epochs` epochs; fit() prints "<epoch> <validation loss>" after each.
fit(epochs, model, loss_fn, opt, train_dl, valid_dl)

0 0.968078899383545
1 1.1741710505485534


## Transformations; refactor network

In [55]:
def mnist2image(b):
    """Batch transform: view `b[0]` as `(-1, 1, 28, 28)` and pass `b[1]` through.

    Returns:
        A tuple `(reshaped b[0], b[1])`.
    """
    images = b[0].view(-1, 1, 28, 28)
    labels = b[1]
    return images, labels

In [56]:
from collections.abc import Iterable
from functools import reduce

def is_listy(x):
    """Return True when `x` is a list or a tuple (including subclasses)."""
    return isinstance(x, list) or isinstance(x, tuple)

def listify(p=None, q=None):
    """Return `p` as a list, repeating a lone item to match `q`.

    Conversion of `p`:
      * `None` becomes `[]`.
      * A string/bytes or any non-iterable value becomes a one-element list.
      * A list is used as-is; any other iterable (tuple, generator, ...) is
        materialised with `list()`.

    Target length from `q`: an int is used directly, `None` means 1, anything
    else contributes its `len()`. Only a one-element `p` is repeated; longer
    inputs are returned without repetition.
    """
    if p is None:
        p = []
    elif isinstance(p, (str, bytes)) or not isinstance(p, Iterable):
        # Strings are iterable, but callers mean them as a single item.
        p = [p]
    elif not isinstance(p, list):
        # Always hand back a real list so `[...] + listify(x)` works for tuples
        # and so that iterables without len() (e.g. generators) are supported.
        p = list(p)

    if q is None:
        n = 1
    elif isinstance(q, int):
        n = q
    else:
        n = len(q)

    if len(p) == 1:
        p = p * n
    return p

def compose(funcs):
    """Chain `funcs` into one single-argument function.

    Composition is in the mathematical order: for `[f, g]` the result computes
    `f(g(x))`, so the LAST function in the list runs first. With no functions
    the result is the identity.
    """
    def _identity(o):
        return o

    def _chain(outer, inner):
        # `outer` is everything composed so far; `inner` runs before it.
        return lambda z: outer(inner(z))

    return reduce(_chain, listify(funcs), _identity)

In [57]:
class IterPipe():
    """Iterable wrapper that passes every item of `iterator` through `funcs`."""

    def __init__(self, iterator, funcs):
        # The underlying iterable (re-iterated on every __iter__ call).
        self.iter = iterator
        # All transforms collapsed into a single callable.
        self.func = compose(funcs)

    def __len__(self):
        """Length of the wrapped iterable."""
        return len(self.iter)

    def __iter__(self):
        """Lazily yield each item of the wrapped iterable after transforming it."""
        return map(self.func, self.iter)

In [58]:
def get_dl(ds, bs, shuffle, tfms=None):
    """Wrap a `DataLoader` over `ds` in an `IterPipe` that applies `tfms` to each batch."""
    loader = DataLoader(ds, batch_size=bs, shuffle=shuffle)
    return IterPipe(loader, tfms)

def get_data(train_ds, valid_ds, bs, train_tfms=None, valid_tfms=None):
    """Build transformed training and validation loaders.

    The training loader shuffles and uses batch size `bs`; the validation
    loader does not shuffle and uses `bs*2`. Each gets its own transforms.

    Returns:
        `(train_loader, valid_loader)`.
    """
    train_loader = get_dl(train_ds, bs, shuffle=True, tfms=train_tfms)
    valid_loader = get_dl(valid_ds, bs*2, shuffle=False, tfms=valid_tfms)
    return train_loader, valid_loader

In [59]:
# Rebuild both loaders so every batch is reshaped to images by mnist2image.
train_dl,valid_dl = get_data(train_ds, valid_ds, bs, mnist2image, mnist2image)

In [60]:
# Pull the first (transformed) validation batch for inspection.
x,y = next(iter(valid_dl))

In [61]:
# Compare shapes: a raw dataset item vs. the same item after the batch transform.
valid_ds[0][0].shape, x[0].shape

(torch.Size([784]), torch.Size([1, 28, 28]))

In [22]:
# The transform should only change the shape, not the values.
torch.allclose(valid_ds[0][0], x[0].view(-1))

True

In [62]:
def conv2_relu(nif, nof, ks, stride):
    """Return a `Conv2d` followed by `ReLU`, as an `nn.Sequential`.

    Args:
        nif: number of input channels.
        nof: number of output channels.
        ks: kernel size; padding is set to `ks//2`.
        stride: convolution stride.
    """
    conv = nn.Conv2d(nif, nof, ks, stride, padding=ks//2)
    activation = nn.ReLU()
    return nn.Sequential(conv, activation)

def simple_cnn(actns, kernel_szs, strides):
    """Stack conv+ReLU blocks and finish with the pool-and-flatten head.

    Block `i` maps `actns[i]` channels to `actns[i+1]` channels using
    `kernel_szs[i]` and `strides[i]`; there is one block per entry of `strides`.
    """
    layers = []
    for i, stride in enumerate(strides):
        block = conv2_relu(actns[i], actns[i+1], kernel_szs[i], stride=stride)
        layers.append(block)
    layers.append(PoolFlatten())
    return nn.Sequential(*layers)

In [63]:
def get_model():
    """Create a fresh 3-block CNN (1 -> 16 -> 16 -> 10 channels) and an SGD optimizer for it.

    Returns:
        `(model, optimizer)`; the optimizer uses the global `lr`.
    """
    net = simple_cnn([1,16,16,10], [3,3,3], [2,2,2])
    sgd = optim.SGD(net.parameters(), lr=lr)
    return net, sgd

In [64]:
# Fresh model and optimizer for the refactored network.
model,opt = get_model()

In [28]:
# Same training loop as before, now fed by the transforming loaders.
fit(epochs, model, loss_fn, opt, train_dl, valid_dl)

0 0.32992440748214724
1 0.9453816131591797


## CUDA

In [65]:
def to_device(device, b):
    """Move a batch onto `device`.

    Args:
        device: target device, passed straight to each object's `.to()`.
        b: either a single object with a `.to()` method (e.g. one tensor) or
            an iterable of such objects (e.g. an `(x, y)` batch).

    Returns:
        The moved object when `b` itself has `.to()`; otherwise a list with
        every element of `b` moved to `device`.
    """
    # A lone tensor must be moved as a whole; iterating over it would split it
    # along its first dimension and return a list of slices.
    if hasattr(b, 'to'):
        return b.to(device)
    return [o.to(device) for o in b]

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA.
default_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [66]:
from functools import partial

# compose() applies the list right-to-left, so each batch is first reshaped by
# mnist2image and then moved to the device by to_device.
tfms = [partial(to_device, default_device), mnist2image]
train_dl,valid_dl = get_data(train_ds, valid_ds, bs, tfms, tfms)

In [67]:
def get_model():
    """Create the CNN on `default_device` together with an SGD optimizer for it.

    The model is moved to the device before the optimizer is built, so the
    optimizer is created from the parameters of the moved model.

    Returns:
        `(model, optimizer)`; the optimizer uses the global `lr`.
    """
    net = simple_cnn([1,16,16,10], [3,3,3], [2,2,2])
    net = net.to(default_device)
    sgd = optim.SGD(net.parameters(), lr=lr)
    return net, sgd

In [68]:
# Fresh model (on default_device) and optimizer.
model,opt = get_model()

In [69]:
# Train with batches that the loaders have already moved to default_device.
fit(epochs, model, loss_fn, opt, train_dl, valid_dl)

0 1.0220911031723023
1 0.7925131357192993


## Learner

In [91]:
from tqdm import tqdm, tqdm_notebook, trange, tnrange

def fit(epochs, model, loss_fn, opt, train_dl, valid_dl):
    """Train `model` for `epochs` epochs with notebook progress bars.

    An outer bar tracks epochs; an inner bar (removed when the epoch ends)
    tracks training batches and shows the latest batch loss. After each epoch
    the size-weighted mean validation loss is printed next to the epoch index.
    """
    for epoch in tnrange(epochs):
        # Training pass with a per-batch progress bar.
        model.train()
        progress = tqdm_notebook(train_dl, leave=False)
        for inputs, targets in progress:
            batch_loss = loss_batch(model, inputs, targets, loss_fn, opt)[0]
            progress.set_postfix_str(batch_loss)

        # Validation pass: no optimizer is passed, so the weights are not updated.
        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, inputs, targets, loss_fn)
                       for inputs, targets in valid_dl]
        batch_losses, batch_sizes = zip(*results)

        # Weight each batch loss by its size so a short last batch is not over-counted.
        weighted_total = np.sum(np.multiply(batch_losses, batch_sizes))
        val_loss = weighted_total / np.sum(batch_sizes)

        print(epoch, val_loss)

In [92]:
class DataBunch():
    """Bundle a training and a validation loader that deliver batches on one device.

    Args:
        train_ds, valid_ds: datasets to wrap.
        bs: training batch size; validation uses `bs*2`.
        device: target device; `default_device` when `None`.
        train_tfms, valid_tfms: extra batch transform(s) for each loader.
    """

    def __init__(self, train_ds, valid_ds, bs=64, device=None, train_tfms=None, valid_tfms=None):
        if device is None:
            device = default_device
        self.device = device

        # The device move goes first in the list; compose() runs the list
        # right-to-left, so user transforms are applied before the move.
        move_to_device = [partial(to_device, self.device)]
        train_pipeline = move_to_device + listify(train_tfms)
        valid_pipeline = move_to_device + listify(valid_tfms)

        self.train_dl = get_dl(train_ds, bs, shuffle=True, tfms=train_pipeline)
        self.valid_dl = get_dl(valid_ds, bs*2, shuffle=False, tfms=valid_pipeline)

class Learner():
    """Tie a model to a `DataBunch` and expose a simple `fit` method."""

    def __init__(self, data, model):
        self.data = data
        # Keep the model on the same device the data bunch delivers batches on.
        self.model = model.to(data.device)

    def fit(self, epochs, lr, opt_fn=optim.SGD):
        """Train for `epochs` epochs at learning rate `lr` with cross-entropy loss.

        A new optimizer is built from `opt_fn` on every call, so optimizer
        state does not carry over between calls.
        """
        optimizer = opt_fn(self.model.parameters(), lr=lr)
        criterion = F.cross_entropy
        train_dl, valid_dl = self.data.train_dl, self.data.valid_dl
        # Delegates to the module-level fit() training loop.
        fit(epochs, self.model, criterion, optimizer, train_dl, valid_dl)

In [93]:
# Data bunch on the default device, reshaping every batch to images.
data = DataBunch(train_ds, valid_ds, bs, train_tfms=mnist2image, valid_tfms=mnist2image)
model = simple_cnn([1,16,16,10], [3,3,3], [2,2,2])
# Learner moves the model to data.device.
learner = Learner(data, model)
# Optimizer factory: SGD with momentum; lr is supplied later by Learner.fit.
opt_fn = partial(optim.SGD, momentum=0.9)

In [94]:
# Four epochs at a fifth of the base learning rate, using momentum SGD.
learner.fit(4, lr/5, opt_fn=opt_fn)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

0 0.493440464925766


HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

1 0.35726073141098025


HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

2 0.24172384033203126


HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

3 0.224763054561615


In [95]:
# Start again from a freshly initialised model on the same data.
learner = Learner(data, simple_cnn([1,16,16,10], [3,3,3], [2,2,2]))

In [96]:
# Manual learning-rate schedule: low, then full, then low again.
# Each call builds a new optimizer, so momentum state is not carried over.
learner.fit(1, lr/5, opt_fn=opt_fn)
learner.fit(2, lr, opt_fn=opt_fn)
learner.fit(1, lr/5, opt_fn=opt_fn)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

0 0.46503244948387146


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

0 0.25512682299613954


HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

1 0.21429105672836304


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))

0 0.1611472924232483


In [178]:
# TODO: metrics