In [1]:
from pathlib import Path
import requests
import pickle, gzip
import matplotlib.pyplot as plt 
import numpy as np 
import torch 
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from dataclasses import dataclass

#export
from torch.utils.data import TensorDataset, DataLoader, Dataset

#export
from torch.utils.data import TensorDataset, DataLoader, Dataset
from dataclasses import dataclass
from typing import Any, Collection, Callable
from functools import partial, reduce


DATA_PATH = Path('data')
PATH = DATA_PATH/'mnist'
PATH.mkdir(parents=True, exist_ok=True)

URL='http://deeplearning.net/data/mnist/'
FILENAME='mnist.pkl.gz'

if not (PATH/FILENAME).exists():
    content = requests.get(URL+FILENAME).content
    (PATH/FILENAME).open('wb').write(content)

In [2]:

#export
from tqdm import tqdm, tqdm_notebook, trange, tnrange
from ipykernel.kernelapp import IPKernelApp

def in_notebook(): return IPKernelApp.initialized()


if in_notebook():
    tqdm = tqdm_notebook
    trange = tnrange


In [3]:
with gzip.open(PATH/FILENAME, 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

In [4]:
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))

In [5]:
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

In [6]:
def array_to_image(x):
    return x.view(1, 28, 28)

In [7]:
@dataclass
class TfmDataset(Dataset):
    ds: Dataset
    tfm: Callable = None
    
    def __len__(self):
        return len(self.ds)
    
    def __getitem__(self,idx):
        x, y = self.ds[idx]
        if self.tfm is not None:
            x=self.tfm(x)
        return x, y

In [8]:
train_ds_tfm=TfmDataset(train_ds, array_to_image)
valid_ds_tfm=TfmDataset(valid_ds, array_to_image)

In [9]:
x, y =next(iter(valid_ds_tfm))

In [10]:
def loss_batch(model, xb, yb, loss_fn, opt=None):
    loss = loss_fn(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    return loss.item(), len(xb)



def fit(epochs, model, loss_fn, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb,yb in train_dl: loss_batch(model, xb, yb, loss_fn, opt)

        model.eval()
        with torch.no_grad():
            losses,nums = zip(*[loss_batch(model, xb, yb, loss_fn)
                                for xb,yb in valid_dl])
        val_loss = np.sum(np.multiply(losses,nums)) / np.sum(nums)

        print(epoch, val_loss)

In [11]:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func 
    def forward(self, x):
        return self.func(x)

In [12]:
def conv_2_relu(channel_in, channel_out, kernel_size, stride):
    return nn.Sequential(nn.Conv2d(channel_in, channel_out, kernel_size, stride, padding=stride//2), nn.ReLU())

In [13]:
def Flatten(): 
    return Lambda(lambda x: x.view((x.size(0), -1)))
def PoolFlatten(): 
    return nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten())

In [14]:
def simple_cnn(activations, kernel_sizers, strides):
    layers = []
    for i in range(len(strides)):
        layers.append(conv_2_relu(activations[i], 
                                 activations[i+1], 
                                 kernel_sizers[i], 
                                 strides[i]))
    layers.append(Flatten())
    return nn.Sequential(*layers)

In [None]:
def get_model():
    model = simple_cnn([1,16,16,10], [3,3,3], [2,2,2])
    return model, optim.SGD(model.parameters(), lr=0.01)

In [None]:
model, opt = get_model()

In [15]:
train_dl = DataLoader(train_ds_tfm, batch_size=64)
valid_dl = DataLoader(valid_ds_tfm, batch_size=64*2)

In [None]:
loss_fn=F.cross_entropy
fit(2, model, loss_fn, opt, train_dl, valid_dl)

In [16]:
def to_device(device, b): 
    return [o.to(device) for o in b]
default_device = torch.device('cuda')



In [17]:

#export
from tqdm import tqdm, tqdm_notebook, trange, tnrange
from ipykernel.kernelapp import IPKernelApp

def in_notebook(): return IPKernelApp.initialized()

def to_device(device, b): return [o.to(device) for o in b]
default_device = torch.device('cuda')

if in_notebook():
    tqdm = tqdm_notebook
    trange = tnrange

@dataclass
class DeviceDataLoader():
    dl: DataLoader
    device: torch.device
    progress_func:Callable=None
        
    def __len__(self): return len(self.dl)
    def __iter__(self):
        self.gen = (to_device(self.device,o) for o in self.dl)
        if self.progress_func is not None:
            self.gen = self.progress_func(self.gen, total=len(self.dl), leave=False)
        return iter(self.gen)

    @classmethod
    def create(cls, *args, device=default_device, progress_func=tqdm, **kwargs):
        return cls(DataLoader(*args, **kwargs), device=device, progress_func=progress_func)

In [18]:
def get_data(train_ds, valid_ds, bs):
    return (DeviceDataLoader.create(train_ds, bs,   shuffle=True),
            DeviceDataLoader.create(valid_ds, bs*2, shuffle=False))

In [19]:
train_dl,valid_dl = get_data(train_ds_tfm, valid_ds_tfm, 64)

In [20]:
def get_model():
    model = simple_cnn([1,16,16,10], [3,3,3], [2,2,2]).to(default_device)
    return model, optim.SGD(model.parameters(), lr=0.01)

In [32]:
model,opt = get_model()


In [33]:
fit(2, model, loss_fn, opt, train_dl, valid_dl)

NameError: name 'loss_fn' is not defined

In [22]:
#export
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl):
    for epoch in tnrange(epochs):
        model.train()
        for xb,yb in train_dl:
            loss,_ = loss_batch(model, xb, yb, loss_fn, opt)
            if train_dl.progress_func is not None: train_dl.gen.set_postfix_str(loss)

        model.eval()
        with torch.no_grad():
            losses,nums = zip(*[loss_batch(model, xb, yb, loss_fn)
                                for xb,yb in valid_dl])
        val_loss = np.sum(np.multiply(losses,nums)) / np.sum(nums)

        print(epoch, val_loss)

In [23]:

#export
class DataBunch():
    def __init__(self, train_ds, valid_ds, bs=64, device=None, train_tfm=None, valid_tfm=None):
        self.device = default_device if device is None else device
        self.train_dl = DeviceDataLoader.create(TfmDataset(train_ds,train_tfm), bs, shuffle=True)
        self.valid_dl = DeviceDataLoader.create(TfmDataset(valid_ds, valid_tfm), bs*2, shuffle=False)

class Learner():
    def __init__(self, data, model):
        self.data,self.model = data,model.to(data.device)

    def fit(self, epochs, lr, opt_fn=optim.SGD):
        opt = opt_fn(self.model.parameters(), lr=lr)
        loss_fn = F.cross_entropy
        fit(epochs, self.model, loss_fn, opt, self.data.train_dl, self.data.valid_dl)

In [24]:
data = DataBunch(train_ds, valid_ds, 64, train_tfm=conv_2_relu, valid_tfm=conv_2_relu)

opt_fn = partial(optim.SGD, momentum=0.9)

In [30]:
learner = Learner(data, simple_cnn([1,16,16,10], [3,3,3], [2,2,2]))

In [31]:
learner.fit(1, 0.05/5, opt_fn=opt_fn)



A Jupyter Widget

A Jupyter Widget

TypeError: conv_2_relu() missing 3 required positional arguments: 'channel_out', 'kernel_size', and 'stride'

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(16, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Lambda()
)