In [1]:
#default_exp training_loop_03

In [2]:
#export
from ModernArchitecuturesFromScratch.fully_connected_network_02 import *
ba2 = __import__("ModernArchitecuturesFromScratch.fully_connected_network_02")

_all_ = ["ba2"]

In [3]:
# Input width of the first Linear layer and output width of the last one
# (presumably 28*28 flattened MNIST pixels and 10 digit classes — TODO confirm).
n_in = 784
n_out = 10

In [4]:
# get_mnist is defined outside this notebook; judging by the names it returns
# training inputs/targets followed by validation inputs/targets — TODO confirm.
xt, yt, xv, yv = get_mnist()
# Linear -> ReLU -> Linear stack with a hidden width of 50, built from the
# classes that are in scope at this point (presumably the star-imported ones).
layers = [Linear(n_in,50, True), ReLU(), Linear(50,n_out, False)]
model = Model(layers)

# Better Cross Entropy

In [None]:
#export
def log_softmax(inp):
    """Naive log-softmax over the last dimension: log(exp(x) / sum(exp(x))).

    The raw inputs are exponentiated directly, so large values can overflow;
    `log_softmax_improved` is the numerically safer variant.
    """
    exps = inp.exp()
    return (exps / exps.sum(-1, keepdim=True)).log()

def logsumexp(inp):
    """Numerically stable log(sum(exp(inp))) over the last dimension.

    The per-row maximum is subtracted before exponentiating so the largest
    exponent is exp(0) = 1, then added back outside the log.

    inp: tensor with at least one dimension.
    Returns a tensor with the last dimension reduced away.
    """
    # Reduce over the last dim for both the max and the sum (the max used to
    # be taken over dim=1, which only agrees with the sum for 2-D input).
    a = inp.max(dim=-1, keepdim=True).values
    return a.squeeze(-1) + (inp - a).exp().sum(-1).log()

def log_softmax_improved(inp):
    """Log-softmax computed as `inp - logsumexp(inp)`.

    The log-normaliser is given a trailing axis so it broadcasts against
    every entry of its row.
    """
    log_norm = logsumexp(inp).unsqueeze(-1)
    return inp - log_norm

In [None]:
#export
def nll(inp, targ):
    """Negative log-likelihood: minus the mean of `inp[i, targ[i]]` over rows.

    inp: 2-D tensor indexed as [row, class] (log-probabilities when called
         from `cross_entropy`).
    targ: one class index per row; cast to long before indexing.
    """
    rows = range(targ.shape[0])
    picked = inp[rows, targ.long()]
    return -(picked.mean())

In [None]:
#export
def cross_entropy(inp, targ):
    """Cross-entropy loss: `nll` applied to `log_softmax_improved(inp)`.

    inp: raw model outputs, one row per sample.
    targ: class index per sample.
    """
    log_probs = log_softmax_improved(inp)
    return nll(log_probs, targ)

# Initial Train Loop

In [None]:
#export
def accuracy(preds, targ):
    """Fraction of rows whose highest-scoring column (argmax over dim 1) equals `targ`."""
    predicted = torch.argmax(preds, dim=1)
    hits = (predicted == targ)
    return hits.float().mean()

In [None]:
bs = 64
lr = 0.5

def fit(epochs, model, loss_func, train, valid):
    """Train `model` with hand-written mini-batch gradient descent.

    Reads the notebook-level `bs` (batch size) and `lr` (learning rate).

    epochs: number of passes over `train`.
    model: callable with `.backward()` and `.layers`; every layer exposing a
           `w` attribute has `w` and `b` updated from their `.g` attributes.
    loss_func: callable loss that also provides `.backward(loss)`.
    train: inputs, sliceable along the first axis.
    valid: targets aligned with `train` (despite the name, it is what each
           batch of predictions is scored against).
    """
    n = len(train)
    for epoch in range(epochs):
        # Step through the data in strides of `bs`; the last stride may be
        # shorter, so the tail of the data is trained on too (the previous
        # ceil(len//bs) batch count silently dropped it).
        for start in range(0, n, bs):
            end = start + bs
            train_batch = train[start:end]
            valid_batch = valid[start:end]

            loss = loss_func(model(train_batch), valid_batch)
            loss_func.backward(loss, )
            model.backward()

            # Parameter updates must not be tracked by autograd
            with torch.no_grad():
                for l in model.layers:
                    if hasattr(l, 'w'):
                        l.w -= l.w.g * lr
                        l.b   -= l.b.g   * lr
                        l.w.g = 0
                        l.b.g = 0
        # Score on the data that was passed in rather than on notebook globals
        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(train), valid)}')

# Modules 

# Printing

In [None]:
#export
class SequentialModel():
    """Container that applies its layers one after another.

    Layers are expected to be callable and to provide `backward()` and
    `parameters()`; `training` starts out True.
    """
    def __init__(self, *args):
        self.layers = [*args]
        self.training = True

    def __repr__(self):
        # One "(LayerN): <layer>" line per layer, numbered from 1
        lines = [f"(Layer{pos}): {layer!s}"
                 for pos, layer in enumerate(self.layers, start=1)]
        return "\n".join(lines)

    def __call__(self, x):
        # Feed the output of each layer into the next
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def backward(self):
        # Walk the layers last-to-first
        for layer in reversed(self.layers):
            layer.backward()

    def parameters(self):
        # Flatten every layer's parameters into a single stream
        for layer in self.layers:
            yield from layer.parameters()

In [None]:
# Attach readable reprs to the classes that are in scope at this point
# (presumably the star-imported ones) so the model printout is informative.
def relu_repr(self):
    return f'ReLU()'
ReLU.__repr__ = relu_repr

def linear_repr(self):
    # Prints the two leading sizes of the weight's `.d` tensor
    return f'Linear({self.w.d.shape[0]}, {self.w.d.shape[1]})'
Linear.__repr__ = linear_repr

def crosssoft_repr(self):
    return f'CrossEntropyLoss()'
CrossSoft.__repr__ = crosssoft_repr

# Parameters

In [None]:
#export

class Parameter():
    """A tensor (`d`) bundled with its gradient (`grad`) and a trainable flag.

    data: tensor to wrap; an empty tensor is used when omitted.
    requires_grad: when False the parameter is frozen and `step` leaves its
                   values untouched.
    """
    def __init__(self, data=None, requires_grad=True):
        self.d = torch.Tensor() if data is None else data
        self.requires_grad = requires_grad
        self.grad = 0.

    def step(self, lr):
        """Gradient-descent update `d -= lr * grad`, applied in place."""
        # Honour the flag: frozen parameters are never modified
        if not self.requires_grad: return
        self.d -= lr * self.grad

    def zero_grad(self):
        """Reset the stored gradient to 0."""
        self.grad = 0.

    def update(self, amount):
        """Store `amount` as the current gradient (replaces, does not accumulate)."""
        self.grad = amount

    # Descriptor hook: only consulted when a Parameter is stored as a *class*
    # attribute; instance attributes are returned as the Parameter itself.
    def __get__(self, instance, owner): return self.d

    def __repr__(self):
        return f'Parameter: {self.d.shape}, Requires grad: {self.requires_grad}'

In [None]:
# Two sample tensors of different shapes to try out the Parameter wrapper.
l_data = torch.zeros([4,6])
s_data = torch.randn([4,2])
# requires_grad is left at its default for both.
param_l = Parameter(l_data)
param_s = Parameter(s_data)

In [None]:
# Show the repr of the Parameter wrapping the 4x6 zeros tensor.
param_l

In [None]:
# Show the repr of the Parameter wrapping the 4x2 random tensor.
param_s

In [None]:
#export

class Module():
    """Base class for layers and losses.

    Calling an instance runs `forward`, remembering the arguments and the
    result so `backward` can hand them to the subclass's `bwd(out, *args)`.
    Attributes holding a `Parameter` are tracked and exposed by `parameters()`.
    """
    def __init__(self):
        # setdefault keeps parameters that were registered before this ran
        self.__dict__.setdefault('_params', {})

    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)
        return self.out

    # Accept any arguments so an un-overridden forward reports the real
    # problem instead of a TypeError about the argument count.
    def forward(self, *args): raise NotImplementedError("Not Implemented")

    def backward(self): self.bwd(self.out, *self.args)

    def __setattr__(self,k,v):
        # Create the registry on demand so subclasses that set parameters
        # before (or without) calling Module.__init__ still work.
        registry = self.__dict__.setdefault('_params', {})
        if isinstance(v, Parameter): registry[k] = v
        # Rebinding the name to something else un-registers the old parameter
        else: registry.pop(k, None)
        super().__setattr__(k,v)

    def parameters(self):
        """Yield the registered Parameter objects in registration order."""
        for p in self.__dict__.get('_params', {}).values(): yield p

In [None]:
#export

class CrossEntropy(Module):
    """Cross-entropy loss as a Module, with a hand-written backward pass."""
    def forward(self, inp, targ):
        return cross_entropy(inp, targ)

    def bwd(self, loss, inp, targ):
        # Gradient w.r.t. the inputs: softmax output with 1 subtracted at each
        # row's target class, divided by the number of targets.
        # `softmax` is not defined in this notebook — presumably star-imported.
        n = targ.shape[0]
        grad = softmax(inp)
        grad[range(n), targ.long()] -= 1
        inp.g = grad / n

In [None]:
#export

class Linear(Module):
    """Affine layer `xb @ w + b` whose weight and bias are Parameter objects.

    in_d, out_d: passed to `get_weight` for the weight; the bias is a zero
                 vector of length `out_d`.
    relu_after: forwarded to `get_weight` (defined outside this notebook).
    req_grad: stored on both parameters as their `requires_grad` flag.
    """
    def __init__(self, in_d, out_d, relu_after, req_grad=True):
        super().__init__()
        weight = get_weight(in_d, out_d, relu_after)
        self.w = Parameter(weight, req_grad)
        bias = torch.zeros(out_d)
        self.b = Parameter(bias, req_grad)

    def forward(self, xb):
        return xb @ self.w.d + self.b.d

    def bwd(self, out, inp):
        # `out.g` is expected to have been set by the following layer's bwd
        out_grad = out.g
        inp.g = out_grad @ self.w.d.t()
        self.w.update(inp.t() @ out_grad)
        self.b.update(out_grad.sum(0))

    def __repr__(self):
        shape = self.w.d.shape
        return f'Linear({shape[0]}, {shape[1]})'

In [None]:
#export

class ReLU(Module):
    """ReLU shifted down by 0.5: `max(x, 0) - 0.5`.

    The constant shift does not affect the gradient (presumably it is there
    to re-centre the activations — TODO confirm).
    """
    def forward(self, x):
        # Out-of-place clamp: the in-place `clamp_min_` overwrote the caller's
        # tensor (the previous layer's cached output, or the input data).
        return x.clamp_min(0.) - 0.5

    def bwd(self, out, inp):
        # Pass the gradient through only where the input was positive
        inp.g = (inp>0).float() * out.g

    def __repr__(self): return 'ReLU()'

In [None]:
# Small layer (in_d=3, out_d=1, relu_after=False) used to inspect parameter registration.
lin = Linear(3, 1, False)

In [None]:
# Registry filled by Module.__setattr__: one entry per Parameter attribute (w and b).
lin._params

In [None]:
# parameters() is already a generator; iter() just makes the iteration explicit.
params = iter(lin.parameters())

In [None]:
# Pull the first registered Parameter from the generator.
next(params)

In [None]:
# Rebuild the network from this notebook's Module-based Linear/ReLU;
# this rebinds the `model` name created near the top of the notebook.
model = SequentialModel(Linear(n_in,50, True), ReLU(), Linear(50,n_out, False))

In [None]:
# Display the model via SequentialModel.__repr__ (one line per layer).
model

In [None]:
# Print every Parameter exposed by the model's layers.
for p in model.parameters(): print(p)

# Optimizer

In [None]:
#export
class Optimizer():
    """Applies one learning rate to a fixed collection of parameters.

    params: iterable of objects providing `step(lr)` and `zero_grad()`;
            it is materialised into a list once, at construction.
    lr: learning rate handed to every `step` call.
    """
    def __init__(self, params, lr):
        self.params = list(params)
        self.lr = lr

    def step(self):
        # Let each parameter apply its own update
        for param in self.params:
            param.step(self.lr)

    def zero_grad(self):
        # Clear the gradient stored on each parameter
        for param in self.params:
            param.zero_grad()

In [None]:
# Optimizer over the current model's parameters with a learning rate of 0.5.
optim = Optimizer(model.parameters(), 0.5)

In [None]:
bs = 64

def fit(epochs, model, optim, loss_func, train, valid):
    """Train `model`, delegating the parameter updates to `optim`.

    Reads the notebook-level batch size `bs`.

    epochs: number of passes over `train`.
    model: callable with `.backward()`.
    optim: object providing `step()` and `zero_grad()`.
    loss_func: callable loss that also provides `.backward()`.
    train: inputs, sliceable along the first axis.
    valid: targets aligned with `train` (despite the name, it is what each
           batch of predictions is scored against).
    """
    n = len(train)
    for epoch in range(epochs):
        # Strides of `bs`; the last one may be shorter, so the tail of the
        # data is used too (ceil(len//bs) used to drop it).
        for start in range(0, n, bs):
            end = start + bs
            train_batch = train[start:end]
            valid_batch = valid[start:end]

            # The forward pass caches what the backward calls below need
            loss_func(model(train_batch), valid_batch)
            loss_func.backward()
            model.backward()

            optim.step()
            optim.zero_grad()

        # Score on the data that was passed in rather than on notebook globals
        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(train), valid)}')

In [None]:
# Train for 3 epochs, passing xt as the inputs and yt as the targets.
fit(3, model, optim, CrossEntropy(), xt, yt)

In [None]:
#export
def get_model(lr, n_in=784, n_hidden=50, n_out=10):
    """Build a Linear -> ReLU -> Linear model with its optimizer and loss.

    lr: learning rate for the optimizer.
    n_in, n_hidden, n_out: layer widths; the defaults reproduce the
        784-50-10 network this function always built before.

    Returns (model, optimizer, loss_func).
    """
    model = SequentialModel(Linear(n_in, n_hidden, True), ReLU(), Linear(n_hidden, n_out, False))
    loss_func = CrossEntropy()
    optimizer = Optimizer(model.parameters(), lr)
    return model, optimizer, loss_func

# Databunch

In [None]:
#export
class Dataset():
    """Pairs inputs `x` with targets `y` behind a single index.

    Indexing applies the same index to both and returns an `(x, y)` tuple;
    the length is the length of `x`.
    """
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, i):
        return (self.x[i], self.y[i])

    def __len__(self):
        return len(self.x)

In [None]:
class DataLoader():
    """Iterates over a sliceable dataset in consecutive chunks of `bs` items."""
    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs

    def __iter__(self):
        # The final slice is shorter when len(ds) is not a multiple of bs
        for start in range(0, len(self.ds), self.bs):
            stop = start + self.bs
            yield self.ds[start:stop]

In [None]:
# Pair the training inputs with their targets, then batch them using the
# notebook-level batch size `bs`.
ds = Dataset(xt, yt)
dl = DataLoader(ds, bs)

In [None]:
def fit(epochs, model, optim, loss_func, data_loader):
    """Train `model` on the `(xb, yb)` batches yielded by `data_loader`.

    epochs: number of passes over `data_loader`.
    model: callable with `.backward()`.
    optim: object providing `step()` and `zero_grad()`.
    loss_func: callable loss that also provides `.backward()`.

    NOTE(review): the per-epoch accuracy is computed on the notebook globals
    `xt`/`yt`, not on the data behind `data_loader`.
    """
    for epoch in range(epochs):
        for batch in data_loader:
            xb, yb = batch
            preds = model(xb)
            loss = loss_func(preds, yb)

            # Backward through the loss first, then through the model
            loss_func.backward()
            model.backward()

            optim.step()
            optim.zero_grad()

        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(xt), yt)}')

In [None]:
# Fresh model, optimizer and loss with lr=0.5, trained for 3 epochs on the DataLoader.
m, o, lf = get_model(0.5)
fit(3, m, o, lf, dl)

# Random Data

In [None]:
#export
class Batcher():
    """Yields batches of dataset indices, optionally in shuffled order.

    ds: only its length is used, and it is read once at construction.
    bs: number of indices per batch (the last batch may be shorter).
    random: when truthy a new permutation is drawn at the start of every
            iteration; otherwise indices run 0..n-1 in order.
    """
    def __init__(self, ds, bs, random):
        self.n = len(ds)
        self.bs = bs
        self.rand = random

    def __iter__(self):
        if self.rand:
            self.idxs = torch.randperm(self.n)
        else:
            self.idxs = torch.arange(self.n)
        for start in range(0, self.n, self.bs):
            yield self.idxs[start:start + self.bs]

In [None]:
# First 10 samples only: ds[:10] returns an (x, y) pair, unpacked into a new Dataset.
small_ds = Dataset(*ds[:10])

In [None]:
# One shuffling and one sequential batcher over the 10-sample dataset, batch size 4.
# NOTE(review): the name `random` would shadow the standard-library module if
# that were ever imported in this notebook — consider renaming.
random = Batcher(small_ds, 4, True)
not_random = Batcher(small_ds, 4, False)

In [None]:
# Index batches in shuffled order; a new permutation is drawn on every iteration.
[r for r in random]

In [None]:
# Index batches in sequential order.
[nr for nr in not_random]

In [None]:
#export
def collate(b):
    """Turn a list of `(x, y)` samples into one stacked `(xs, ys)` pair.

    b: iterable of 2-tuples of tensors.
    Returns two tensors, each stacked along a new leading dimension.
    """
    inputs, targets = zip(*b)
    stacked_inputs = torch.stack(inputs)
    stacked_targets = torch.stack(targets)
    return stacked_inputs, stacked_targets
    
    
class DataLoader():
    """Builds batches by looking up the indices a batcher yields.

    ds: indexable dataset.
    batcher: iterable yielding one collection of indices per batch.
    collate_fcn: combines the list of looked-up samples into a batch.

    Note that `len()` reports the number of samples in `ds`, not the number
    of batches.
    """
    def __init__(self, ds, batcher, collate_fcn):
        self.ds = ds
        self.batcher = batcher
        self.collate_fcn = collate_fcn

    def __iter__(self):
        for idxs in self.batcher:
            # Samples are fetched one index at a time, then merged
            samples = [self.ds[i] for i in idxs]
            yield self.collate_fcn(samples)

    def __len__(self):
        return len(self.ds)

# Validation

In [None]:
#export
def fit(epochs, model, optim, loss_func, train, valid):
    """Train on `train` and report accuracy and loss on `valid` after each epoch.

    epochs: number of passes over `train`.
    model: callable with `.backward()`; its `training` attribute is set to
           True while training and False while validating.
    optim: object providing `step()` and `zero_grad()`.
    loss_func: callable loss that also provides `.backward()`.
    train, valid: iterables yielding `(xb, yb)` batches.

    The reported metrics are averaged per sample, so a shorter final batch
    counts in proportion to its size; with an empty `valid` they are nan.
    """
    for epoch in range(epochs):

        model.training = True
        for xb, yb in train:
            # The forward pass caches what the backward calls below need
            loss_func(model(xb), yb)
            loss_func.backward()
            model.backward()

            optim.step()
            optim.zero_grad()

        model.training = False
        # Separate counter name: the old code reused `epochs` here and
        # shadowed the parameter.
        acc_sum, loss_sum, n_seen = 0., 0., 0
        for xb, yb in valid:
            pred = model(xb)
            n = len(yb)
            # Both metrics are batch means; weight them by the batch size
            acc_sum += accuracy(pred, yb) * n
            loss_sum += loss_func(pred, yb) * n
            n_seen += n
        if n_seen:
            acc, loss = acc_sum / n_seen, loss_sum / n_seen
        else:
            # Nothing to validate on: report nan instead of dividing by zero
            acc = loss = float('nan')

        print(f'Epoch {epoch+1}, Accuracy: {acc}, Loss: {loss}')

In [None]:
#export
def get_datasets(bs=64):
    """Load MNIST and wrap it in a training and a validation DataLoader.

    bs: batch size for both loaders (64, the previously fixed value, by default).

    Returns (train, valid): the first loader draws its batches in shuffled
    order, the second in sequential order. Which tensors form which split is
    decided by `get_mnist` (defined elsewhere); the first pair is used for
    the first loader and the second pair for the second.
    """
    xt, yt, xv, yv = get_mnist()
    tr = Dataset(xt, yt)
    val = Dataset(xv, yv)
    train = DataLoader(tr, Batcher(tr, bs, True), collate)
    valid = DataLoader(val, Batcher(val, bs, False), collate)
    return train, valid

In [None]:
# End-to-end run: build both loaders, create a fresh model with lr=0.1,
# and train for 3 epochs with per-epoch validation metrics.
train, valid = get_datasets()
m, o, lf = get_model(0.1)
fit(3, m, o, lf, train, valid)