# Linear

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#export
import sys
# Make the `scripts` directory importable: take sys.path[0], drop its last
# path component, append 'scripts', and put the result first on sys.path.
# NOTE(review): the path is split on '/', so this presumes POSIX-style paths.
sys.path.insert(0, '/'.join(sys.path[0].split('/')[:-1] + ['scripts']))

from model import *

In [3]:
#export
class Module():
    '''Similar to pytorch Module, parent class to layers.

    Subclasses implement two hooks:
      * `fwd(*args)`      -- compute and return the output for the given inputs.
      * `bwd(out, *args)` -- called by `backward()` with the cached output
                             followed by the cached inputs of the last call.

    Attributes that hold a `Parameter` are tracked automatically and are
    exposed through `parameters()`.
    '''
    def __init__(self):
        self._parameters = {}

    def __setattr__(self, k, v):
        '''Assign attribute `k`, keeping the parameter registry in sync.'''
        # Look the registry up through __dict__ so the very first assignment
        # (`self._parameters = {}` in __init__) does not fail on the missing
        # attribute.
        registry = self.__dict__.get('_parameters')
        if isinstance(v, Parameter):
            self._parameters[k] = v
        elif registry is not None:
            # The attribute no longer holds a Parameter: drop any earlier
            # registration so parameters() does not keep yielding a stale one.
            registry.pop(k, None)
        super().__setattr__(k, v)

    def __call__(self, *args):
        '''Run `fwd` on `args`; cache inputs and output for `backward()`.'''
        self.args = args
        self.out = self.fwd(*args)
        return self.out

    def parameters(self):
        '''Yield every registered Parameter, in registration order.'''
        yield from self._parameters.values()

    def fwd(self, *args):
        '''Forward hook used by `__call__`; subclasses must override it.'''
        raise NotImplementedError(f'{type(self).__name__}.fwd')

    def bwd(self, out, *args):
        '''Backward hook used by `backward()`; subclasses must override it.'''
        raise NotImplementedError(f'{type(self).__name__}.bwd')

    # Kept unchanged for backward compatibility; `__call__` dispatches to `fwd`.
    def forward(self): raise NotImplementedError('Module.forward')

    # Requires a previous `__call__`, which sets `self.out` and `self.args`.
    def backward(self): self.bwd(self.out, *self.args)

In [4]:
#export
class Linear(Module):
    '''Fully connected layer computing `inp @ w + b`.

    in_dim / num_hidden are forwarded to `init_weight` (together with `end`)
    and `init_bias`; `require_grad` is forwarded to both `Parameter` wrappers.
    The printed repr reports the weight's two dimensions.
    '''
    def __init__(self, in_dim, num_hidden, end=False, require_grad=True):
        super().__init__()
        weight = init_weight(in_dim, num_hidden, end)
        self.w = Parameter(weight, require_grad)
        bias = init_bias(num_hidden)
        self.b = Parameter(bias, require_grad)

    def fwd(self, inp):
        '''Affine transform of `inp` using the raw weight and bias data.'''
        weight, bias = self.w.data, self.b.data
        return inp @ weight + bias

    def bwd(self, out, inp):
        '''Given `out.g`, store the input gradient on `inp.g` and pass the
        weight and bias gradients to each parameter's `update`.'''
        grad_out = out.g
        inp.g = grad_out @ self.w.data.t()
        self.w.update(inp.t() @ grad_out)
        # Bias gradient: `out.g` summed over dimension 0.
        self.b.update(grad_out.sum(0))

    def __repr__(self, t=''):
        indent = t + '    '
        shape = self.w.data.shape
        return f"{indent}Linear({shape[0]}, {shape[1]})"

class ReLU(Module):
    '''ReLU activation function (as a module).

    Note that the forward pass also subtracts 0.5 from the clamped values.
    '''
    def fwd(self, inp):
        '''Clamp `inp` at zero from below, then shift the result by -0.5.'''
        rectified = inp.clamp_min(0.)
        return rectified - 0.5

    def bwd(self, out, inp):
        '''Pass `out.g` through wherever `inp` was strictly positive.'''
        # The constant shift in fwd does not appear here: only the clamp
        # decides where the gradient flows.
        positive = (inp > 0).float()
        inp.g = positive * out.g

    def __repr__(self, t=''):
        indent = t + '    '
        return f"{indent}ReLU()"
        
class CrossEntropy(Module):
    '''Cross Entropy loss function (as a module).'''
    def fwd(self, inp, tar):
        '''Return `cross_entropy(inp, tar)`.'''
        loss = cross_entropy(inp, tar)
        return loss

    def bwd(self, loss, inp, tar):
        '''Store on `inp.g`: softmax(inp) with 1 subtracted at each row's
        target index, divided by the number of targets.'''
        # NOTE(review): dividing by the batch size presumes `cross_entropy`
        # averages over the batch -- confirm against its definition.
        batch = tar.shape[0]
        grad = softmax(inp)
        grad[range(batch), tar] -= 1
        inp.g = grad / batch

    def __repr__(self):
        return '(CrossEntropy)'

In [5]:
#export
def get_lin_model(data_bunch, num_hidden=50):
    '''Util function for obtaining a two (linear) layer fully connected model.

    data_bunch: object exposing `train_ds.x_data` (its `shape[1]` becomes the
                input width) and `train_ds.y_data` (labels; the largest label
                plus one becomes the output width).
    num_hidden: width of the hidden layer.

    Returns `Sequential(Linear, ReLU, Linear)`, the last layer built with
    `end=True`.
    '''
    train_ds = data_bunch.train_ds
    in_dim = train_ds.x_data.shape[1]
    labels = train_ds.y_data
    # Prefer the container's own max(): builtin max() walks a tensor one
    # element at a time in Python. Plain sequences without .max() still work
    # through the builtin.
    top_label = labels.max() if hasattr(labels, 'max') else max(labels)
    out_dim = int(top_label + 1)
    return Sequential(Linear(in_dim, num_hidden),
                      ReLU(),
                      Linear(num_hidden, out_dim, end=True))

# Tests

In [6]:
# Hidden layer width for the model built in the next cell.
nh = 50

# Load the four arrays returned by get_mnist_data(), then read the sizes off
# the training pair: rows/columns of x_train and largest label + 1.
x_train, y_train, x_valid, y_valid = get_mnist_data()
num_data, in_dim = x_train.shape
out_dim = int(y_train.max() + 1)

In [7]:
# Linear -> ReLU -> Linear; the last layer is flagged as the end layer.
model = Sequential(
    Linear(in_dim, nh),
    ReLU(),
    Linear(nh, out_dim, end=True),
)
loss_fn = CrossEntropy()

In [8]:
# Bare expression: the notebook shows the model's repr as the cell output.
model

(Model)
    Linear(784, 50)
    ReLU()
    Linear(50, 10)

In [9]:
# Print every parameter the model exposes, one per line.
for param in model.parameters():
    print(param)

shape: (784, 50), grad: True
shape: (50,), grad: True
shape: (50, 10), grad: True
shape: (10,), grad: True


In [10]:
# Pass 1: hand-written backward. The loss module's backward runs first (its
# bwd stores the gradient on the model output), then model.backward() is
# expected to propagate it through the layers down to x_train.g.
loss = loss_fn(model(x_train), y_train)
loss_fn.backward()
model.backward()

# Snapshot the manually computed gradients before the second pass.
xtg = x_train.g.clone()
w1g = model.layers[0].w.grad.clone()
b1g = model.layers[0].b.grad.clone()
w2g = model.layers[2].w.grad.clone()
b2g = model.layers[2].b.grad.clone()

# Pass 2: same computation with autograd tracking enabled on a copy of the
# input and on the raw parameter tensors.
x_train2 = x_train.clone().requires_grad_(True)
model.layers[0].w.data.requires_grad_(True)
model.layers[0].b.data.requires_grad_(True)
model.layers[2].w.data.requires_grad_(True)
model.layers[2].b.data.requires_grad_(True)

# Here `loss` is the value returned by the loss module (presumably a torch
# tensor), so .backward() is autograd's, not Module.backward.
loss = loss_fn(model(x_train2), y_train)
loss.backward()

# Manual and autograd gradients must agree, including the input gradient
# (previously captured in xtg but never compared).
test_near(xtg, x_train2.grad)
test_near(w1g, model.layers[0].w.data.grad)
test_near(b1g, model.layers[0].b.data.grad)
test_near(w2g, model.layers[2].w.data.grad)
test_near(b2g, model.layers[2].b.data.grad)