# Mini deep-learning framework

In [1]:
import math
import torch
from torch import empty
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x1a916f2a3c8>

### Classes and functions

In [2]:
class Module(object):
    def __init__(self):
        pass

    def forward(self, x):
        raise NotImplementedError
        
    def backward(self, grad):
        raise NotImplementedError
        
    def params(self):
        return []
        
    def reset_params(self):
        return

##########################################################################################################

from torch.nn.init import xavier_normal_, xavier_normal

class Linear(Module):
    def __init__(self, dim_in, dim_out):
        super(Linear, self).__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.epsilon = 1e-3
        self.x = 0

        # Initialize weights
        self.w = xavier_normal_(torch.empty(self.dim_out, self.dim_in))
        self.b = torch.empty(self.dim_out).normal_(0, self.epsilon)

        # Initialize gradient
        self.dl_dw = torch.empty(self.w.size())
        self.dl_db = torch.empty(self.b.size())
    
    def forward(self, x):
        self.x = x
        return self.x.mm(self.w.t()) + self.b


    def backward(self, grad):
        ds_dx = self.w.t()

        # do the same for every batch (batch dim becomes 1)
        dl_dx = ds_dx @ grad.t()

        # put batch dim back to 0
        dl_dx = dl_dx.t()

        # sum over all the outer product between (grad_1 * x_1^T) (_1 denotes not using mini-batches)
        self.dl_dw.add_(grad.t() @ self.x)

        # sum over the batch
        self.dl_db.add_(grad.sum(0))

        return dl_dx
        
    def params(self):
        return [(self.w, self.b), (self.dl_dw, self.dl_db)]
    
    def update_params(self, eta):
        self.w = self.w - eta * self.dl_dw
        self.b = self.b - eta * self.dl_db
        
    def reset_gradient(self):
        self.dl_dw.zero_()
        self.dl_db.zero_()

    def reset_params(self):
        # Initialize weights
        xavier_normal_(self.w)
        self.b.normal_(0, self.epsilon)

##############################################################################################################

class Sequential(Module):
    def __init__(self, *modules):
        super(Sequential, self).__init__()
        self.module_lst = []
        for module in modules:
            self.module_lst.append(module)
    
    def forward(self, x):
        for module in self.module_lst:
            x = module.forward(x)
        return x
        
    def backward(self, grad):
        for module in reversed(self.module_lst):
            grad = module.backward(grad)
        return grad
    
    def update_params(self, eta):
        for module in self.module_lst:
            module.update_params(eta)
            
    def params(self):
        lst = []
        for module in self.module_lst:
            lst.append(module.params())
        return lst
    
    def reset_gradient(self):
        for module in self.module_lst:
            module.reset_gradient()
        return
    
    def reset_params(self):
        for module in self.module_lst:
            module.reset_params()
        return    
            
            
###########################################################################################################

class ReLU(Module):
    def __init__(self):
        super(ReLU, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.clamp(min=0)
        
    def backward(self, grad):
        ds_dx = (torch.sign(self.x) + 1) / 2
        dl_dx = ds_dx * grad
        return dl_dx
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

#############################################################################################################

class Tanh(Module):
    def __init__(self):
        super(Tanh, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.tanh()
        
    def backward(self, grad):
        ds_dx = 4 * (self.x.exp() + self.x.mul(-1).exp()).pow(-2)
        dl_dx = ds_dx*grad
        return dl_dx
        
    def params(self):
        return []
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

#############################################################################################################

class MSELoss(Module):
    def __init__(self):
        super(MSELoss, self).__init__()
        
    def forward(self, v, t):
        return (v - t).pow(2).sum()
    
    def backward(self, v, t):
        return 2 * (v - t)

In [3]:
# Maybe no need to implement ourselves (no written in the project pdf at least, but reward for 'originality')
# From https://towardsdatascience.com/on-implementing-deep-learning-library-from-scratch-in-python-c93c942710a8

class Optimizer(object):
    def __init__(self, params):
        self.params = params
    
    def step(self): 
        raise NotImplementedError

    def zeroGrad(self):
        for p in self.params:
            p.grad = 0.

class SGD(Optimizer):
    def __init__(self, params, lr = .001, momentum = .999):
        super().__init__(params)
        self.lr           = lr
        self.weight_decay = weight_decay
        self.momentum     = momentum
        self.velocity     = []
        for p in params:
            self.velocity.append(np.zeros_like(p.grad))

    def step(self):
        for p,v in zip(self.params, self.velocity):
            v = self.momentum * v + p.grad 
            p.data = p.data-self.lr * v

### Generate data

In [31]:
def generate_disk_set(N=1000):
    
    # Generate train sets of 2 uniform distributions on [0,1]x[0,1]
    input = torch.empty(N, 2).uniform_(0, 1)
    
    recenter = torch.tensor([0.5, 0.5]) # to act as if the train data was centered around 0, to ease the following computation
    
    # Generate the target tensors filled with 1 if datapoint is inside of specific circle
    target = (-(input - recenter).pow(2).sum(1).sqrt().sub(1 / math.sqrt(2 * math.pi))).sign().add(1).div(2).long()
    
    return input, target

In [32]:
N = 1000
train_input, train_target = generate_disk_set(N)
test_input, test_target = generate_disk_set(N)

In [39]:
def generate_disc_set(nb):
    # creating the circle in the middle of the points
    axis = torch.FloatTensor(1,2).uniform_(0.5,0.5)
    r = 1/((2*math.pi)**0.5)

    train_input   =  torch.FloatTensor(nb, 2).uniform_(0,1)
    train_target  =  torch.FloatTensor(nb, 2)
    train_mask    =  torch.FloatTensor(nb, 1)
    test_input    =  torch.FloatTensor(nb, 2).uniform_(0,1)
    test_target   =  torch.FloatTensor(nb, 2)
    test_mask     =  torch.FloatTensor(nb, 1)

    for i in range(0, len(train_input)):
        a = abs((train_input[i] - axis).pow(2).sum(1).view(-1).pow(0.5))
        b = abs((test_input[i]  - axis).pow(2).sum(1).view(-1).pow(0.5))

        if a < r:
            train_target[i][0] = 0
            train_target[i][1] = 1
            train_mask[i]      = 1
        else:
            train_target[i][0] = 1
            train_target[i][1] = 0
            train_mask[i]      = 0

        if b < r:
            test_target[i][0] = 0
            test_target[i][1] = 1
            test_mask[i]      = 1
        else:
            test_target[i][0] = 1
            test_target[i][1] = 0
            test_mask[i]      = 0

    return train_input, train_target, test_input, test_target, test_mask, train_mask

In [40]:
train_input, train_target, test_input, test_target, test_mask, train_mask = generate_disc_set(1000)

In [41]:
train_target

tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]])

### Model

In [17]:
# Definition of constants
input_units = 2
output_units = 2
hidden_units = 25
nb_epochs = 1000
mini_batch_size = 100
eta = 1e-3

model_1 = Sequential(
            Linear(input_units, hidden_units),
            ReLU(),
            Linear(hidden_units, hidden_units),
            ReLU(),
            Linear(hidden_units, output_units),
            ReLU()
)

### Fonction train_model

In [21]:
def train_model(model, train_input, train_target, nb_epochs, mini_batch_size, criterion=MSELoss(), eta=1e-3):
    model.reset_params()
    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            
            # forward pass
            output = model.forward(train_input.narrow(0, b, mini_batch_size))
            loss = criterion.forward(output, train_target.narrow(0, b, mini_batch_size))
            sum_loss += loss.item()

            # backward pass
            model.reset_gradient()
            model.backward(criterion.backward(output, train_target.narrow(0, b, mini_batch_size)))
            model.update_params(eta)
            
        output = model.forward(train_input)
        pred = output.max(1)[1]
        error = (pred != train_mask.view(1,-1)).sum()
        error_rate = (error/train_input.size(0)) * 100

        print("epoch: {}, loss: {:.02f}, error {:.02f}% ".format(e, sum_loss, error_rate))

In [38]:
train_model(model_1, train_input, train_target, nb_epochs, mini_batch_size)


RuntimeError: The size of tensor a (2) must match the size of tensor b (100) at non-singleton dimension 1

In [51]:
train_target.narrow(0, 1, mini_batch_size)

tensor([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
        1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
        0, 0, 1, 1], dtype=torch.int32)

In [14]:
model_1.forward(train_input.narrow(0, 1, mini_batch_size))

tensor([[-0.0223],
        [-0.1567],
        [-0.1203],
        [-0.1463],
        [-0.1464],
        [-0.0544],
        [-0.1133],
        [-0.0037],
        [-0.1420],
        [-0.1136],
        [-0.0576],
        [-0.0474],
        [-0.0663],
        [-0.1211],
        [-0.0402],
        [-0.0281],
        [-0.0350],
        [-0.0528],
        [-0.1044],
        [-0.0794],
        [-0.0733],
        [-0.0375],
        [-0.1585],
        [-0.0345],
        [-0.0866],
        [-0.0672],
        [-0.0220],
        [-0.1349],
        [-0.1387],
        [-0.0860],
        [-0.0209],
        [-0.1067],
        [-0.0811],
        [-0.1210],
        [-0.0131],
        [-0.0754],
        [-0.0578],
        [-0.0072],
        [-0.1040],
        [-0.1157],
        [-0.0236],
        [-0.0372],
        [-0.0154],
        [-0.1044],
        [-0.0253],
        [-0.0078],
        [-0.1118],
        [-0.1328],
        [-0.1306],
        [-0.0800],
        [-0.0568],
        [-0.0542],
        [-0.

In [52]:
output = model_1.forward(train_input.narrow(0, 1, mini_batch_size))
pred = output.max(1)
pred

torch.return_types.max(
values=tensor([0.0000, 0.0000, 0.0139, 0.0047, 0.0153, 0.0082, 0.0188, 0.0108, 0.0000,
        0.0151, 0.0142, 0.0166, 0.0000, 0.0000, 0.0000, 0.0123, 0.0033, 0.0151,
        0.0101, 0.0153, 0.0111, 0.0000, 0.0053, 0.0144, 0.0114, 0.0137, 0.0162,
        0.0086, 0.0120, 0.0094, 0.0000, 0.0127, 0.0085, 0.0093, 0.0000, 0.0000,
        0.0158, 0.0043, 0.0087, 0.0096, 0.0036, 0.0050, 0.0000, 0.0000, 0.0031,
        0.0139, 0.0039, 0.0036, 0.0112, 0.0184, 0.0000, 0.0000, 0.0016, 0.0000,
        0.0160, 0.0000, 0.0000, 0.0148, 0.0126, 0.0124, 0.0000, 0.0000, 0.0133,
        0.0158, 0.0176, 0.0121, 0.0000, 0.0000, 0.0000, 0.0147, 0.0000, 0.0106,
        0.0000, 0.0119, 0.0000, 0.0008, 0.0043, 0.0134, 0.0075, 0.0000, 0.0000,
        0.0172, 0.0000, 0.0052, 0.0000, 0.0171, 0.0000, 0.0024, 0.0090, 0.0011,
        0.0000, 0.0000, 0.0000, 0.0111, 0.0055, 0.0143, 0.0035, 0.0000, 0.0045,
        0.0099]),
indices=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [16]:
model = model_1
model.reset_params()
output = model.forward(train_input.narrow(0, 1, mini_batch_size))
print(output[1:10], 'ok', train_target.narrow(0, 1, mini_batch_size))

tensor([[0.2699],
        [0.1921],
        [0.2592],
        [0.2486],
        [0.1133],
        [0.2062],
        [0.0203],
        [0.2563],
        [0.2516]]) ok tensor([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
        1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0,
        1, 1, 1, 1], dtype=torch.int32)


In [63]:
def train_model_1(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)
    nb_epochs = 250

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()


In [64]:
from torch import nn
from torch import optim

def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 25),
        nn.ReLU(),
        nn.Linear(25, 25),
        nn.ReLU(),
        nn.Linear(25, 1)
    )

mini_batch_size = 100
train_target_one_hot = one_hot(train_target)

model_1 = create_shallow_model()

for i in range(100):
    train_model_1(model_1, train_input, train_target)
    # issue is that I put the model_1 output_hidden as 1 instead of 2.. 
    # train_model(model_1, train_input, train_target.reshape(train_target.size(0),1), nb_epochs, mini_batch_size)
    print("Train accuracy: ", round(compute_accuracy(model_1, train_input, train_target, mini_batch_size), 2))
    print("Test accuracy:", round(compute_accuracy(model_1, test_input, test_target, mini_batch_size), 2))

IndexError: Target 1 is out of bounds.

In [140]:
train_target.narrow(0, 1, mini_batch_size)

tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)

In [167]:
(2*(model.forward(train_input.narrow(0, 1, mini_batch_size)) - train_target.narrow(0, 1, mini_batch_size).reshape(100,1))).size()

torch.Size([100, 1])

In [180]:
model.forward(train_input.narrow(0, 1, mini_batch_size))

tensor([[0.0000],
        [0.0427],
        [0.0049],
        [0.0000],
        [0.0000],
        [0.0128],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0395],
        [0.0420],
        [0.0249],
        [0.0000],
        [0.0429],
        [0.0000],
        [0.0000],
        [0.0274],
        [0.0000],
        [0.0000],
        [0.0081],
        [0.0454],
        [0.0000],
        [0.0000],
        [0.0377],
        [0.0164],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0419],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0312],
        [0.0000],
        [0.0354],
        [0.0000],
        [0.0000],
        [0.0335],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0244],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0695],
        [0.0385],
        [0

In [181]:
train_target.narrow(0, 1, mini_batch_size)

tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)

In [159]:
2*(model.forward(train_input.narrow(0, 1, mini_batch_size)) - train_target.narrow(0, 1, mini_batch_size))

tensor([[ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0854,  0.0854, -1.9146,  ..., -1.9146, -1.9146, -1.9146],
        [ 0.0097,  0.0097, -1.9903,  ..., -1.9903, -1.9903, -1.9903],
        ...,
        [ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0493,  0.0493, -1.9507,  ..., -1.9507, -1.9507, -1.9507]])