# Mini deep-learning framework

In [30]:
import math
import torch
from torch import empty
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x208f6106e88>

### Classes and functions

In [187]:
class Module(object):
    def __init__(self):
        pass

    '''
    Compute forward pass from an input tensor and return a tensor
    or a tuple of tensors as output
    '''
    def forward(self, x):
        raise NotImplementedError
        
    def backward(self, grad):
        '''
        should get as input a tensor or a tuple of tensors containing the 
        gradient of the loss with respect to the module’s output, accumulate 
        the gradient wrt the parameters, and return a tensor or a tuple of
        tensors containing the gradient of the loss wrt the module’s input.
        '''
        raise NotImplementedError
        
    def params(self):
        '''
        param should return a list of pairs, each composed of a parameter tensor, and a gradient tensor
        of same size. This list should be empty for parameterless modules (e.g. ReLU).

        '''
        return []
        
    def reset_params(self):
        return

##########################################################################################################

from torch.nn.init import xavier_normal_, xavier_normal

class Linear(Module):
    def __init__(self, dim_in, dim_out):
        super(Linear, self).__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.epsilon = 1e-3
        self.x = 0

        # Initialize weights
        self.w = xavier_normal_(torch.empty(self.dim_out, self.dim_in))
        self.b = torch.empty(self.dim_out).normal_(0, self.epsilon)

        # Initialize gradient
        self.dl_dw = torch.empty(self.w.size())
        self.dl_db = torch.empty(self.b.size())
    
    def forward(self, x):
        self.x = x
        return self.x.mm(self.w.t()) + self.b


    def backward(self, grad):
        ds_dx = self.w.t()

        # do the same for every batch (batch dim becomes 1)
        dl_dx = ds_dx @ grad.t()

        # put batch dim back to 0
        dl_dx = dl_dx.t()

        # sum over all the outer product between (grad_1 * x_1^T) (_1 denotes not using mini-batches)
        self.dl_dw.add_(grad.t() @ self.x)

        # sum over the batch
        self.dl_db.add_(grad.sum(0))

        return dl_dx
        
    def params(self):
        return [(self.w, self.b), (self.dl_dw, self.dl_db)]
    
    def update_params(self, eta):
        self.w = self.w - eta * self.dl_dw
        self.b = self.b - eta * self.dl_db
        
    def reset_gradient(self):
        self.dl_dw.zero_()
        self.dl_db.zero_()

    def reset_params(self):
        # Initialize weights
        xavier_normal_(self.w)
        self.b.normal_(0, self.epsilon)

##############################################################################################################

class Sequential(Module):
    def __init__(self, *modules):
        super(Sequential, self).__init__()
        self.module_lst = []
        for module in modules:
            self.module_lst.append(module)
    
    def forward(self, x):
        for module in self.module_lst:
            x = module.forward(x)
        return x
        
    def backward(self, grad):
        for module in reversed(self.module_lst):
            grad = module.backward(grad)
        return grad
    
    def update_params(self, eta):
        for module in self.module_lst:
            module.update_params(eta)
            
    def params(self):
        lst = []
        for module in self.module_lst:
            lst.append(module.params())
        return lst
    
    def reset_gradient(self):
        for module in self.module_lst:
            module.reset_gradient()
        return
    
    def reset_params(self):
        for module in self.module_lst:
            module.reset_params()
            
###########################################################################################################

def dReLU(x):
    s = x.clone()
    s[x>0] = 1
    s[x<=0] = 0
    return s


class ReLU(Module):
    def __init__(self):
        super(ReLU, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.clamp(min=0)
        
    def backward(self, grad):
        #ds_dx = dReLU(self.x)
        ds_dx = (torch.sign(self.x) + 1)/2
        dl_dx = ds_dx*grad
        return dl_dx
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

#############################################################################################################

class Tanh(Module):
    def __init__(self):
        super(Tanh, self).__init__()
    
    def forward(self, x):
        self.x = x
        return x.tanh()
        
    def backward(self, grad):
        ds_dx = 4 * (self.x.exp() + self.x.mul(-1).exp()).pow(-2)
        dl_dx = ds_dx*grad
        return dl_dx
        
    def params(self):
        return []
    
    def update_params(self, eta):
        return
    
    def reset_gradient(self):
        return

#############################################################################################################

class MSELoss(Module):
    def __init__(self):
        super(MSELoss, self).__init__()
        
    def forward(self, v, t):
        return (v - t).pow(2).sum()
    
    def backward(self, v, t):
        return 2 * (v - t)

In [1]:
# Maybe no need to implement ourselves (no written in the project pdf at least, but reward for 'originality')
# From https://towardsdatascience.com/on-implementing-deep-learning-library-from-scratch-in-python-c93c942710a8

class Optimizer(object):
    def __init__(self, parameters):
        self.parameters = parameters
    
    def step(self): 
        raise NotImplementedError

    def zeroGrad(self):
        for p in self.parameters:
            p.grad = 0.

class SGD(Optimizer):
    def __init__(self, parameters, lr = .001, weight_decay = 0.0, momentum = .9):
        super().__init__(parameters)
        self.lr           = lr
        self.weight_decay = weight_decay
        self.momentum     = momentum
        self.velocity     = []
        for p in parameters:
            self.velocity.append(np.zeros_like(p.grad))

    def step(self):
        for p,v in zip(self.parameters, self.velocity):
            v = self.momentum * v + p.grad + self.weight_decay * p.data
            p.data = p.data-self.lr * v

In [15]:
def compute_accuracy(model, input, target, mini_batch_size):
    nb_error = 0
    for b in range(0, input.size(0), mini_batch_size):
        output = model.forward(input.narrow(0, b, mini_batch_size))
        pred = output.max(1)[1]
        batch_error = (pred - target.narrow(0, b, mini_batch_size)).abs().sum()
        nb_error += batch_error
    return 100 * (1 - nb_error.item() / len(target))

In [18]:
def one_hot(target):
    one_hot_target = torch.zeros(target.shape[0], 2)
    for i in range(one_hot_target.shape[0]):
        if target[i] == 0:
            one_hot_target[i, 0] = 1
            one_hot_target[i, 1] = -1
        else:
            one_hot_target[i, 1] = 1
            one_hot_target[i, 0] = -1
    return one_hot_target

### Generate data

In [4]:
def generate_disk_set(N=1000):
    
    # Generate train sets of 2 uniform distributions on [0,1]x[0,1]
    train_input = torch.empty(N, 2).uniform_(0, 1)
    test_input = torch.empty(N, 2).uniform_(0, 1)
    
    recenter = torch.tensor([0.5, 0.5]) # to act as if the train data was centered around 0, to ease the following computation
    
    # Generate the target tensors filled with 1 if datapoint is inside of specific circle
    train_target = (-(train_input - recenter).pow(2).sum(1).sqrt().sub(1 / math.sqrt(2 * math.pi))).sign().add(1).div(2).int()
    test_target = (-(train_input - recenter).pow(2).sum(1).sqrt().sub(1 / math.sqrt(2 * math.pi))).sign().add(1).div(2).int()
    
    return train_input, test_input, train_target, test_target

In [25]:
N = 1000
train_input, test_input, train_target, test_target = generate_disk_set(N)
train_input, test_input, train_target, test_target

(tensor([[0.9462, 0.9302],
         [0.0047, 0.8419],
         [0.5731, 0.0127],
         ...,
         [0.4621, 0.7786],
         [0.4808, 0.2043],
         [0.2455, 0.9814]]),
 tensor([[0.1802, 0.3494],
         [0.5447, 0.4390],
         [0.1462, 0.5370],
         ...,
         [0.8781, 0.3767],
         [0.4319, 0.5806],
         [0.9200, 0.2099]]),
 tensor([0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
         0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0,
         0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0,
         1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
         1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
         1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
         1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,

### Model

In [190]:
# Definition of constants
input_units = 2
output_units = 1
hidden_units = 25
nb_epochs = 100
test_size = 1000
train_size = 1000

model_1 = Sequential(
            Linear(input_units, hidden_units),
            ReLU(),
            Linear(hidden_units, hidden_units),
            Tanh(),
            Linear(hidden_units, output_units),
            )

### Fonction train_model

In [189]:
def train_model(model, train_input, train_target, nb_epochs, mini_batch_size, criterion=MSELoss(), eta=1e-6):
    model.reset_params()
    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            
            # forward pass
            output = model.forward(train_input.narrow(0, b, mini_batch_size))
            loss = criterion.forward(output, train_target.narrow(0, b, mini_batch_size))
            sum_loss += loss.item()

            # backward pass
            model.reset_gradient()
            model.backward(criterion.backward(output, train_target.narrow(0, b, mini_batch_size)))
            model.update_params(eta)

In [126]:
train_target.narrow(0, 1, mini_batch_size)

tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)

In [137]:
model_1.forward(train_input.narrow(0, 1, mini_batch_size))

tensor([[ 1.4106e-01],
        [-1.7337e-04],
        [ 7.2474e-02],
        [ 1.4133e-01],
        [ 1.2738e-01],
        [ 4.9601e-02],
        [ 1.1673e-01],
        [ 1.7925e-01],
        [ 2.0086e-01],
        [ 1.2208e-01],
        [ 1.9252e-01],
        [ 8.3029e-03],
        [ 6.4162e-02],
        [ 1.0196e-01],
        [ 1.1079e-01],
        [ 5.6222e-02],
        [ 1.2798e-01],
        [ 1.5943e-01],
        [ 4.3864e-02],
        [ 1.0029e-01],
        [ 1.8674e-02],
        [ 2.7410e-02],
        [ 3.7596e-03],
        [ 6.0798e-02],
        [ 1.2142e-01],
        [ 1.0152e-02],
        [ 8.6384e-02],
        [ 5.4576e-02],
        [ 9.5266e-02],
        [ 1.6922e-01],
        [ 1.2340e-01],
        [ 9.7611e-02],
        [ 3.8578e-02],
        [ 7.2916e-02],
        [ 1.4092e-01],
        [ 1.8350e-01],
        [ 1.2244e-01],
        [ 7.6669e-02],
        [ 2.5605e-02],
        [ 8.9151e-02],
        [ 1.6848e-01],
        [ 1.4364e-01],
        [ 6.3846e-02],
        [ 7

In [135]:
output = model_1.forward(train_input.narrow(0, 1, mini_batch_size))
pred = output.max(1)
pred

torch.return_types.max(
values=tensor([-1.3174e-01,  1.3679e-01,  1.0866e-01, -6.8916e-02,  1.6267e-02,
         1.0598e-01,  2.8423e-02, -1.2706e-01,  5.5364e-03,  3.0729e-02,
         1.2771e-02,  1.3280e-01,  2.0914e-01,  1.8903e-01,  1.0594e-01,
         2.0135e-01,  1.5535e-01,  1.2017e-01,  1.4459e-01,  9.1102e-02,
         2.4246e-03,  6.6946e-02,  1.4694e-01, -4.2813e-02,  1.1405e-01,
         1.2858e-01,  1.5123e-01, -1.8841e-02, -8.8439e-02, -1.1445e-01,
         1.2750e-01,  9.7018e-02,  1.7609e-01,  7.4630e-02, -6.5555e-02,
         1.1453e-01, -1.0267e-01,  1.9152e-01,  3.0462e-02,  2.1661e-01,
        -1.3048e-01,  1.2642e-01,  1.8533e-01,  3.9303e-02,  1.6528e-02,
         3.6383e-02,  2.6786e-02,  7.4169e-02,  1.7874e-01, -1.2727e-02,
        -4.8406e-02,  1.3278e-02,  3.3684e-02,  2.1524e-01,  1.6768e-01,
        -7.1598e-02,  1.3777e-01,  5.4457e-02,  1.4134e-02, -1.0886e-01,
         1.2966e-01,  2.9005e-05,  1.1616e-01, -7.1770e-02,  1.3753e-01,
         1.0086e-01,

In [84]:
model = model_1
model.reset_params()
output = model.forward(train_input.narrow(0, 1, mini_batch_size))
print(output[1:10], 'ok', train_target.narrow(0, 1, mini_batch_size))

tensor([[0.0000],
        [0.0875],
        [0.2360],
        [0.1794],
        [0.0571],
        [0.1661],
        [0.3314],
        [0.2842],
        [0.1741]]) ok tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)


In [201]:
mini_batch_size = 100
train_target_one_hot = one_hot(train_target)

for i in range(100):
    train_model(model_1, train_input, train_target_one_hot, nb_epochs, mini_batch_size)
    # issue is that I put the model_1 output_hidden as 1 instead of 2.. 
    # train_model(model_1, train_input, train_target.reshape(train_target.size(0),1), nb_epochs, mini_batch_size)
    print("Train accuracy: ", round(compute_accuracy(model_1, train_input, train_target, mini_batch_size), 2))
    print("Test accuracy:", round(compute_accuracy(model_1, test_input, test_target, mini_batch_size), 2))

Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accuracy: 51.3
Train accuracy:  51.3
Test accurac

In [140]:
train_target.narrow(0, 1, mini_batch_size)

tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)

In [167]:
(2*(model.forward(train_input.narrow(0, 1, mini_batch_size)) - train_target.narrow(0, 1, mini_batch_size).reshape(100,1))).size()

torch.Size([100, 1])

In [180]:
model.forward(train_input.narrow(0, 1, mini_batch_size))

tensor([[0.0000],
        [0.0427],
        [0.0049],
        [0.0000],
        [0.0000],
        [0.0128],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0395],
        [0.0420],
        [0.0249],
        [0.0000],
        [0.0429],
        [0.0000],
        [0.0000],
        [0.0274],
        [0.0000],
        [0.0000],
        [0.0081],
        [0.0454],
        [0.0000],
        [0.0000],
        [0.0377],
        [0.0164],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0419],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0312],
        [0.0000],
        [0.0354],
        [0.0000],
        [0.0000],
        [0.0335],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0244],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0695],
        [0.0385],
        [0

In [181]:
train_target.narrow(0, 1, mini_batch_size)

tensor([0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
        0, 1, 1, 1], dtype=torch.int32)

In [159]:
2*(model.forward(train_input.narrow(0, 1, mini_batch_size)) - train_target.narrow(0, 1, mini_batch_size))

tensor([[ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0854,  0.0854, -1.9146,  ..., -1.9146, -1.9146, -1.9146],
        [ 0.0097,  0.0097, -1.9903,  ..., -1.9903, -1.9903, -1.9903],
        ...,
        [ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0000,  0.0000, -2.0000,  ..., -2.0000, -2.0000, -2.0000],
        [ 0.0493,  0.0493, -1.9507,  ..., -1.9507, -1.9507, -1.9507]])