In [117]:
import torch
from torch import Tensor
import numpy as np
import math

## Training set and test set

In [118]:
def disk(nb_points) :
    radius = 1/np.sqrt(2*np.pi)
    inp = Tensor(nb_points,2).uniform_(0,1)
    ratio = torch.floor(torch.norm(inp,p=2,dim=1)/radius)
    target = 1-torch.clamp(ratio,min=0,max=1)
    t = Tensor(nb_points,2)
    for i in range(nb_points):
        if target[i] == 0:
            t[i,:] = Tensor([-1,1])
        elif target[i] == 1:
            t[i,:] = Tensor([1,-1])    
    return inp, t

nb_points = 1000
# create train set and respective labels
train_input , train_target = disk(nb_points)
# create test set and respective labels
test_input , test_target = disk(nb_points)

"""
target=torch.zeros(train_target.shape[0],2)-1
target[:,0][train_target==0]=1
target[:,1][train_target==1]=1
"""
#print(train_input, train_target, test_input, test_target)

# data normalization
# , std_train = train_input.mean() , train_input.std()
#train_input.sub_(mu_train).div_(std_train)
#mu_test , std_test = test_input.mean() , test_input.std()
#test_input.sub_(mu_test).div_(std_test)
# later see whether it is convenient to normalize component-wise with broadcasting (pag 66 week5)

# then do sanity check to see if the two classes are balanced
print(train_target[-1,:])



-1
 1
[torch.FloatTensor of size 2]



## Simple structure

In [119]:
class Module ( object ) :
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return []

#### Activation functions

In [120]:
def tanh(x):
    return x.tanh()

def dtanh(x):
    return 1-torch.tanh(x).pow(2)

In [121]:
def relu(x):
    return max(0,x)

def drelu(x):
    if x>0:
        out = 1
    elif x<=0:
        out = 0
    return out

class Relu(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,input):
        self.input = input
        return relu(input)
    
    def backward(self,output):
        return drelu(input)*output #to check, probably incorrect
    
    # here you need to add "def param" too

#### Loss functions

In [122]:
def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

## Linear Module

In [123]:
class Linear(Module):
    # in_features: size of each input sample
    # out_features: size of each output sample
    # bias: If set to False, the layer will not learn an additive bias. Default: ``True``

    # Attributes:
    # weight: the learnable weights of the module of shape (out_features x in_features)`
    # bias:   the learnable bias of the module of shape `(out_features)`

    def __init__(self, in_features, out_features):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weights = Tensor(out_features,in_features).normal_(0,1e-1)
        self.bias = Tensor(out_features).zero_()
        self.dl_dw = Tensor(out_features,in_features).zero_()
        self.dl_db = Tensor(out_features).zero_()
                
    def forward(self,input):   # input sarebbe la x
        self.input = input
        self.s = torch.mv(self.weights,input)
        return self.s

    def backward(self, dl_ds):  
        dl_dx = self.weights.t().mv(dl_ds)   # the problem is here! 
        
        self.dl_dw.add_(dl_ds.view(-1, 1).mm(self.input.view(1, -1)))  
        self.dl_db.add_(dl_ds)
        return dl_dx     
    
    def update_param(self, eta):
        self.weights = self.weights - eta * self.dl_dw
        self.bias = self.bias - eta * self.dl_db
    
    def reset_param(self):
        self.dl_dw.zero_()
        self.dl_db.zero_()

## Activation Module

In [124]:
class Sigma(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,input):
        self.s = input
        return tanh(input)
    
    def backward(self, dl_dx):
        dl_ds = dtanh(self.s)*dl_dx
        return dl_ds
    
    def update_param(self, eta):
        pass 
    
    def reset_param(self):
        pass
        
        #return dtanh(input)*output    
        #return 4 * (output.exp() + output.mul(-1).exp()).pow(-2)

## Sequential Module

In [125]:
class Sequential(Module):
    def __init__(self, module_list):
        self.modules = module_list
        
    def forward(self, x):
        for module in module_list:
            x = module.forward(x)
            
        return x        
        
    def backward(self,dl_d):
        for module in reversed(module_list):
            dl_d = module.backward(dl_d)
    
    def update_param(self,eta):
        for module in module_list:
            module.update_param(eta)
    
    def reset_param(self):
        for module in module_list:
            module.reset_param()

## Loss Module

In [126]:
class Loss(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self, input, target):
        self.input = input
        self.target = target
        return loss(input, target)
    
    def backward(self):
        return dloss(self.input, self.target)

## Framework

### Training

In [127]:
# Create my sequential list
# List of layers
input_size = 2
hidden_units = 25
output_size = 2

# three linear layers
fc1 = Linear(input_size,hidden_units)
fc2 = Linear(hidden_units,hidden_units)
fc3 = Linear(hidden_units,output_size)

# three activation layers
act1 = Sigma()
act2 = Sigma()
act3 = Sigma()

module_list = [fc1, act1, fc2, act2, fc3, act3]

# framework
model = Sequential(module_list)
model_loss = Loss()

# training set
train_input = train_input
train_target = train_target

# training parameters
lr = 0.001
nb_epochs = 20

# testing set
test_input = test_input
test_target = test_target

for k in range(0, nb_epochs):

    # TRAINING
    acc_loss = 0   # accumulated loss
    nb_train_errors = 0
    nb_test_errors = 0
    
    # zero the derivatives
    model.reset_param()
    
    # forward pass for all training samples
    for n in range(train_input.size(0)):
        x = train_input[n]
        t = train_target[n]
        x = model.forward(x)
        
        # compute the error
        pred = x.max(0)[1][0]  # the result is the index (0 or 1) of the position where the max value is
        if x[0]>-0.5:
            pred=0
        targ = train_target[n,:].max(0)[1][0]
        if targ != pred:
            nb_train_errors = nb_train_errors + 1 
               
        #loss
        ### forward
        loss_sample = model_loss.forward(input = x, target = t)
        acc_loss += loss_sample
        ### backward
        grad_loss = model_loss.backward()
        
        # backward pass
        model.backward(grad_loss)  
        
        # TESTING
        x_test = test_input[n]
        t_test = test_target[n]
        x_test = model.forward(x_test)
        
        # compute the error
        pred_test = x_test.max(0)[1][0]  # the result is the index (0 or 1) of the position where the max value is
        if x_test[0]>-0.5:
            pred_test=0
        targ_test = test_target[n,:].max(0)[1][0]
        if targ_test != pred_test:
            nb_test_errors = nb_test_errors + 1 
        
        
    # update the derivatives
    model.update_param(eta=lr)
    
    
        
    print('epoch {:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'.format(k+1,acc_loss,
                                                                            (100 * nb_train_errors) / train_input.size(0), (100 * nb_test_errors) / test_input.size(0) ))

epoch 1 acc_train_loss 2031.59 acc_train_error 87.10% test_error 89.40%
epoch 2 acc_train_loss 1583.34 acc_train_error 87.10% test_error 89.40%
epoch 3 acc_train_loss 592.64 acc_train_error 3.60% test_error 4.40%
epoch 4 acc_train_loss 583.85 acc_train_error 3.40% test_error 2.80%
epoch 5 acc_train_loss 582.60 acc_train_error 2.40% test_error 2.30%
epoch 6 acc_train_loss 582.33 acc_train_error 2.50% test_error 2.50%
epoch 7 acc_train_loss 582.13 acc_train_error 2.50% test_error 2.50%
epoch 8 acc_train_loss 581.95 acc_train_error 2.50% test_error 2.40%
epoch 9 acc_train_loss 581.78 acc_train_error 2.50% test_error 2.40%
epoch 10 acc_train_loss 581.61 acc_train_error 2.50% test_error 2.40%
epoch 11 acc_train_loss 581.46 acc_train_error 2.70% test_error 2.40%
epoch 12 acc_train_loss 581.31 acc_train_error 2.70% test_error 2.40%
epoch 13 acc_train_loss 581.16 acc_train_error 2.70% test_error 2.40%
epoch 14 acc_train_loss 581.03 acc_train_error 2.70% test_error 2.60%
epoch 15 acc_train_loss