In [63]:
import torch
from torch import Tensor
import numpy as np
import math

## Training set and test set

In [64]:
def disk(nb_points) :
    radius = 1/np.sqrt(2*np.pi)
    inp = Tensor(nb_points,2).uniform_(0,1)
    ratio = torch.floor(torch.norm(inp,p=2,dim=1)/radius)
    target = 1-torch.clamp(ratio,min=0,max=1)
    return inp, target

nb_points = 1000
# create train set and respective labels
train_input , train_target = disk(nb_points)
# create test set and respective labels
test_input , test_target = disk(nb_points)

#print(train_input, train_target, test_input, test_target)

# data normalization
mu_train , std_train = train_input.mean() , train_input.std()
train_input.sub_(mu_train).div_(std_train)
mu_test , std_test = test_input.mean() , test_input.std()
test_input.sub_(mu_test).div_(std_test)
# later see whether it is convenient to normalize component-wise with broadcasting (pag 66 week5)

# then do sanity check to see if the two classes are balanced


-0.7314 -0.0077
 1.6915  1.6529
 0.1061 -0.3005
       ⋮        
-0.7322 -0.9411
 1.0942 -1.2787
 1.3752 -0.1952
[torch.FloatTensor of size 1000x2]

## Simple structure

In [65]:
class Module ( object ) :
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return []

#### Activation functions

In [66]:
def tanh(x):
    return x.tanh()

def dtanh(x):
    return 1-torch.tanh(x).pow(2)


class Sigma(Module):
    def __init__(self):
        super().__init__()

    def forward(self,input):
        self.input = input
        return tanh(input)
    
    def backward(self,output):
        return dtanh(self.input)*output
    
    # here you need to add "def param" too

In [67]:
def relu(x):
    return max(0,x)

def drelu(x):
    if x>0:
        out = 1
    elif x<0:
        out = 0
    return out

class Relu(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,input):
        self.input = input
        return relu(input)
    
    def backward(self,output):
        return drelu(output) #to check, probably incorrect
    
    # here you need to add "def param" too

#### Loss functions

In [68]:
def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

class Loss(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self, input, target):
        self.input = input
        return loss(input, target)
    
    def backward(self, output):
        return dloss(output,target)

In [93]:
# definition of Linear
class Linear(Module):
    # in_features: size of each input sample
    # out_features: size of each output sample
    # bias: If set to False, the layer will not learn an additive bias. Default: ``True``

    # Attributes:
    # weight: the learnable weights of the module of shape (out_features x in_features)`
    # bias:   the learnable bias of the module of shape `(out_features)`

    def __init__(self, in_features, out_features):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weights = Tensor(out_features,in_features).normal_(0,1e-6)
        
    def forward(self,input):
        self.input = input
        #print('ecco i pesi:',self.weights)
        self.s = torch.mv(self.weights,input)
        return torch.mv(self.weights,input)
    
    def backward_last(self, output, target, dl_dw, dl_db, x_m): # output would be x3 
        dl_dx = dloss(output, target)
        dl_ds = Sigma().backward(self.s)*dl_dx
        dl_dw.add_(dl_ds.view(-1, 1).mm(x_m.view(1, -1)))  
        dl_db.add_(dl_ds)
        return dl_dw, dl_db, dl_ds
    
    def backward(self, dl_dw, dl_db, w_next, dl_ds_next, x_m):  
        # serve output negli argomenti, e target?? penso di no perchè li metto nel backward_last e poi non servono
        dl_dx = w_next.t().mv(dl_ds_next)
        dl_ds = Sigma().backward(self.s)*dl_dx
        dl_dw.add_(dl_ds.view(-1, 1).mm(x_m.view(1, -1)))   
        dl_db.add_(dl_ds)
        return dl_dw, dl_db, dl_ds      
    
    

In [94]:
# network structure QUI LA RISCRIVO PASSO PASSO PER VERIFICARE CHE FUNZIONI OGNI STEP
class Net(Module):
    def __init__(self):
        self.fc1 = Linear(input_size,hidden_units)  # first hidden layer
        self.fc2 = Linear(hidden_units,hidden_units) # second hidden layer
        self.fc3 = Linear(hidden_units,output_size) # third hidden layer
    
    """def forward(self,x):       # this is my original version
        x = self.fc1.forward(x)
        print('fc1 forward',x)
        x = Sigma().forward(x)
        print('sigma 1st time',x)
        x = self.fc2.forward(x)
        print('fc2 forward',x)
        x = Sigma().forward(x)
        print('sigma 2nd time',x)
        x = self.fc3.forward(x)
        print('fc3 forward',x)
        return x"""
    def forward(self,x):
        s1 = self.fc1.forward(x)
        print('fc1 forward',x)
        x1 = Sigma().forward(s1)
        print('sigma 1st time',x)
        s2 = self.fc2.forward(x1)
        print('fc2 forward',x)
        x2 = Sigma().forward(s2)
        print('sigma 2nd time',x)
        s3 = self.fc3.forward(x2)
        print('fc3 forward',x)
        x3 = s3
        return x1, s1, x2, s2, x3
    
    def backward(self, w1, b1, w2, b2, w3, b3,
                  t,
                  x, x1, x2, x3, s1, s2,  
                  dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3):  # modifica qui gli input non sono tutti necessari(?)
        x0 = x
        # last layer
        dl_dw3 , dl_db3, dl_ds3 = self.fc3.backward_last(output = x3, target = t, dl_dw = dl_dw3, dl_db = dl_db3, x_m = x2)
        # previous layers
        dl_dw2 , dl_db2 = self.fc2.backward(dl_dw = dl_dw2, dl_db = dl_db2, w_next = w3, dl_ds_next = dl_ds3, x_m = x1)
        dl_dw1 , dl_db1 = self.fc1.backward(dl_dw = dl_dw1, dl_db = dl_db1, w_next = w2, dl_ds_next = dl_ds2, x_m = x0)
                
        #return ? dl_dw , dl_db necessary ? 


In [95]:
prova = train_input[0]
istanza = Linear(in_features = len(train_input[0]), out_features = 2)
istanza.forward(prova)
###########################################à
istanzaNet = Net()  # qui chiami semplicemente __init__ di Net , che a sua volta chiama Linear, __init__ di Linear in particolare
prova = train_input[0]
#istanzaNet.forward(prova)  # qui sto chiamando il forward di Net. Ma leggi bene il forward di Net: a sua volta chiama un forward, che è quello di Linear
x = train_input[0]
t = train_target[0]
x1, s1, x2, s2, x3 = istanzaNet.forward(x)
istanzaNet.backward(w1, b1, w2, b2, w3, b3,
                  t,
                  x, x1, x2, x3, s1, s2,  
                  dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3)


fc1 forward 
 1.0492
 1.6063
[torch.FloatTensor of size 2]

sigma 1st time 
 1.0492
 1.6063
[torch.FloatTensor of size 2]

fc2 forward 
 1.0492
 1.6063
[torch.FloatTensor of size 2]

sigma 2nd time 
 1.0492
 1.6063
[torch.FloatTensor of size 2]

fc3 forward 
 1.0492
 1.6063
[torch.FloatTensor of size 2]



AttributeError: 'Sigma' object has no attribute 'input'

### Build a network with 2 input units, 2 output units, 3 hidden layers with 25 units

In [72]:
# network parameters
input_size = 2
output_size = 2
hidden_units = 25

In [73]:
# network structure I AM NOT USING THIS ONE
class Net(Module):
    def __init__(self):
        self.fc1 = Linear(input_size,hidden_units)  # first hidden layer
        self.fc2 = Linear(hidden_units,hidden_units) # second hidden layer
        self.fc3 = Linear(hidden_units,output_size) # third hidden layer
    
    def forward(self,x):
        x = self.fc1.forward(x)
        x = relu().forward(x)
        x = self.fc2(x)
        x = relu().forward(x)
        x = self.fc3(x)
        return x 
    
    def backward(w1, b1, w2, b2, w3, b3,
                  t,
                  x, s1, x1, s2, x2, s3, x3,
                  dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3):
        x0 = x
        dl_dx3 = dloss(x3, t)
        dl_ds3 = dsigma(s3) * dl_dx3
        dl_dx2 = w3.t().mv(dl_ds3)
        dl_ds2 = dsigma(s2) * dl_dx2
        dl_dx1 = w2.t().mv(dl_ds2)
        dl_ds1 = dsigma(s1) * dl_dx1

        dl_dw3.add_(dl_ds3.view(-1, 1).mm(x1.view(1, -1)))
        dl_db3.add_(dl_ds3)
        dl_dw2.add_(dl_ds2.view(-1, 1).mm(x1.view(1, -1)))
        dl_db2.add_(dl_ds2)
        dl_dw1.add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
        dl_db1.add_(dl_ds1)


## Training

In [23]:
# training set
train_input = train_input
train_target = train_target
# network parameters
model = Net()
input_size = 2
output_size = 2
hidden_units = 25
# training parameters
lr = 0.1
nb_epochs = 10
# create weight and bias vectors
eps = 1e-6
w1 = Tensor(hidden_units, input_size).normal_(0, eps)
b1 = Tensor(hidden_units).normal_(0, eps)
w2 = Tensor(hidden_units, hidden_units).normal_(0, eps)
b2 = Tensor(hidden_units).normal_(0, eps)
w3 = Tensor(output_size, hidden_units).normal_(0,eps)
b3 = Tensor(output_size).normal_(0, eps)
# create derivatives
dl_dw1 = Tensor(w1.size())
dl_db1 = Tensor(b1.size())
dl_dw2 = Tensor(w2.size())
dl_db2 = Tensor(b2.size())
dl_dw3 = Tensor(w3.size())
dl_db3 = Tensor(b3.size())

#prova = .forward(train_input[0])


In [31]:
train_input[0]

TypeError: max received an invalid combination of arguments - got (out=NoneType, axis=NoneType, ), but expected one of:
 * no arguments
 * (torch.FloatTensor other, *, torch.FloatTensor out)
      didn't match because some of the keywords were incorrect: axis
 * (int dim, *, tuple[torch.FloatTensor, torch.LongTensor] out)
 * (int dim, bool keepdim, *, tuple[torch.FloatTensor, torch.LongTensor] out)
