In [7]:
import torch
from torch import Tensor
import numpy as np
import math

print (torch.__version__)

0.1.12


## Training set and test set

In [9]:
def disk(nb_points) :
    radius = 1/np.sqrt(2*np.pi)
    inp = Tensor(nb_points,2).uniform_(0,1)
    ratio = torch.floor(torch.norm(inp,p=2,dim=1)/radius)
    target = 1-torch.clamp(ratio,min=0,max=1)
    t = Tensor(nb_points,2)
    print('ciao',target)
    for i in range(nb_points):
        if target[i].numpy() == 0:
            t[i,:] = Tensor([-1,1])
        elif target[i].numpy() == 1:
            t[i,:] = Tensor([1,-1])
    print(t)    
    return inp, t

nb_points = 1000
# create train set and respective labels
train_input , train_target = disk(nb_points)
# create test set and respective labels
test_input , test_target = disk(nb_points)

"""
target=torch.zeros(train_target.shape[0],2)-1
target[:,0][train_target==0]=1
target[:,1][train_target==1]=1
"""
#print(train_input, train_target, test_input, test_target)

# data normalization
# , std_train = train_input.mean() , train_input.std()
#train_input.sub_(mu_train).div_(std_train)
#mu_test , std_test = test_input.mean() , test_input.std()
#test_input.sub_(mu_test).div_(std_test)
# later see whether it is convenient to normalize component-wise with broadcasting (pag 66 week5)

# then do sanity check to see if the two classes are balanced
print(train_target[-1,:])

ciao 
    0
    0
    0
  ⋮   
    0
    0
    0
[torch.FloatTensor of size 1000x1]


   -1     1
   -1     1
   -1     1
     ⋮      
   -1     1
   -1     1
   -1     1
[torch.FloatTensor of size 1000x2]

ciao 
    1
    0
    0
  ⋮   
    1
    0
    0
[torch.FloatTensor of size 1000x1]


    1    -1
   -1     1
   -1     1
     ⋮      
    1    -1
   -1     1
   -1     1
[torch.FloatTensor of size 1000x2]


-1
 1
[torch.FloatTensor of size 2]



## Simple structure

In [10]:
class Module ( object ) :
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return []

#### Activation functions

In [11]:
def tanh(x):
    return x.tanh()

def dtanh(x):
    return 1-torch.tanh(x).pow(2)


class Sigma(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,input):
        return tanh(input)
    
    def backward(self, output):
        #return dtanh(input)*output
        return 4 * (output.exp() + output.mul(-1).exp()).pow(-2)
    
    # here you need to add "def param" too

In [12]:
def relu(x):
    return max(0,x)

def drelu(x):
    if x>0:
        out = 1
    elif x<0:
        out = 0
    return out

class Relu(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,input):
        self.input = input
        return relu(input)
    
    def backward(self,output):
        return drelu(output) #to check, probably incorrect
    
    # here you need to add "def param" too

#### Loss functions

In [13]:
def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

class Loss(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self, input, target):
        self.input = input
        return loss(input, target)
    
    def backward(self, output):
        return dloss(output,target)

In [14]:
# definition of Linear
class Linear(Module):
    # in_features: size of each input sample
    # out_features: size of each output sample
    # bias: If set to False, the layer will not learn an additive bias. Default: ``True``

    # Attributes:
    # weight: the learnable weights of the module of shape (out_features x in_features)`
    # bias:   the learnable bias of the module of shape `(out_features)`

    def __init__(self, in_features, out_features):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weights = Tensor(out_features,in_features).normal_(0,1e-6)
        self.bias = Tensor(out_features).zero_()
        self.dl_dw = Tensor(out_features,in_features).zero_()
        self.dl_db = Tensor(out_features).zero_()
                
    def forward(self,input):
        self.input = input
        #print('ecco i pesi:',self.weights)
        self.s = torch.mv(self.weights,input)
        return torch.mv(self.weights,input)
    
    def backward_last(self, output, target): # output would be x3 
        dl_dx = dloss(output, target)
        dl_ds = Sigma().backward(self.s)*dl_dx 
        self.dl_dw.add_(dl_ds.view(-1, 1).mm(self.input.view(1, -1)))  
        self.dl_db.add_(dl_ds)
        #print('last layer',self.dl_dw,'dl_ds', dl_ds)
        return dl_ds, self.weights
    
    def backward(self, dl_ds_next, w_next):  
        #dl_dx = self.weights.t().mv(dl_ds_next)   # the problem is here! 
        dl_dx = w_next.t().mv(dl_ds_next)
        dl_ds = Sigma().backward(self.s)*dl_dx 
        self.dl_dw.add_(dl_ds.view(-1, 1).mm(self.input.view(1, -1)))   
        self.dl_db.add_(dl_ds)
        return dl_ds, self.weights      
    
    

In [None]:
class Net(Module):
    def __init__(self):
        self.fc1 = Linear(input_size,hidden_units)  # first hidden layer
        self.fc2 = Linear(hidden_units,hidden_units) # second hidden layer
        self.fc3 = Linear(hidden_units,output_size) # third hidden layer
   
    def forward(self,x):
        s1 = self.fc1.forward(x)
        #print('fc1 forward',x)
        x1 = Sigma().forward(s1)
        #print('sigma 1st time',x)
        s2 = self.fc2.forward(x1)
        #print('fc2 forward',x)
        x2 = Sigma().forward(s2)
        #print('sigma 2nd time',x)
        s3 = self.fc3.forward(x2)
        #print('fc3 forward',x)
        x3 = Sigma().forward(s3)
        return x3
    
    def backward(self, t, x3):  
        # last layer
        dl_ds3, w3 = self.fc3.backward_last(output = x3, target = t)
        # previous layers
        dl_ds2, w2 = self.fc2.backward(dl_ds_next = dl_ds3, w_next = w3)
        dl_ds1, w1 = self.fc1.backward(dl_ds_next = dl_ds2, w_next = w2)
        #print('dl_dw3',dl_dw3, 'dl_dw2', dl_dw2, 'dl_dw1', dl_dw1)        


### Build a network with 2 input units, 2 output units, 3 hidden layers with 25 units

In [16]:
# network parameters
input_size = 2
output_size = 2
hidden_units = 25

In [19]:
train_target[0]


-1
 1
[torch.FloatTensor of size 2]

## Training

In [20]:
# training set
train_input = train_input
train_target = train_target
# network parameters
model = Net()
input_size = 2
output_size = 2
hidden_units = 25
# training parameters
lr = 10
nb_epochs = 100

for k in range(0, nb_epochs):

    # Back-prop

    acc_loss = 0
    nb_train_errors = 0

    model.fc1.dl_dw.zero_()
    model.fc1.dl_db.zero_()
    model.fc2.dl_dw.zero_()
    model.fc2.dl_db.zero_()
    model.fc3.dl_dw.zero_()
    model.fc3.dl_db.zero_()
    
    #print('cycle',k,'before',model.fc1.dl_dw)
    for n in range(0, train_input.size(0)):
    #for n in range(0, 25):
        x = train_input[n]
        t = train_target[n]
        x3 = model.forward(x)
                       
        pred = x3.max(0)[1][0]  # the result is the index (0 or 1) of the position where the max value is
        targ = train_target[n,:].max(0)[1][0]
        if targ != pred:
            nb_train_errors = nb_train_errors + 1 
            
        #print('sample num',n,'x3',x3,'pred x3',pred,'targ',targ,'err',nb_train_errors)
        
        acc_loss += loss(x3, t)  
        #if k > 35 : 
            #myloss = loss(x3,t)
            #print('sample',n,'x3',x3,'target',t,'myloss',myloss,'acc_loss',acc_loss)
        #acc_loss += (x3 - t).pow(2).sum()
        
        
        model.backward(t, x3)
    #print('dl_dw1',dl_dw1)
    
    # Gradient step
    #print('cycle',k,'after',model.fc1.dl_dw)
    model.fc1.weights = model.fc1.weights - lr * model.fc1.dl_dw
    model.fc1.bias = model.fc1.bias - lr * model.fc1.dl_db
    model.fc2.weights = model.fc2.weights - lr * model.fc2.dl_dw
    model.fc2.bias = model.fc2.bias - lr * model.fc2.dl_db
    model.fc3.weights = model.fc3.weights - lr * model.fc3.dl_dw
    model.fc3.bias = model.fc3.bias - lr * model.fc3.dl_db
    #print('cycle',k,'weights for layer 1 are', model.fc1.weights)
    
    print('epoch {:d} acc_train_loss {:.02f} acc_train_error {:.02f}% , magnitude x3 {:.2g}'.format(k,acc_loss,
                                                                            (100 * nb_train_errors) / train_input.size(0) , x3[0]))
     



epoch 0 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -2.4e-18
epoch 1 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -1.1e-17
epoch 2 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -2.1e-17
epoch 3 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -3.6e-17
epoch 4 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -5.7e-17
epoch 5 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -9e-17
epoch 6 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -1.4e-16
epoch 7 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -2.4e-16
epoch 8 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -4.2e-16
epoch 9 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -7.8e-16
epoch 10 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -1.7e-15
epoch 11 acc_train_loss 2000.00 acc_train_error 11.50% , magnitude x3 -4.1e-15
epoch 12 acc_train_loss 2000.00 acc_train_error 11.50% , magnitu

In [18]:
"""   
# Test error

    nb_test_errors = 0

    for n in range(0, test_input.size(0)):
        _, _, _, _, x3 = model.forward(x)

        pred = x3.max(0)[1][0]
        if test_target[n, pred] < 0: nb_test_errors = nb_test_errors + 1

    print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(k,
                  acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))
"""

"   \n# Test error\n\n    nb_test_errors = 0\n\n    for n in range(0, test_input.size(0)):\n        _, _, _, _, x3 = model.forward(x)\n\n        pred = x3.max(0)[1][0]\n        if test_target[n, pred] < 0: nb_test_errors = nb_test_errors + 1\n\n    print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'\n          .format(k,\n                  acc_loss,\n                  (100 * nb_train_errors) / train_input.size(0),\n                  (100 * nb_test_errors) / test_input.size(0)))\n"