In [1]:
import torch

import dlc_practical_prologue as prologue
import math
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x2605a952688>

# Implementing modules

In [2]:
'''
'''
class Module ( object ) :
    def __call__(self, *args, **kwargs):
        return self.forward(*args)
    def forward ( self , * input ) :
        raise NotImplementedError
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
    def param ( self ) :
        return []



In [3]:

class Linear(Module):
    
    '''
    Applies a linear transformation to the incoming data: y = xA^T + b
    
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``

    Attributes:
        weight: the learnable weights of the module 
        bias:   the learnable bias of the module of shape 
        

    '''
    def __init__(self, in_features, out_features, bias = True):
        self.in_features = in_features
        self.out_features = out_features

        
        # Initialize weight and bias
        # according to https://discuss.pytorch.org/t/how-are-layer-weights-and-biases-initialized-by-default/13073
        
        if bias:
            self.bias = torch.empty(1,out_features)
        self.weight = torch.empty(in_features,out_features)
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)
        
        # cache the input whenever we go forward
        self.x = 0
        self.grad_weight = torch.empty(in_features,out_features)
        self.grad_bias = torch.empty(1,out_features)
            
        
    #Applies a linear transformation to the incoming data: y = xA^T + b   
    def forward(self,input):
        self.x = input
        output = input.mm(self.weight)
        if self.bias is not None:
            output += self.bias

        return output


    def backward(self,input):
        x = self.x
        w = self.weight
        b = self.bias
        
        # divide weigthts by batch size 
        # inspiration from https://stats.stackexchange.com/questions/183840/sum-or-average-of-gradients-in-mini-batch-gradient-decent
        db = input.sum(0).div(input.size()[0])
        dx = input.mm(w.t())
        dw = x.t().mm(input).div(input.size()[0])
        
        

        self.grad_bias += db
        self.grad_weight += dw


        return dx
            

    def param(self):
        return[(self.weight,self.grad_weight),(self.bias,self.grad_bias)]




In [4]:
'''
Applies the element-wise function Rectified Linear Unit

Args:
    in_features: size of each input sample
    out_features: size of each output sample
    bias: If set to ``False``, the layer will not learn an additive bias.
        Default: ``True``

'''
class ReLU(Module):
    def __init_(self):
        self.x = 0
    def forward( self,  input ):
        self.x = input
        s1 = input.clamp(min=0)
        return s1
    def backward(self, input):
        return (self.x>0).float()*input
    
        
        
    

In [5]:
'''
Applies the element-wise function Hyperbolic Tangent

'''
# TODO change implementation
class Tanh(Module):
    def forward(self,input):
        s1 = input.tanh()
        return s1
    def backward(self,input):
        x1 = 4 * (torch.exp(input) + torch.exp(torch.mul(input,-1))).pow(-2)
        return x1
        

In [6]:
'''
A sequential container.
Modules are added to it in the order they are passed in the constructor - in a list.

'''
class Sequential(Module):
    def __init__(self, param ):
        super().__init__()
        self.model = (param)
        
    def forward(self,x):
        for _ in self.model:
            x = _.forward(x)
        return x
    def backward(self,x):
        for _ in reversed(self.model):
            x = _.backward(x)
        return x
    
    def param(self):
        param_list = []
        for module in self.model:
            param_list.extend(module.param())

        return param_list

In [7]:
'''
Returns mean square error loss
'''
#TODO change to MSE => mean instead of sum
class MSELoss(Module):
    def forward(self,v,t):
        return (v-t).pow(2).sum(0).sum()
    def dloss(self,v,t):
        return (2*(v-t))
        
        

In [8]:
#TODO change this
class SGD(object):

    def __init__(self, params, lr=0.01):
        self.lr = lr
        self.params = params


    def step(self):

        for i, (p, p_grad) in enumerate(self.params):
            Vt = p_grad

            # update parameter

            p.add_(-self.lr*Vt)
    
    def zero_grad(self):

        for param in self.params:
            param[1].zero_()

# Loading the data

In [9]:
#TODO have a better disc set
def generate_disc_set(nb):
    train = torch.empty(nb,2).uniform_(-1,1)
    target = (train.pow(2).sum(1)<torch.empty(nb).fill_(math.sqrt(2/math.pi))).long()
    return train,target

In [10]:
#TODO change conversion
def target_to_onehot(target):
    res = torch.empty(target.size(0), 2).zero_()
    res.scatter_(1, target.view(-1, 1), 1.0).mul(0.9)
    return res

In [11]:
#TODO graph to see the data

In [12]:
train_input,train_target = generate_disc_set(1000)
test_input,test_target = generate_disc_set(1000)

mu,std = train_input.mean(0), train_input.std(0)
train_input.sub_(mu).div_(std)

mu,std = test_input.mean(0), test_input.std(0)
test_input.sub_(mu).div_(std)

tensor([[ 1.0167,  0.3364],
        [ 0.3039,  0.9305],
        [ 1.1378, -0.6998],
        ...,
        [ 1.4785,  1.5781],
        [ 1.2005, -0.4963],
        [-0.1300,  1.5770]])

In [13]:
train_target = target_to_onehot(train_target)


In [14]:
test_target = target_to_onehot(test_target)

In [15]:
batch_size = 100
n_epochs = 250
def train_model(model,train_input,train_target):
    criterion = MSELoss()
    optimizer = SGD(model.param(),lr = 0.01)
    for e in range(0,n_epochs):
        for input, targets in zip(train_input.split(batch_size),train_target.split(batch_size)):
            output = model(input)
            loss = criterion(output,targets)
            optimizer.zero_grad()
            model.backward(criterion.dloss(output,targets))
            optimizer.step()
            if e%50==0 :
                print('epoch: ',e,' loss: ',loss)

In [16]:
model = Sequential((Linear(2,25),Tanh(),Linear(25,25),Tanh(),Linear(25,25),Tanh(),Linear(25,2)))

In [23]:
train_model(model,train_input,train_target)

epoch:  0  loss:  tensor(12.1567)
epoch:  0  loss:  tensor(12.1539)
epoch:  0  loss:  tensor(13.3545)
epoch:  0  loss:  tensor(15.3937)
epoch:  0  loss:  tensor(10.0684)
epoch:  0  loss:  tensor(14.8465)
epoch:  0  loss:  tensor(13.3533)
epoch:  0  loss:  tensor(13.1386)
epoch:  0  loss:  tensor(14.6776)
epoch:  0  loss:  tensor(13.8126)
epoch:  50  loss:  tensor(11.9288)
epoch:  50  loss:  tensor(11.9749)
epoch:  50  loss:  tensor(13.1088)
epoch:  50  loss:  tensor(15.0257)
epoch:  50  loss:  tensor(9.8586)
epoch:  50  loss:  tensor(14.5178)
epoch:  50  loss:  tensor(13.0933)
epoch:  50  loss:  tensor(12.8365)
epoch:  50  loss:  tensor(14.4872)
epoch:  50  loss:  tensor(13.5364)
epoch:  100  loss:  tensor(11.6613)
epoch:  100  loss:  tensor(11.7632)
epoch:  100  loss:  tensor(12.8244)
epoch:  100  loss:  tensor(14.6032)
epoch:  100  loss:  tensor(9.6106)
epoch:  100  loss:  tensor(14.1451)
epoch:  100  loss:  tensor(12.7881)
epoch:  100  loss:  tensor(12.5033)
epoch:  100  loss:  tens

In [24]:
            
def compute_nb_errors(model,data_input,data_target):
    nb_errors = 0
    for input,targets in zip(data_input.split(batch_size),data_target.split(batch_size)):
        output = model(input)
        _,predicted_classes = torch.max(output,1)
        for i in range(0,output.size(0)):
            if(targets[i][predicted_classes[i]]!=1):
                nb_errors = nb_errors+1
                
    return nb_errors

In [25]:
compute_nb_errors(model,train_input,train_target)/1000 * 100

2.0

In [26]:
compute_nb_errors(model,test_input,test_target)/1000 * 100

4.0