In [1]:
from torch import Tensor
#from torch import LongTensor
import math
import numpy as np

## Generate dataset:

In [2]:
1/math.sqrt(2*math.pi)

0.3989422804014327

In [3]:
def generate_disc_set(nb):
    input = Tensor(nb, 2).uniform_(0,1)
    R = 1/math.sqrt(2*math.pi) # Radius of the disk
    target = (R - input.pow(2).sum(1).sqrt()).sign()#.long()
    target.add_(1).div_(2) # to transform [-1,1] into [0,1]
    #target = input.pow(2).sum(1).mul(-1).add(1 / 2/ math.pi).sign().add(1).div(2).long() # prof version
    return input, target


train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mu, std = train_input.mean(0),train_input.std(0)
train_input.sub_(mu).div_(std)
mu, std = test_input.mean(0),test_input.std(0)
test_input.sub_(mu).div_(std)

#train_input, train_target = Variable(train_input), Variable(train_target)
#test_input, test_target = Variable(test_input), Variable(test_target)

# one hot?
#one_hot_targets = np.eye(2)[train_target]
#train_target = one_hot_targets
#train_target = Tensor(train_target)

mini_batch_size = 100
print (train_input.size(), train_target.size())
#print(train_input[0:10],train_target[0:10])

torch.Size([1000, 2]) torch.Size([1000])


### Activation functions:

In [4]:
def sigma_tanh(x):
    return x.tanh()

def dsigma_tanh(x):
    return (x.exp() + x.mul(-1).exp()).pow(-2)

def sigma_relu(x):
    if x>0 : return x
    else : return 0

#def dsigma_relu(x):

sigma = sigma_tanh
dsigma = dsigma_tanh

In [5]:
train_target[0:10]


 0
 0
 0
 1
 1
 0
 0
 0
 0
 0
[torch.FloatTensor of size 10]

### suggested structure:

In [6]:
class Module ( object ) :
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return []
    
# prof version

######################################################################

def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

######################################################################
# from F
class Model(Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

def linear(input, weight, bias=None):
    
    output = input.matmul(weight.t())
    if bias is not None:
        output += bias
    return output


class Linear(Module):
    
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input):
        return F.linear(input, self.weight, self.bias)

######################################################################
def linear_forward(x,w,b):
    s = w.mv(x) + b
    x = sigma(s)
    return s,x

def linear_backward(x0, x, dl_ds_prev, dl_dw, dl_db, initial = False):
    
    dl_dx = w.t().mv(dl_ds_prev)
    dl_ds = dsigma(s1) * dl_dx   
    dl_dw.add_(dl_ds.view(-1, 1).mm(x0.view(1, -1)))
    dl_db.add_(dl_ds)

def forward_pass(ws, bs, x, test = False):
    x0 = x
    
    s1, x1 = linear_forward(x0,ws[0],bs[0])
    s2, x2 = linear_forward(x1,ws[1],bs[1])
    
    xs = [x1, x2]
    ss = [s1, s2]
    if test: 
        return xs[-1]
    return x0, xs, ss


def backward_pass(ws, bs,
                  t,
                  x, xs, ss,
                  dl_dws, dl_dbs):
    x0 = x
    
    dl_dx2 = dloss(xs[-1], t)
    dl_ds2 = dsigma(ss[2-1]) * dl_dx2
    dl_dws[2-1].add_(dl_ds2.view(-1, 1).mm(xs[1-1].view(1, -1)))
    dl_dbs[2-1].add_(dl_ds2)
    
    dl_dx1 = ws[2-1].t().mv(dl_ds2) # w2
    dl_ds1 = dsigma(ss[1-1]) * dl_dx1   
    
    dl_dws[1-1].add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
    dl_dbs[1-1].add_(dl_ds1)

# Training:

In [61]:


zeta = 0.9
train_input = train_input
test_input = test_input

nb_hidden=50
print ('train_input.size(): ', train_input.size())
nb_classes = 2 #train_target.size(1) 
nb_train_samples = train_input.size(0)

eta = 0.1 / train_target.size(0)

eps = 1e-6

# weights and biases
w1 = Tensor(nb_hidden, train_input.size(1)).normal_(0,1)
b1 = Tensor(nb_hidden).normal_(0,1)
w2 = Tensor(nb_classes, nb_hidden).normal_(0,eps)
b2 = Tensor(nb_classes).normal_(0,eps)

# lists
ws = [w1, w2]
bs = [b1, b2]
# derivatives of the losse wrt weights and biases
dl_dws = []
dl_dbs = []
for w in ws:
    dl_dws.append(Tensor(w.size()))
for b in bs:
    dl_dbs.append(Tensor(b.size()))


epochs = 250
for k in range (0,epochs):
    
    acc_loss = 0
    nb_train_errors = 0
    
    # set the storage to 0
    for i in range(0, len(dl_dws)):
        dl_dws[i].zero_()
        dl_dbs[i].zero_()
    
    # for each sample run forward and backward pass
    for n in range(0, nb_train_samples):
        
        # run forward pass
        x0, xs, ss = forward_pass(ws, bs, train_input[n])
        
        # prediction is the maximum predicted class
        
        predicted = xs[-1].max(dim = 0)[1] # dim is the axis, 1 for taking index, 0 to just select the value
        pred = predicted [0]
        #print(predicted)
        #pred = xs[-1][0]
        #print(xs[-1])
        #print(pred)
        # check wether the target was 1 or -1 --> verify if positif
        if train_target[n] != int(pred) : nb_train_errors = nb_train_errors + 1 # if == -1 lets say :p 
        acc_loss += loss(Tensor(1).fill_(pred), train_target[n])
        #acc_loss += loss(pred, train_target[n])

        # run backward pass
        backward_pass(ws, bs,
                      train_target[n],
                      x0, xs, ss,
                      dl_dws, dl_dbs)
    
    # Gradient step
    for i in range(0, len(ws)):
        ws[i] = ws[i] - eta * dl_dws[i]
        bs[i] = bs[i] - eta * dl_dbs[i]

    # Test error
    nb_test_errors = 0
    for n in range(0, test_input.size(0)):
        output = forward_pass(ws, bs, test_input[n], test=True)

        pred = output.max(0)[1][0]
        if test_target[n] != int(output[0]) : nb_test_errors = nb_test_errors + 1  


    print(k,' --> acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))

train_input.size():  torch.Size([1000, 2])
0  --> acc_train_loss 473.00 acc_train_error 47.30% test_error 12.40%
1  --> acc_train_loss 486.00 acc_train_error 48.60% test_error 12.40%
2  --> acc_train_loss 480.00 acc_train_error 48.00% test_error 12.40%
3  --> acc_train_loss 454.00 acc_train_error 45.40% test_error 12.40%
4  --> acc_train_loss 456.00 acc_train_error 45.60% test_error 12.40%
5  --> acc_train_loss 452.00 acc_train_error 45.20% test_error 12.40%
6  --> acc_train_loss 430.00 acc_train_error 43.00% test_error 12.40%
7  --> acc_train_loss 426.00 acc_train_error 42.60% test_error 12.40%
8  --> acc_train_loss 426.00 acc_train_error 42.60% test_error 12.40%
9  --> acc_train_loss 424.00 acc_train_error 42.40% test_error 12.40%
10  --> acc_train_loss 424.00 acc_train_error 42.40% test_error 12.40%
11  --> acc_train_loss 428.00 acc_train_error 42.80% test_error 12.40%
12  --> acc_train_loss 427.00 acc_train_error 42.70% test_error 12.40%
13  --> acc_train_loss 429.00 acc_train_erro

116  --> acc_train_loss 477.00 acc_train_error 47.70% test_error 12.40%
117  --> acc_train_loss 471.00 acc_train_error 47.10% test_error 12.40%
118  --> acc_train_loss 481.00 acc_train_error 48.10% test_error 12.40%
119  --> acc_train_loss 474.00 acc_train_error 47.40% test_error 12.40%
120  --> acc_train_loss 475.00 acc_train_error 47.50% test_error 12.40%
121  --> acc_train_loss 466.00 acc_train_error 46.60% test_error 12.40%
122  --> acc_train_loss 472.00 acc_train_error 47.20% test_error 12.40%
123  --> acc_train_loss 475.00 acc_train_error 47.50% test_error 12.40%
124  --> acc_train_loss 480.00 acc_train_error 48.00% test_error 12.40%
125  --> acc_train_loss 476.00 acc_train_error 47.60% test_error 12.40%
126  --> acc_train_loss 467.00 acc_train_error 46.70% test_error 12.40%
127  --> acc_train_loss 468.00 acc_train_error 46.80% test_error 12.40%
128  --> acc_train_loss 471.00 acc_train_error 47.10% test_error 12.40%
129  --> acc_train_loss 472.00 acc_train_error 47.20% test_error

230  --> acc_train_loss 483.00 acc_train_error 48.30% test_error 12.40%
231  --> acc_train_loss 482.00 acc_train_error 48.20% test_error 12.40%
232  --> acc_train_loss 493.00 acc_train_error 49.30% test_error 12.40%
233  --> acc_train_loss 494.00 acc_train_error 49.40% test_error 12.40%
234  --> acc_train_loss 498.00 acc_train_error 49.80% test_error 12.40%
235  --> acc_train_loss 488.00 acc_train_error 48.80% test_error 12.40%
236  --> acc_train_loss 498.00 acc_train_error 49.80% test_error 12.40%
237  --> acc_train_loss 489.00 acc_train_error 48.90% test_error 12.40%
238  --> acc_train_loss 492.00 acc_train_error 49.20% test_error 12.40%
239  --> acc_train_loss 513.00 acc_train_error 51.30% test_error 12.40%
240  --> acc_train_loss 513.00 acc_train_error 51.30% test_error 12.40%
241  --> acc_train_loss 497.00 acc_train_error 49.70% test_error 12.40%
242  --> acc_train_loss 496.00 acc_train_error 49.60% test_error 12.40%
243  --> acc_train_loss 496.00 acc_train_error 49.60% test_error