In [1]:
from torch import Tensor
#from torch import LongTensor
import torch
import math
import numpy as np

## Generate dataset:

In [2]:
1/math.sqrt(2*math.pi)

0.3989422804014327

In [3]:
def generate_disc_set(nb):
    input = Tensor(nb, 2).uniform_(0,1)
    R = 1/math.sqrt(2*math.pi) # Radius of the disk
    target = (R - input.pow(2).sum(1).sqrt()).sign()#.long()
    target.add_(1).div_(2) # to transform [-1,1] into [0,1]
    #target = input.pow(2).sum(1).mul(-1).add(1 / 2/ math.pi).sign().add(1).div(2).long() # prof version
    return input, target


train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mu, std = train_input.mean(),train_input.std()
train_input.sub_(mu).div_(std)
mu, std = test_input.mean(),test_input.std()
test_input.sub_(mu).div_(std)

#train_input, train_target = Variable(train_input), Variable(train_target)
#test_input, test_target = Variable(test_input), Variable(test_target)

# one hot?
#one_hot_targets = np.eye(2)[train_target]
#train_target = one_hot_targets
#train_target = Tensor(train_target)

mini_batch_size = 100
print (train_input.size(), train_target.size())
#print(train_input[0:10],train_target[0:10])
print(train_target.mean())

torch.Size([1000, 2]) torch.Size([1000, 1])
0.133


# Baseline

In [4]:
import numpy as np
from sklearn import linear_model
def add_squared(data):
    square = np.power(data,2)[:,0]+np.power(data,2)[:,1].reshape(1,-1)
    square = np.transpose(square)
    return np.append(data,square,axis=1)

# train the model
X = train_input.numpy()
X = add_squared(X)
Y = train_target.numpy()
clf = linear_model.SGDClassifier(max_iter=5000)
clf.fit(X, Y.ravel())

# test the function
test = test_input.numpy()
test = add_squared(test)
Y_test = test_target.numpy()
nb_errors = 0
for i,x in enumerate(clf.predict(test)):
    if x!= Y_test[i]:
        nb_errors += 1
print(nb_errors/test.shape[0]*100,'%')

1.9 %


### Activation functions:

In [5]:
def tanh(x):
    return x.tanh()

def d_tanh(x):
    return (x.exp() + x.mul(-1).exp()).pow(-2)

def relu(x):
    if x>0 : return x
    else : return x.fill(0)
    
def d_relu(x):
    if x>o: return x.fill(1)
    else : return x.fill(0)

def mse(x,t):
    return (x - t).pow(2).sum()

#def dsigma_relu(x):

sigma = tanh
dsigma = d_tanh

### suggested structure:

In [6]:
class Module ( object ) :
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def param ( self ) :
        return [] 
    
class Linear(Module):
    
    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.Tensor(out_features, in_features)
        self.bias   = torch.Tensor(out_features)
        
        self.d_weight = torch.Tensor(out_features, in_features).fill_(0.0)
        print(self.weight.size())
        self.d_bias   = torch.Tensor(out_features).fill_(0.0)
        
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1)) # *sqrt(3)?
        self.weight.uniform_(-stdv, stdv)
        self.bias.uniform_(-stdv, stdv)

    def forward(self, input):
        self.input = torch.mv(self.weight, input.view(-1))
        #self.input = torch.add(self.input,self.bias)
        return self.input
        
    def backward ( self ,d_output ) :
        tmp = d_output.view(-1, 1) * (self.input.view(1, -1))
        print(tmp.size())
        self.d_weight.add_(tmp) 
        self.d_bias.add(d_output)
        d_input = self.weight.t() * d_output #mv()
        return d_input
    
   # self._gradient.add_(d_dy.view(-1,1)*self._input.view(1,-1))        
    
def backward_pass(w1, b1, w2, b2,
              t,
              x, s1, x1, s2, x2,
              dl_dw1, dl_db1, dl_dw2, dl_db2):
    x0 = x
    dl_dx2 = dloss(x2, t)
    dl_ds2 = dsigma(s2) * dl_dx2
    dl_dx1 = w2.t().mv(dl_ds2)
    dl_ds1 = dsigma(s1) * dl_dx1

    dl_dw2.add_(dl_ds2.view(-1, 1).mm(x1.view(1, -1)))
    dl_db2.add_(dl_ds2)
    dl_dw1.add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
    dl_db1.add_(dl_ds1)

class Tanh(Module):
    
    def __init__(self):
        super().__init__()
    
    def forward(self, input):
        self.input = input
        return self.input.tanh()
    
    def backward(self, d_output):
        x = self.input
        d_input = d_tanh(x) * d_output
        return d_input
     


class LossMSE(Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,x,t):
        self.input=x
        d_input = mse(self.input,t)
        return d_input
        
    def backward(self):
        d_output = 2 * self.input
        return d_output
    
class Sequential(Module):

    def __init__(self,modules):
        super().__init__()
        self.modules = modules
        self.loss = LossMSE()
        
    
    #done by luca
    def add(self, module):
        self.modules.append(module)

    def forward(self, input, target):
        for module in self.modules:
            input = module.forward(input) # module.forward?
        return input, self.loss.forward(input, target)
    
    def backward(self):
        out = self.loss.backward()# arguments??
        n = len(self.modules)-1 # -1 for the loss?
        for i in range(0,n):
            out = self.modules[n-i].backward(out)
        return out
            


In [7]:
hidden = 20
lay1 = Linear(2,hidden)
lay2 = Linear(hidden,2)
layers = [lay1,lay2]
net = Sequential(modules = layers)

torch.Size([20, 2])
torch.Size([2, 20])


In [8]:
net.forward(Tensor([0.2 , 0.3]),Tensor([-1,1]))

(
 1.00000e-02 *
  -6.2781
   6.9773
 [torch.FloatTensor of size 2], 1.743702232837677)

In [9]:
net.backward()

torch.Size([2, 1])


RuntimeError: inconsistent tensor size at /opt/conda/conda-bld/libtorch_1493853448612/work/pytorch-0.1.12/torch/lib/TH/generic/THTensorMath.c:831

In [10]:
a = 2
def f(x,a=2):
    return a*x
a=3
f(5)

class net(Module)
    def __init__(self,module_list, loss):
        
        self.modules = module_list[0]
        self.loss = loss
    
    def forward(self,x):
        for module in self.modules:
            x = module.forward(x)
        
    def backward(self):
        
        


SyntaxError: invalid syntax (<ipython-input-10-69157f70c632>, line 7)

In [11]:
aaa = Tensor([2.0,3.0])
m1 = Tanh()
m2 = Tanh()
m1.forward(aaa)
m2.forward(aaa)

print(m1.backward(aaa),d_tanh(aaa))


1.00000e-02 *
  3.5325
  0.7400
[torch.FloatTensor of size 2]
 
1.00000e-02 *
  1.7663
  0.2467
[torch.FloatTensor of size 2]



In [12]:
m2.backward(aaa)


1.00000e-02 *
  3.5325
  0.7400
[torch.FloatTensor of size 2]

In [13]:
aaa = Tensor([2.0,3.0])
t = Tensor([0,0])
m = LossMSE()
m.forward(aaa,t)
print(m.backward(aaa),sigma_tanh(aaa))

TypeError: backward() takes 1 positional argument but 2 were given

https://stackoverflow.com/questions/576169/understanding-python-super-with-init-methods for using super()

In [14]:
# prof version

######################################################################

def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

######################################################################
# from F
class Model(Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

def linear(input, weight, bias=None):
    
    output = input.matmul(weight.t())
    if bias is not None:
        output += bias
    return output


class Linear(Module):
    
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input):
        return F.linear(input, self.weight, self.bias)

######################################################################
################# A first model ######################################
def linear_forward(x,w,b):
    s = w.mv(x) + b
    x = sigma(s)
    return s,x

def linear_backward(x0, x, dl_ds_prev, dl_dw, dl_db, initial = False):
    
    dl_dx = w.t().mv(dl_ds_prev)
    dl_ds = dsigma(s1) * dl_dx   
    dl_dw.add_(dl_ds.view(-1, 1).mm(x0.view(1, -1)))
    dl_db.add_(dl_ds)

def forward_pass(ws, bs, x, test = False):
    x0 = x
    
    s1, x1 = linear_forward(x0,ws[0],bs[0])
    s2, x2 = linear_forward(x1,ws[1],bs[1])
    
    xs = [x1, x2]
    ss = [s1, s2]
    if test: 
        return xs[-1]
    return x0, xs, ss


def backward_pass(ws, bs,
                  t,
                  x, xs, ss,
                  dl_dws, dl_dbs):
    x0 = x
    
    dl_dx2 = dloss(xs[-1], t)
    dl_ds2 = dsigma(ss[2-1]) * dl_dx2
    dl_dws[2-1].add_(dl_ds2.view(-1, 1).mm(xs[1-1].view(1, -1)))
    dl_dbs[2-1].add_(dl_ds2)
    
    dl_dx1 = ws[2-1].t().mv(dl_ds2) # w2
    dl_ds1 = dsigma(ss[1-1]) * dl_dx1   
    
    dl_dws[1-1].add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
    dl_dbs[1-1].add_(dl_ds1)

# Training:

In [17]:


zeta = 0.9
train_input = train_input
test_input = test_input

nb_hidden=50
print ('train_input.size(): ', train_input.size())
nb_classes = 2 #train_target.size(1) 
nb_train_samples = train_input.size(0)

eta = 0.1 / train_target.size(0)

eps = 1e-6

# weights and biases
w1 = Tensor(nb_hidden, train_input.size(1)).normal_(0,1)
b1 = Tensor(nb_hidden).normal_(0,1)
w2 = Tensor(nb_classes, nb_hidden).normal_(0,eps)
b2 = Tensor(nb_classes).normal_(0,eps)

# lists
ws = [w1, w2]
bs = [b1, b2]
# derivatives of the losse wrt weights and biases
dl_dws = []
dl_dbs = []
for w in ws:
    dl_dws.append(Tensor(w.size()))
for b in bs:
    dl_dbs.append(Tensor(b.size()))


epochs = 250
for k in range (0,epochs):
    
    acc_loss = 0
    nb_train_errors = 0
    
    # set the storage to 0
    for i in range(0, len(dl_dws)):
        dl_dws[i].zero_()
        dl_dbs[i].zero_()
    
    # for each sample run forward and backward pass
    for n in range(0, nb_train_samples):
        
        # run forward pass
        x0, xs, ss = forward_pass(ws, bs, train_input[n])
        
        # prediction is the maximum predicted class
        
        predicted = xs[-1].max(dim = 0)[1] # dim is the axis, 1 for taking index, 0 to just select the value
        pred = predicted [0]
        #print(predicted)
        pred = xs[-1][0]
        #print(xs[-1])
        print(pred)
        # check wether the target was 1 or -1 --> verify if positif
        if train_target[n] != int(pred) : 
            nb_train_errors = nb_train_errors + 1 # if == -1 lets say :p 
        acc_loss += loss(Tensor(1).fill_(pred), train_target[n])
        #acc_loss += loss(pred, train_target[n])

        # run backward pass
        backward_pass(ws, bs,
                      train_target[n],
                      x0, xs, ss,
                      dl_dws, dl_dbs)
    
    # Gradient step
    for i in range(0, len(ws)):
        ws[i] = ws[i] - eta * dl_dws[i]
        bs[i] = bs[i] - eta * dl_dbs[i]

    # Test error
    nb_test_errors = 0
    for n in range(0, test_input.size(0)):
        output = forward_pass(ws, bs, test_input[n], test=True)

        pred = output.max(0)[1][0]
        if test_target[n] != int(output[0]) : nb_test_errors = nb_test_errors + 1  


    print(k,' --> acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))

train_input.size():  torch.Size([1000, 2])
-1.0527702443141607e-06


RuntimeError: bool value of non-empty torch.ByteTensor objects is ambiguous