## Exercise - RNN

In [42]:
import torch
import numpy as np

#reLU function definition
def reLU(x):
    """Return the element-wise rectified linear unit, max(x, 0).

    Args:
        x: tensor of any shape.

    Returns:
        A new tensor with the same shape and type as ``x`` in which every
        negative entry is replaced by 0.
    """
    # clamp works for any rank and avoids building a zero tensor per call.
    return torch.clamp(x, min=0)

#derivative of reLU
def deriv_reLU(x):
    """Return the element-wise derivative of reLU evaluated at ``x``.

    Args:
        x: tensor of any shape.

    Returns:
        A new tensor with the same shape and type as ``x`` holding 1 where
        ``x > 0`` and 0 elsewhere (the derivative at exactly 0 is taken
        to be 0; NaN compares false and therefore also maps to 0).
    """
    # The comparison yields a mask; cast it back to the input's tensor type
    # so it can be multiplied with other float tensors.
    return (x > 0).type_as(x)

# Training and test data initialization
SeqL = 5   # number of time steps (rows) in every sequence
NbSeq = 4  # number of sequences (columns)
torch.manual_seed(4)
# Inputs are random bits: uniform samples in [0, 1) rounded to 0 or 1.
X_train = torch.round(torch.rand(SeqL, NbSeq))
X_test = torch.round(torch.rand(SeqL, NbSeq))
# Targets are the running sum down each column:
# T[k, :] = X[0, :] + ... + X[k, :].
T_train = torch.cumsum(X_train, 0)
T_test = torch.cumsum(X_test, 0)

# Parameter initialization
l_r = 0.000001  # learning rate
# Both weights are single-element tensors drawn uniformly from [0, 2).
# They are taken from torch's generator (seeded above) rather than from
# NumPy's unseeded global generator, so every run starts from the same
# weights.
Wxs = 2 * torch.rand(1)
Wrec = 2 * torch.rand(1)

# Training


def _forward_pass(inputs, w_in, w_rec, pre_act, act):
    """Run the scalar-weight recurrence over every time step, in place.

    Row 0 has no recurrent term: ``pre_act[0] = w_in * inputs[0]``.
    For k >= 1: ``pre_act[k] = w_in * inputs[k] + w_rec * act[k - 1]``.
    In both cases ``act[k] = reLU(pre_act[k])``.

    Args:
        inputs: 2-D tensor, one time step per row.
        w_in: single-element input weight tensor.
        w_rec: single-element recurrent weight tensor.
        pre_act: 2-D tensor shaped like ``inputs``; overwritten with the
            pre-activation values.
        act: 2-D tensor shaped like ``inputs``; overwritten with the
            activations.
    """
    pre_act[0, :] = w_in * inputs.narrow(0, 0, 1)
    # reLU is given a (1, columns) row so the result can be stored directly
    # into the matching row of ``act``.
    act[0, :] = reLU(pre_act.narrow(0, 0, 1))
    for step in range(1, inputs.size(0)):
        pre_act[step, :] = (w_in * inputs.narrow(0, step, 1)
                            + w_rec * act.narrow(0, step - 1, 1))
        act[step, :] = reLU(pre_act.narrow(0, step, 1))


layer_1 = torch.zeros(SeqL, NbSeq)  # activations (network output)
layer_0 = X_train                   # network input
s = torch.zeros(SeqL, NbSeq)        # pre-activation values
l1_pp_s = torch.zeros(SeqL, NbSeq)  # per-step products for the Wrec update
l1_pp_x = torch.zeros(SeqL, NbSeq)  # per-step products for the Wxs update
# Accumulators must start at zero because they are only ever updated with
# "+=", and they need one entry per sequence.
# NOTE(review): they are not reset between training iterations, so they
# keep growing over the whole run - confirm this is intended.
l1_sum, l1_sum2 = torch.zeros(NbSeq), torch.zeros(NbSeq)

for epoch in range(1000):
    # Forward pass on the training inputs.
    _forward_pass(layer_0, Wxs, Wrec, s, layer_1)

    # Considered error: 1/2 (T - layer_1)^2
    l1_err_deriv = T_train - layer_1

    # s does not change below, so its reLU derivative is computed once.
    ds = deriv_reLU(s)

    for n in range(SeqL):

        # Product of partial derivatives, each multiplied by the previous
        # activation. Row 0 is skipped because it has no recurrent term.
        # NOTE(review): neither loop below includes k == n - confirm.
        for k in range(1, n):
            # cumprod row n-k-1 is the product of Wrec * ds over rows
            # k .. n-1.
            l1_pp_s[k, :] = ds.narrow(0, k, 1) * layer_1.narrow(0, k - 1, 1) * \
                (torch.cumprod(Wrec * ds.narrow(0, k, n - k), 0)[n - k - 1, :])

        # Column sums over all rows of l1_pp_s.
        l1_sum += torch.sum(l1_pp_s, 0)

        # Same for the input weight; row 0 is included here because s(k)
        # depends on x(k), not on x(k-1).
        for k in range(0, n):
            l1_pp_x[k, :] = ds.narrow(0, k, 1) * layer_0.narrow(0, k, 1) * \
                (torch.cumprod(Wrec * ds.narrow(0, k, n - k), 0)[n - k - 1, :])

        # Column sums over all rows of l1_pp_x.
        l1_sum2 += torch.sum(l1_pp_x, 0)

    # New coefficients: the per-sequence sums are broadcast as a row over
    # all time steps of the error and the result is summed to a scalar.
    Wrec += l_r * torch.sum(l1_err_deriv * (l1_sum / SeqL).view(1, -1))
    Wxs += l_r * torch.sum(l1_err_deriv * (l1_sum2 / SeqL).view(1, -1))

# End of training

# Test phase
# layer_1 still holds the activations of the last training forward pass,
# which ran before the final weight update.
print(layer_1, X_train, T_train)
layer_0 = X_test
_forward_pass(layer_0, Wxs, Wrec, s, layer_1)

# End of test phase
print(layer_1, X_test, T_test)


 0.9982  0.9982  0.0000  0.0000
 0.9996  0.9996  0.9982  0.9982
 1.9992  1.9992  0.9996  0.9996
 3.0002  2.0020  1.9992  1.9992
 4.0026  2.0048  2.0020  2.0020
[torch.FloatTensor of size 5x4]
 
 1  1  0  0
 0  0  1  1
 1  1  0  0
 1  0  1  1
 1  0  0  0
[torch.FloatTensor of size 5x4]
 
 1  1  0  0
 1  1  1  1
 2  2  1  1
 3  2  2  2
 4  2  2  2
[torch.FloatTensor of size 5x4]


 0.9982  0.0000  0.9982  0.9982
 1.9978  0.9982  1.9978  1.9978
 2.9988  0.9996  2.9988  2.9988
 3.0030  1.9992  4.0012  3.0030
 3.0072  3.0002  5.0050  4.0054
[torch.FloatTensor of size 5x4]
 
 1  0  1  1
 1  1  1  1
 1  0  1  1
 0  1  1  0
 0  1  1  1
[torch.FloatTensor of size 5x4]
 
 1  0  1  1
 2  1  2  2
 3  1  3  3
 3  2  4  3
 3  3  5  4
[torch.FloatTensor of size 5x4]

