In [None]:
'''
            #review
            One-hidden layer Neural network 
'''

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Fully connected network with one sigmoid hidden layer and a hand-written update rule.

    ``W1`` (input x hidden) and ``W2`` (hidden x output) are plain tensors, not
    ``nn.Parameter`` objects: they are updated manually in :meth:`backward` and
    are therefore not returned by ``parameters()``.
    """

    def __init__(self, input_size, output_size, hidden_size, learning_rate):
        """Create the network with normally distributed random weights.

        Args:
            input_size: number of input features (columns of ``X``).
            output_size: number of output units.
            hidden_size: number of hidden units.
            learning_rate: initial step size used by :meth:`backward`.
        """
        super(Net, self).__init__()  # one hidden layer
        self.inputSize = input_size
        self.outputSize = output_size
        self.hiddenSize = hidden_size

        self.learning_rate = learning_rate
        self.current_error = 1
        self.previous_error = 0
        self.netLog = []  # one [error * 10, learning_rate, learning_gradient] entry per backward pass

        self.last_step = 1
        self.dynamic_learning_rate = 1  # 1 enables the learning-rate adaptation in backward()
        self.learning_gradient = 1
        self.previous_learning_gradient = 1

        # weights initialize
        self.neuronLog = []  # hidden activations recorded on every forward pass
        self.W1 = torch.randn(self.inputSize, self.hiddenSize)  # i X h tensor
        self.W2 = torch.randn(self.hiddenSize, self.outputSize)  # h X o tensor
        return

    def forward(self, X):
        """Return the sigmoid output for ``X`` and log the hidden activations."""
        self.z1 = torch.matmul(X, self.W1)
        self.z2 = self.sigmoid(self.z1)  # activation function
        self.updateNeuronLog(self.z2)
        self.z3 = torch.matmul(self.z2, self.W2)

        o = self.sigmoid(self.z3)
        return o

    def sigmoid(self, s):
        """Logistic sigmoid, element-wise."""
        return (1 / (1 + torch.exp(-s)))

    def relu(self, s):
        """Ramp function with NaN entries treated as 0.

        The input tensor is left untouched; NaNs are replaced on a copy so the
        caller's data is not silently modified.
        """
        cleaned = torch.where(torch.isnan(s), torch.zeros_like(s), s)
        return (cleaned + torch.abs(cleaned)) / 2

    def reluPrime(self, s):
        """Derivative of the ramp function: 1 where ``s > 0``, else 0.

        ``s / |s|`` is NaN at ``s == 0``; :meth:`relu` maps that NaN to 0.
        """
        return self.relu(s / torch.abs(s))

    def sigmoidPrime(self, s):
        """Derivative of the sigmoid, expressed in terms of the sigmoid *output* ``s``."""
        return s * (1 - s)

    def backward(self, X, y, o):
        """Apply one delta-rule update to ``W1``/``W2`` and adapt the learning rate.

        Args:
            X: input batch that produced ``o``.
            y: target batch.
            o: network output returned by :meth:`forward` for ``X``.
        """
        self.o_error = y - o  # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)  # error scaled by the sigmoid derivative
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)

        self.W1 += torch.matmul(torch.t(X), self.z2_delta) * self.learning_rate
        self.W2 += torch.matmul(torch.t(self.z2), self.o_delta) * self.learning_rate
        self.current_error = torch.mean(torch.abs(self.o_error))
        # The log entry is written before learning_gradient is refreshed, so it
        # records the gradient computed on the previous call.
        self.updateLog()

        # Relative improvement of the error. On the very first call
        # previous_error is 0, so this evaluates to -inf (or NaN); the
        # adaptation below is skipped in that case by the `!= 0` check.
        self.learning_gradient = (self.previous_error - self.current_error) / self.previous_error
        if self.dynamic_learning_rate == 1 and self.previous_error != 0:

            if self.learning_gradient > self.previous_learning_gradient:
                self.learning_rate = self.learning_rate / 1.1
            else:
                self.learning_rate = self.learning_rate * 1.1

        self.previous_learning_gradient = self.learning_gradient
        self.previous_error = self.current_error
        return

    def train(self, X=True, y=None):
        """Run one forward + backward pass on ``(X, y)``.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``nn.Module.eval()`` calls with a single boolean. When ``y`` is omitted
        the call is forwarded to ``nn.Module.train`` so ``net.eval()`` and
        ``net.train(True)`` keep working; ``net.train(X, y)`` performs a
        training step as before.
        """
        if y is None:
            return super(Net, self).train(X)
        o = self.forward(X)
        self.backward(X, y, o)
        return

    def saveWeights(self, model, address):
        """Serialise ``model`` to ``address`` with ``torch.save``."""
        torch.save(model, address)
        return

    def updateLog(self):
        """Append ``[current_error * 10, learning_rate, learning_gradient]`` to the training log."""
        self.netLog.append([self.current_error * 10, self.learning_rate, self.learning_gradient])
        return

    def getLog(self):
        """Return the training log as a NumPy array."""
        outlog = np.array(self.netLog)
        return outlog

    def updateNeuronLog(self, hidden_neurons_log):
        """Store a NumPy copy of the hidden activations of one forward pass."""
        self.neuronLog.append([np.array(hidden_neurons_log)])
        return

    def getNeuronLog(self):
        """Return the list of logged hidden activations."""
        return self.neuronLog

    def setLearningRate(self, learning_rate):
        """Overwrite the current learning rate."""
        self.learning_rate = learning_rate
        return

    def setDynamicLearningRate(self, dynamic_learning_rate):
        """Enable (1) or disable (any other value) learning-rate adaptation."""
        self.dynamic_learning_rate = dynamic_learning_rate
        return

    def setTrain(self, training_sample_input, training_sample_output, batch_size, epoches_number, learning_rate,
                 dynamic_learning_rate):
        """Train on ``batch_size`` stacked copies of one sample for ``epoches_number`` steps.

        Args:
            training_sample_input: input tensor, repeated along dim 0.
            training_sample_output: target tensor, repeated along dim 0.
            batch_size: number of copies stacked into the batch.
            epoches_number: number of forward/backward passes.
            learning_rate: learning rate to start from.
            dynamic_learning_rate: 1 to adapt the learning rate during training.
        """
        self.setLearningRate(learning_rate)
        self.setDynamicLearningRate(dynamic_learning_rate)

        X1 = torch.cat([training_sample_input] * batch_size, 0)
        Y1 = torch.cat([training_sample_output] * batch_size, 0)
        for _ in range(0, epoches_number):
            self.train(X1, Y1)

        return



Rewriting the neural network code in a simpler form.

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Single-layer leaky recurrent network with a tanh read-out.

    One call to :meth:`forward` advances the hidden state by one time step.
    """

    def __init__(self, input_size, hidden_size, output_size, alpha=0.5):
        """Build the three linear maps of the network.

        Args:
            input_size: number of input features per time step.
            hidden_size: number of recurrent units.
            output_size: number of output units.
            alpha: mixing factor of the leaky update in :meth:`forward`
                (weight of the new drive; ``1 - alpha`` is kept from the
                previous hidden state). Defaults to 0.5.
        """
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.alpha = alpha

        # weights initialize
        self.inpHid1 = nn.Linear(input_size, hidden_size)    # input -> hidden
        self.hid1Hid2 = nn.Linear(hidden_size, hidden_size)  # hidden -> hidden (recurrent)
        self.hid2Out = nn.Linear(hidden_size, output_size)   # hidden -> output
        self.activationFunction = nn.Tanh()

    def forward(self, input, hidden):
        """Advance the network by one time step.

        The new hidden state is
        ``tanh((1 - a) * hidden + a * W_in(input) + a * W_rec(relu(hidden)))``
        with ``a = self.alpha``; the output is ``tanh(W_out(new_hidden))``.

        Args:
            input: input for this time step.
            hidden: hidden state from the previous step (see :meth:`init_hidden`).

        Returns:
            ``(output, hidden)`` for this time step.
        """
        a = self.alpha

        hidden = self.activationFunction((1-a)*hidden + a*self.inpHid1(input) + a*self.hid1Hid2(nn.functional.relu(hidden)))
        output = self.activationFunction(self.hid2Out(hidden))
        return output, hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)

    def backProp(self):
        """Return a fresh all-zero hidden state.

        Despite its name this performs no back-propagation; it is kept as an
        alias of :meth:`init_hidden` for existing callers.
        """
        return self.init_hidden()

In [None]:
#
#
#   Task one 
#   perceptual decision making.
#
#
def decision_making_dataset(DC_value1, DC_value2, Input_size, Noise_amplitude, N, DC_val_opt):
    """Generate noisy two-channel stimuli and one-hot "which channel is larger" labels.

    Args:
        DC_value1: constant level of channel 0 (used only when ``DC_val_opt == 1``).
        DC_value2: constant level of channel 1 (used only when ``DC_val_opt == 1``).
        Input_size: number of time steps per trial.
        Noise_amplitude: width of the uniform noise added to every sample
            (noise is drawn from ``[-Noise_amplitude/2, Noise_amplitude/2)``).
        N: number of trials.
        DC_val_opt: 1 to use the given DC values for every trial; any other
            value draws an independent integer level in 1..9 per trial and
            per channel.

    Returns:
        ``(Inp, Output)`` where ``Inp`` has shape ``(2, Input_size, N)`` and
        ``Output`` has shape ``(2, N)``. ``Output[1, n] == 1`` when channel 1
        is larger in trial ``n`` (by DC value, or by mean over time in random
        mode); otherwise ``Output[0, n] == 1``.
    """
    Output = torch.zeros(1, 2, N)

    if DC_val_opt == 1:
        Inp1 = DC_value1 + Noise_amplitude*(torch.rand(1, Input_size, N)-0.5)
        Inp2 = DC_value2 + Noise_amplitude*(torch.rand(1, Input_size, N)-0.5)
        if DC_value2 > DC_value1:
            Output[0, 1, :] = 1
        else:
            Output[0, 0, :] = 1
    else:
        Inp1 = Noise_amplitude*(torch.rand(1, Input_size, N)-0.5)
        Inp2 = Noise_amplitude*(torch.rand(1, Input_size, N)-0.5)
        # One random integer level per trial and channel, constant over time.
        for j in range(N):
            Inp1[0, :, j] = np.random.randint(1, 10) + Inp1[0, :, j]
            Inp2[0, :, j] = np.random.randint(1, 10) + Inp2[0, :, j]

        # Label by the noisy per-trial mean, not by the drawn level.
        for i in range(N):
            if torch.mean(Inp1[0, :, i]) < torch.mean(Inp2[0, :, i]):
                Output[0, 1, i] = 1
            else:
                Output[0, 0, i] = 1

    Inp = torch.cat((Inp1, Inp2), 0)
    # Drop only the leading singleton axis: a bare squeeze() would also remove
    # the trial axis when N == 1 and return shape (2,) instead of (2, 1).
    Output = Output.squeeze(0)
    return Inp, Output

In [None]:
# sample dataset Creation
import numpy as np
import matplotlib.pylab as plt
# Draw a small sample (8 trials, one time step each) and plot both channels.
X,Y = decision_making_dataset(DC_value1=1, DC_value2=5, Input_size=1, Noise_amplitude=1, N=8, DC_val_opt=1)
print(X.size())
plt.figure()  # must be called; a bare `plt.figure` does nothing
plt.plot(X[0,0,:].numpy())
plt.plot(X[1,0,:].numpy())
plt.show()
print(X)
print(Y)

In [None]:
# first implementation
def predictor(output,N):
    """Turn two-row scores into a one-hot decision per column.

    NOTE: a later cell of this notebook redefines ``predictor`` for the
    transposed ``(N, 2)`` layout; this version expects ``(2, N)``.

    Args:
        output: tensor whose first two rows hold the scores of choice 0 and
            choice 1; only the first ``N`` columns are read.
        N: number of columns (trials) to decide.

    Returns:
        Tensor of shape ``(2, N)``: column ``i`` is ``[1, 0]`` when
        ``output[0, i] > output[1, i]`` and ``[0, 1]`` otherwise (ties and NaN
        comparisons fall into the second case).
    """
    first_wins = output[0, :N] > output[1, :N]
    out = torch.zeros(2, N)
    out[0] = first_wins.to(out.dtype)
    out[1] = (~first_wins).to(out.dtype)
    return out

def pdm_task1 (learning_rate, hidden_size, batch_size, number_of_epochs, sequence_length=10, dataset_size = 100):
    """Train an RNN on the perceptual decision task and measure accuracy versus stimulus contrast.

    The network sees ``sequence_length`` time steps of a two-channel stimulus
    and is trained (MSE on the output after the last step) to report which
    channel is larger. After training, accuracy is measured on ten fresh test
    sets whose DC difference grows from 0.2 to 2.0.

    Args:
        learning_rate: Adam step size.
        hidden_size: number of recurrent units.
        batch_size: only used as a gate: parameters are updated after every
            sample when ``dataset_size`` is a multiple of ``batch_size`` and
            never otherwise.
            NOTE(review): this looks like it was meant to be a step every
            ``batch_size`` samples - confirm before changing, it alters training.
        number_of_epochs: passes over the training set; the loss is recorded
            every 20th epoch.
        sequence_length: time steps per trial.
        dataset_size: number of training trials.

    Returns:
        NumPy array of 10 test accuracies (percent), one per DC difference.
    """
    rnn = RNN(input_size=2, hidden_size=hidden_size, output_size=2)

    L = sequence_length
    N = dataset_size

    input, target = decision_making_dataset(DC_value1=1, DC_value2=1.1, Input_size=L, Noise_amplitude=0.2, N=N, DC_val_opt=0)

    epochs_samples = np.zeros(int(number_of_epochs/20))
    epoch_loss = np.zeros(int(number_of_epochs/20))

    # Built once: re-creating Adam every epoch would discard its moment
    # estimates, and the caller's learning_rate is honoured here.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    # Sign imposed on each column of the recurrent weight matrix: +1 for the
    # first 4/5 of the columns, -1 for the rest (presumably an
    # excitatory/inhibitory split - confirm).
    col_sign = torch.ones(hidden_size)
    col_sign[torch.arange(hidden_size) >= hidden_size * 4 / 5] = -1

    step_after_each_sample = (N % batch_size == 0)

    # Network outputs of the final epoch, one column per training trial.
    OUT = torch.zeros(2, N, dtype=torch.float64)

    for epochs in range(number_of_epochs):
        for i in range(N):
            hidden = rnn.backProp()

            for t in range(L):
                output, hidden = rnn(input[:, t, i], hidden)
            loss = criterion(output.float(), target[:, i].unsqueeze(0))
            if epochs == number_of_epochs-1:
                OUT[:, i] = output.detach().reshape(-1)
            loss.backward()
            if step_after_each_sample:
                optimizer.step()

                # Re-impose the weight constraints after every update: no
                # self-connections, and every column keeps its assigned sign
                # (entries with the wrong sign are reset to 0).
                with torch.no_grad():
                    W = rnn.hid1Hid2.weight
                    W.fill_diagonal_(0)
                    W[W * col_sign < 0] = 0

                rnn.zero_grad()

        if (epochs%20==19):
            # Loss of the last training trial of this epoch.
            print('epoch=',epochs+1, ', loss=', loss.item())
            number = int(epochs/20)
            epochs_samples[number] = epochs
            epoch_loss[number] = loss.item()

    print('convergance to truth! = ',100*torch.sum(predictor(OUT,N)[0,:]==target[0,:]).item()/N)

    plt.figure()
    plt.plot(epochs_samples, epoch_loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()

    s = (rnn.hid1Hid2.weight.data)
    plt.figure()
    plt.imshow(s)
    plt.title('Weight Matrix')
    # nn.Linear stores weights as (out_features, in_features).
    plt.xlabel('source hidden unit (column)')
    plt.ylabel('target hidden unit (row)')
    plt.show()

    number_of_tests = 100

    true_percentage = np.zeros(10)

    for k in range(10):
        delta = 0.2*(k+1)
        test_input, test_target = decision_making_dataset(DC_value1=1, DC_value2=1+delta, Input_size=L, Noise_amplitude=1, N = number_of_tests, DC_val_opt=1)

        test_OUT = torch.zeros(2, number_of_tests, dtype=torch.float64)

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            for test in range(number_of_tests):
                hidden = rnn.backProp()
                for t in range(L):
                    output, hidden = rnn(test_input[:, t, test], hidden)
                    if(test == number_of_tests-1):
                        print(output)
                test_OUT[:, test] = output.reshape(-1)

        true_percentage[k] = 100*torch.sum(predictor(test_OUT,number_of_tests)[0,:]==test_target[0,:]).item()/number_of_tests
        print('convergance to truth! = ',true_percentage[k])

    plt.figure()
    plt.plot(true_percentage)
    plt.title('convergance to truth!')
    plt.xlabel('length increase')
    plt.ylabel('percentage')
    plt.show()

    return true_percentage


In [None]:
# Row i holds the ten test accuracies obtained with sequence_length = i + 1.
# 0.005 is the Adam step size training has actually been run with; the 0.05
# previously written here never reached the optimizer.
true_percentage = np.zeros([10,10])
for i in range (10):
    true_percentage[i,:] = pdm_task1 (learning_rate=0.005, hidden_size=10, batch_size=50, number_of_epochs=100, sequence_length=i+1, dataset_size = 100)

In [None]:
# One curve per stimulus length. Row i of true_percentage was produced with
# sequence_length = i + 1, so the label uses i + 1 and row 0 is included.
plt.figure()
for i in range(true_percentage.shape[0]):
    plt.plot(true_percentage[i,:], label='stimulus length ' + str(i+1))
plt.title('input size & input length effect')
plt.legend()  # use the per-line labels; a string argument would be split into characters
plt.show()
    

In [None]:
## Task 1 : Second Implementation

In [None]:
def predictor(output,N):
    """Turn two-row scores into a one-hot decision per column.

    Same contract as the ``predictor`` defined for the first implementation:
    expects the ``(2, N)`` layout.

    Args:
        output: tensor whose first two rows hold the scores of choice 0 and
            choice 1; only the first ``N`` columns are read.
        N: number of columns (trials) to decide.

    Returns:
        Tensor of shape ``(2, N)``: column ``i`` is ``[1, 0]`` when
        ``output[0, i] > output[1, i]`` and ``[0, 1]`` otherwise (ties and NaN
        comparisons fall into the second case).
    """
    first_wins = output[0, :N] > output[1, :N]
    out = torch.zeros(2, N)
    out[0] = first_wins.to(out.dtype)
    out[1] = (~first_wins).to(out.dtype)
    return out

def pdm_task2 (learning_rate, hidden_size, batch_size, number_of_epochs, sequence_length=10, dataset_size = 100):
    """Train an RNN on the perceptual decision task with early commitment and plot reaction times.

    Within a trial the network "commits" as soon as its winner-take-all
    decision has been the same for three consecutive time steps; the trial is
    cut there and the output at that step is used for the loss (training) or
    the response (testing). After training, mean reaction time and accuracy
    are plotted for ten test sets with DC values 1 versus 1.5, 2.0, ..., 6.0.

    Args:
        learning_rate: Adam step size.
        hidden_size: number of recurrent units.
        batch_size: only used as a gate: parameters are updated after every
            sample when ``dataset_size`` is a multiple of ``batch_size`` and
            never otherwise.
            NOTE(review): probably meant as a step every ``batch_size``
            samples - confirm before changing, it alters training.
        number_of_epochs: passes over the training set; the loss is recorded
            every 20th epoch.
        sequence_length: maximum number of time steps per trial.
        dataset_size: number of training trials.

    Returns:
        None. Results are printed and plotted.
    """
    rnn = RNN(input_size=2, hidden_size=hidden_size, output_size=2)

    L = sequence_length
    N = dataset_size

    input, target = decision_making_dataset(DC_value1=1, DC_value2=1.1, Input_size=L, Noise_amplitude=0.2, N=N, DC_val_opt=0)

    epochs_samples = np.zeros(int(number_of_epochs/20))
    epoch_loss = np.zeros(int(number_of_epochs/20))

    # Built once: re-creating Adam every epoch would discard its moment
    # estimates, and the caller's learning_rate is honoured here.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    # Sign imposed on each column of the recurrent weight matrix: +1 for the
    # first 4/5 of the columns, -1 for the rest (presumably an
    # excitatory/inhibitory split - confirm).
    col_sign = torch.ones(hidden_size)
    col_sign[torch.arange(hidden_size) >= hidden_size * 4 / 5] = -1

    step_after_each_sample = (N % batch_size == 0)

    # Network outputs of the final epoch, one column per training trial.
    OUT = torch.zeros(2, N, dtype=torch.float64)

    for epochs in range(number_of_epochs):
        for i in range(N):
            hidden = rnn.backProp()

            decisions = []  # decisions[t] is True when output 0 beats output 1 at step t
            for t in range(L):
                output, hidden = rnn(input[:, t, i], hidden)
                decisions.append(bool(output[0, 0] > output[0, 1]))
                # Commit once the last three decisions agree.
                if t > 1 and decisions[t] == decisions[t-1] == decisions[t-2]:
                    break
            loss = criterion(output.float(), target[:, i].unsqueeze(0))
            if epochs == number_of_epochs-1:
                OUT[:, i] = output.detach().reshape(-1)
            loss.backward()
            if step_after_each_sample:
                optimizer.step()

                # Re-impose the weight constraints after every update: no
                # self-connections, and every column keeps its assigned sign
                # (entries with the wrong sign are reset to 0).
                with torch.no_grad():
                    W = rnn.hid1Hid2.weight
                    W.fill_diagonal_(0)
                    W[W * col_sign < 0] = 0

                rnn.zero_grad()

        if (epochs%20==19):
            # Loss of the last training trial of this epoch.
            print('epoch=',epochs+1, ', loss=', loss.item())
            number = int(epochs/20)
            epochs_samples[number] = epochs
            epoch_loss[number] = loss.item()
    print(predictor(OUT,N))
    print(target)
    print('true percentage on train set = ',100*torch.sum(predictor(OUT,N)[0,:]==target[0,:]).item()/N)

    plt.figure()
    plt.plot(epochs_samples, epoch_loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()

    s = (rnn.hid1Hid2.weight.data)
    plt.figure()
    plt.imshow(s)
    plt.title('Weight Matrix')
    plt.show()

    number_of_tests = 100

    reaction_time = np.zeros(10)
    true_percentage = np.zeros(10)

    for k in range(10):
        test_input, test_target = decision_making_dataset(DC_value1=1, DC_value2=1.5+0.5*k, Input_size=L, Noise_amplitude=2, N = number_of_tests, DC_val_opt=1)
        test_OUT = torch.zeros(2, number_of_tests, dtype=torch.float64)
        rt = np.zeros(number_of_tests)

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            for test in range(number_of_tests):
                hidden = rnn.backProp()
                decisions = []
                for t in range(L):
                    output, hidden = rnn(test_input[:, t, test], hidden)
                    decisions.append(bool(output[0, 0] > output[0, 1]))
                    if t > 1 and decisions[t] == decisions[t-1] == decisions[t-2]:
                        rt[test] = t
                        break
                else:
                    # Never committed: the response is read at the last step,
                    # so record that step instead of leaving the time at 0.
                    rt[test] = L - 1

                test_OUT[:, test] = output.reshape(-1)

        # Aggregate once per test set, after all trials have been run.
        reaction_time[k] = np.mean(rt)
        true_percentage[k] = 100*torch.sum(predictor(test_OUT,number_of_tests)[0,:]==test_target[0,:]).item()/number_of_tests

    plt.figure()
    plt.plot(reaction_time)
    plt.title('timeReacted')
    plt.show()

    plt.figure()
    plt.plot(true_percentage)
    plt.title('convergance to truth')
    plt.show()

    return


In [None]:
# 0.005 is the Adam step size training has actually been run with; the 0.05
# previously written here never reached the optimizer.
pdm_task2 (learning_rate=0.005, hidden_size=10, batch_size=50, number_of_epochs=100, sequence_length=20, dataset_size = 100)

In [None]:
# Task 2 : parametric working memory.
# creating data set 

In [None]:
def frequency_dataset(DC_value1, DC_value2, Noise_amplitude, N, DC_val_opt):
    """Build ``N`` trials of 25 samples holding two 5-sample pulses, plus one-hot labels.

    The first pulse always occupies samples 5..9. The second pulse starts at
    sample ``10 + k`` with ``k`` drawn uniformly from 3..7 for every trial.

    Args:
        DC_value1: level of the first pulse (used only when ``DC_val_opt == 1``).
        DC_value2: level of the second pulse (used only when ``DC_val_opt == 1``).
        Noise_amplitude: width of the uniform noise added to every sample.
        N: number of trials.
        DC_val_opt: 1 to use the given levels; any other value draws integer
            levels in 1..9 per trial (the first level is bumped by one when
            both draws coincide).

    Returns:
        ``(X, output)`` with shapes ``(N, 25)`` and ``(N, 2)``;
        ``output[n, 1] == 1`` when the second pulse is larger, otherwise
        ``output[n, 0] == 1``.
    """
    stimuli = torch.zeros(N, 25)
    labels = torch.zeros(N, 2)
    use_given_levels = DC_val_opt == 1

    for trial in range(N):
        if use_given_levels:
            first_level, second_level = DC_value1, DC_value2
        else:
            first_level = np.random.randint(1, 10)
            second_level = np.random.randint(1, 10)
            if first_level == second_level:
                first_level = first_level + 1

        stimuli[trial, 5:10] = first_level
        shift = np.random.randint(3, 8)
        stimuli[trial, 10 + shift:15 + shift] = second_level

        winner = 1 if second_level > first_level else 0
        labels[trial, winner] = 1

    jitter = Noise_amplitude * (torch.rand(N, 25) - 0.5)
    return stimuli + jitter, labels

In [None]:
## Task 2 : Dataset Check

In [None]:
# DC_val_opt=0 draws random integer levels per trial, so DC_value1/DC_value2
# are not used for this sample.
X, output = frequency_dataset(DC_value1=2, DC_value2=2.2, Noise_amplitude=0.2, N=10, DC_val_opt=0)
plt.figure()  # must be called; a bare `plt.figure` does nothing
plt.plot(X[0,:].numpy())
plt.title('Merged firstInp & secondInp')
plt.show()
print(output)

In [None]:
## Task 2 : Implementation

In [None]:
def predictor(output,N):
    """Turn two-column scores into a one-hot decision per row.

    NOTE: this redefines ``predictor`` for the ``(N, 2)`` layout used by the
    frequency task; earlier cells define it for the ``(2, N)`` layout.

    Args:
        output: tensor whose first two columns hold the scores of choice 0
            and choice 1; only the first ``N`` rows are read.
        N: number of rows (trials) to decide.

    Returns:
        Tensor of shape ``(N, 2)``: row ``i`` is ``[1, 0]`` when
        ``output[i, 0] > output[i, 1]`` and ``[0, 1]`` otherwise (ties and NaN
        comparisons fall into the second case).
    """
    first_wins = output[:N, 0] > output[:N, 1]
    out = torch.zeros(N, 2)
    out[:, 0] = first_wins.to(out.dtype)
    out[:, 1] = (~first_wins).to(out.dtype)
    return out

def frequency_task (learning_rate, hidden_size, batch_size, number_of_epochs, dataset_size = 100):
    """Train an RNN on the two-pulse comparison task and plot accuracy versus pulse difference.

    Each trial is a 25-sample scalar sequence from ``frequency_dataset``; the
    network is trained (MSE on the output after the last sample) to report
    which of the two pulses is larger. After training, accuracy is measured
    on twenty test sets with a first pulse of 2 and a second pulse of
    ``2 + delta`` for ``delta`` = 0.5, 1.0, ..., 10.

    Args:
        learning_rate: Adam step size.
        hidden_size: number of recurrent units.
        batch_size: only used as a gate: parameters are updated after every
            sample when ``dataset_size`` is a multiple of ``batch_size`` and
            never otherwise.
            NOTE(review): probably meant as a step every ``batch_size``
            samples - confirm before changing, it alters training.
        number_of_epochs: passes over the training set; the loss is recorded
            every 20th epoch.
        dataset_size: number of training trials.

    Returns:
        None. Results are printed and plotted.
    """
    rnn = RNN(input_size=1, hidden_size=hidden_size, output_size=2)

    N = dataset_size
    n_steps = 25  # trial length produced by frequency_dataset

    input, target = frequency_dataset(DC_value1=2, DC_value2=3, Noise_amplitude=0.2, N=N, DC_val_opt=0)

    epochs_samples = np.zeros(int(number_of_epochs/20))
    epoch_loss = np.zeros(int(number_of_epochs/20))

    # Built once: re-creating Adam every epoch would discard its moment
    # estimates, and the caller's learning_rate is honoured here.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    # Sign imposed on each column of the recurrent weight matrix: +1 for the
    # first 4/5 of the columns, -1 for the rest (presumably an
    # excitatory/inhibitory split - confirm).
    col_sign = torch.ones(hidden_size)
    col_sign[torch.arange(hidden_size) >= hidden_size * 4 / 5] = -1

    step_after_each_sample = (N % batch_size == 0)

    # Network outputs of the final epoch, one row per training trial.
    OUT = torch.zeros(N, 2, dtype=torch.float64)

    for epochs in range(number_of_epochs):
        for i in range(N):
            hidden = rnn.backProp()

            for t in range(n_steps):
                output, hidden = rnn(input[i, t].unsqueeze(0), hidden)
            loss = criterion(output.float(), target[i, :].unsqueeze(0))
            if epochs == number_of_epochs-1:
                OUT[i, :] = output.detach().reshape(-1)
            loss.backward()
            if step_after_each_sample:
                optimizer.step()

                # Re-impose the weight constraints after every update: no
                # self-connections, and every column keeps its assigned sign
                # (entries with the wrong sign are reset to 0).
                with torch.no_grad():
                    W = rnn.hid1Hid2.weight
                    W.fill_diagonal_(0)
                    W[W * col_sign < 0] = 0

                rnn.zero_grad()

        if (epochs%20==19):
            # Loss of the last training trial of this epoch.
            print('epoch=',epochs+1, ', loss=', loss.item())
            number = int(epochs/20)
            epochs_samples[number] = epochs
            epoch_loss[number] = loss.item()

    plt.figure()
    plt.plot(epochs_samples, epoch_loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()

    s = (rnn.hid1Hid2.weight.data)
    plt.figure()
    plt.imshow(s)
    plt.title('Weight Matrix')
    plt.show()

    print('convergance to truth TrainMode',100*torch.sum(predictor(OUT,N)[:,0]==target[:,0]).item()/N)

    number_of_tests = 100

    true_percentage = np.zeros(20)
    for k in range (20):
        delta = (k+1)*0.5

        test_input, test_target = frequency_dataset(DC_value1=2, DC_value2=2+delta, Noise_amplitude=0.2, N = number_of_tests, DC_val_opt=1)

        test_OUT = torch.zeros(number_of_tests, 2, dtype=torch.float64)

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            for test in range(number_of_tests):
                # RNN exposes its zero state through backProp(); it has no
                # init_hidden() in the class as defined in this notebook.
                hidden = rnn.backProp()
                for t in range(n_steps):
                    output, hidden = rnn(test_input[test, t].unsqueeze(0), hidden)

                test_OUT[test, :] = output.reshape(-1)

        true_percentage[k] = 100*torch.sum(predictor(test_OUT,number_of_tests)[:,0]==test_target[:,0]).item()/number_of_tests

        print('convergance to truth',delta,' is ',true_percentage[k])

    # Same grid as the deltas used above: 0.5, 1.0, ..., 10.
    deltas = np.linspace(0.5,10,num=20)
    plt.figure()
    plt.plot(deltas, true_percentage)
    plt.title('convergance to truth')
    plt.show()
    return


In [None]:
# 0.005 is the Adam step size training has actually been run with; the 0.05
# previously written here never reached the optimizer.
frequency_task (learning_rate=0.005, hidden_size=5, batch_size=20, number_of_epochs=200, dataset_size = 100)

In [None]:
## Task 3 : Dataset

In [None]:
def eye_movement(seq_num, partial_seq_len, test_without_marker=0):
    """Build the 17-channel input and 2-channel target for one of eight sequences.

    A trial is five consecutive segments of ``partial_seq_len`` samples each.
    Channel 4 is on during the second segment for every sequence. For
    ``seq_num`` 1..8 three further input channels are switched on in each of
    the last three segments, one marker channel (``8 + seq_num``) is on for
    the whole trial, and the two target rows take a constant value per
    segment. Any other ``seq_num`` leaves everything else at zero.

    Args:
        seq_num: sequence identifier, 1..8.
        partial_seq_len: number of samples per segment.
        test_without_marker: when 1, channels 9 and above (the markers) are
            zeroed in the returned input.

    Returns:
        ``(X, out)`` with shapes ``(17, 5 * partial_seq_len)`` and
        ``(2, 5 * partial_seq_len)``.
    """
    seg = partial_seq_len
    total_len = seg * 5
    X = torch.zeros(17, total_len)
    out = torch.zeros(2, total_len)

    # The five equally long time windows of a trial.
    windows = [slice(k * seg, (k + 1) * seg) for k in range(5)]

    X[4, windows[1]] = 1

    # seq id -> (marker channel,
    #            channels on in windows 2, 3, 4,
    #            out[0] value in windows 2, 3, 4,
    #            out[1] value in windows 2, 3, 4)
    layouts = {
        1: (9,  ((4, 5, 3), (5, 1, 7), (1, 0, 2)), (1, 0, -1),   (0, 1, 1)),
        2: (10, ((4, 5, 3), (3, 1, 7), (1, 0, 2)), (-1, 0, 1),   (0, 1, 1)),
        3: (11, ((4, 5, 3), (3, 1, 7), (6, 7, 8)), (-1, 0, 1),   (0, -1, -1)),
        4: (12, ((4, 5, 3), (5, 1, 7), (6, 7, 8)), (1, 0, -1),   (0, -1, -1)),
        5: (13, ((4, 5, 3), (5, 1, 7), (1, 0, 2)), (1, 0, 1),    (0, 1, 1)),
        6: (14, ((4, 5, 3), (1, 3, 7), (1, 0, 2)), (-1, 0, -1),  (0, 1, 1)),
        7: (15, ((4, 5, 3), (3, 1, 7), (6, 7, 8)), (-1, 0, -1),  (0, -1, -1)),
        8: (16, ((4, 5, 3), (5, 1, 7), (6, 7, 8)), (1, 0, 1),    (0, -1, -1)),
    }

    for seq_id, (marker, channel_sets, first_row_vals, second_row_vals) in layouts.items():
        if seq_num == seq_id:
            for window, channels, v0, v1 in zip(windows[2:], channel_sets, first_row_vals, second_row_vals):
                for channel in channels:
                    X[channel, window] = 1
                out[0, window] = v0
                out[1, window] = v1
            X[marker, :] = 1

    if test_without_marker == 1:
        X[9:, :] = 0

    return X, out
    
    

In [None]:
## Task 3 : Implementation

In [None]:
def eye_movement_nn (learning_rate, hidden_size, batch_size, number_of_epochs, sequence_length=10, dataset_size = 100, test_without_marker = 0):
    """Train an RNN to reproduce the eight ``eye_movement`` target traces and plot the results.

    Training cycles through the eight sequences (trial ``i`` uses sequence
    ``1 + i % 8``) and minimises the MSE between network output and target at
    every time step. After training, each sequence is replayed once and the
    output is plotted against its target.

    Args:
        learning_rate: Adam step size.
        hidden_size: number of recurrent units.
        batch_size: only used as a gate: parameters are updated after every
            trial when ``dataset_size`` is a multiple of ``batch_size`` and
            never otherwise.
            NOTE(review): probably meant as a step every ``batch_size``
            trials - confirm before changing, it alters training.
        number_of_epochs: passes over the training trials; the loss is
            recorded every 20th epoch.
        sequence_length: requested trial length. Trials consist of five equal
            segments, so the length actually used is
            ``5 * int(sequence_length / 5)``.
        dataset_size: number of training trials per epoch.
        test_without_marker: forwarded to ``eye_movement`` for the test
            replays only; 1 zeroes the marker channels.

    Returns:
        None. Results are printed and plotted.

    Raises:
        ValueError: if ``sequence_length`` is smaller than 5.
    """
    rnn = RNN(input_size=17, hidden_size=hidden_size, output_size=2)

    L = int(sequence_length/5)
    if L < 1:
        raise ValueError("sequence_length must be at least 5")
    # eye_movement returns exactly 5 * L samples; iterating up to
    # sequence_length would index past the end when it is not a multiple of 5.
    n_steps = 5 * L

    N = dataset_size

    epochs_samples = np.zeros(int(number_of_epochs/20))
    epoch_loss = np.zeros(int(number_of_epochs/20))

    # Built once: re-creating Adam every epoch would discard its moment
    # estimates, and the caller's learning_rate is honoured here.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    # Sign imposed on each column of the recurrent weight matrix: +1 for the
    # first 4/5 of the columns, -1 for the rest (presumably an
    # excitatory/inhibitory split - confirm).
    col_sign = torch.ones(hidden_size)
    col_sign[torch.arange(hidden_size) >= hidden_size * 4 / 5] = -1

    step_after_each_sample = (N % batch_size == 0)

    # Network output over time for the last training trial of the last epoch.
    OUT = torch.zeros(2, n_steps, dtype=torch.float64)

    for epochs in range(number_of_epochs):
        for i in range(N):
            hidden = rnn.backProp()
            input, target = eye_movement(seq_num = 1 + i%8, partial_seq_len = L)

            # Sum the per-step losses and back-propagate once: this yields the
            # same accumulated gradient as calling backward(retain_graph=True)
            # at every step, without re-traversing the graph each time.
            trial_loss = 0
            for t in range(n_steps):
                output, hidden = rnn(input[:, t], hidden)
                loss = criterion(output.float(), target[:, t].unsqueeze(0))
                trial_loss = trial_loss + loss

                if epochs == number_of_epochs-1:
                    OUT[:, t] = output.detach().reshape(-1)
            trial_loss.backward()

            if step_after_each_sample:
                optimizer.step()

                # Re-impose the weight constraints after every update: no
                # self-connections, and every column keeps its assigned sign
                # (entries with the wrong sign are reset to 0).
                with torch.no_grad():
                    W = rnn.hid1Hid2.weight
                    W.fill_diagonal_(0)
                    W[W * col_sign < 0] = 0

                rnn.zero_grad()

        if (epochs%20==19):
            # Loss at the last time step of the last trial of this epoch.
            print('epoch=',epochs+1, ', loss=', loss.item())
            number = int(epochs/20)
            epochs_samples[number] = epochs
            epoch_loss[number] = loss.item()

    plt.figure()
    plt.plot(epochs_samples, epoch_loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()

    # Output versus target for the last training trial.
    plt.figure()
    plt.subplot(2,1,1)
    plt.plot(OUT[0,:].detach().numpy(),label='output')
    plt.plot(target[0,:].detach().numpy(),label='target')
    plt.ylim([-1.5,1.5])
    plt.xlabel('t')
    plt.ylabel('x')
    plt.legend()
    plt.title('Train')

    plt.subplot(2,1,2)
    plt.plot(OUT[1,:].detach().numpy(),label='output')
    plt.plot(target[1,:].detach().numpy(),label='target')
    plt.ylim([-1.5,1.5])
    plt.xlabel('t')
    plt.ylabel('y')
    plt.legend()
    plt.show()

    s = (rnn.hid1Hid2.weight.data)
    plt.figure()
    plt.imshow(s)
    plt.title('Weight Matrix')
    plt.show()

    number_of_tests = 8

    for test in range(number_of_tests):
        test_input, test_target = eye_movement(seq_num = 1 + test, partial_seq_len = L, test_without_marker = test_without_marker)
        test_OUT = torch.zeros(2, n_steps, dtype=torch.float64)

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            hidden = rnn.backProp()
            for t in range(n_steps):
                output, hidden = rnn(test_input[:, t], hidden)
                test_OUT[:, t] = output.reshape(-1)

        plt.figure()
        plt.subplot(2,1,1)
        plt.plot(test_OUT[0,:].detach().numpy(),label='output')
        plt.plot(test_target[0,:].detach().numpy(),label='target')
        plt.ylim([-1.5,1.5])
        plt.xlabel('t')
        plt.ylabel('x')
        plt.legend()
        plt.title('Test ' + str(test+1))

        plt.subplot(2,1,2)
        plt.plot(test_OUT[1,:].detach().numpy(),label='output')
        plt.plot(test_target[1,:].detach().numpy(),label='target')
        plt.ylim([-1.5,1.5])
        plt.xlabel('t')
        plt.ylabel('y')
        plt.legend()
        plt.show()

    return


In [None]:
## Task 3 : Check on larger hidden sized network with full input

In [None]:
# 0.005 is the Adam step size training has actually been run with; the 0.05
# previously written here never reached the optimizer.
eye_movement_nn(learning_rate=0.005, hidden_size=10, batch_size=10, number_of_epochs=100, sequence_length=20, dataset_size = 100, test_without_marker = 0)

In [None]:
## Task 3 : Check on network with non-full input

In [None]:
# 0.005 is the Adam step size training has actually been run with; the 0.05
# previously written here never reached the optimizer.
# test_without_marker=1 zeroes the marker channels during the test replays only.
eye_movement_nn(learning_rate=0.005, hidden_size=10, batch_size=10, number_of_epochs=100, sequence_length=20, dataset_size = 100, test_without_marker = 1)