## 1. Generate some input data (X) consisting of 30 sequences of 8 binary numbers, following a uniform distribution, where the probability of generating a “0” is the same as that of generating a “1”. Make the output (y) for each sequence be the sum of its elements.

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from IPython.display import clear_output

In [2]:
I = 30  # number of training sequences
T = 8  # number of binary elements in each sequence
n, p = 1, 0.5  # a single trial per draw with P(1) = 0.5, i.e. a fair 0/1 draw
X = np.random.binomial(n, p, size=(I, T))
# Target of each sequence is the sum of its elements, stored as a float
# column vector of shape (I, 1).
Y = X.sum(axis=1).astype(float).reshape(I, 1)

In [3]:
# `_bak` settings: presumably the learning rates for plain backpropagation
# (alphax / alphaf of train_backward) -- TODO confirm against the call sites.
alpha_x_bak = 1e-4
alpha_f_bak = 1e-4
epsilon = 1400

# These three names match parameters of train_resilient; presumably the
# initial step size and the decrease / increase factors -- TODO confirm.
delta_init = 3e-4
eta_n = 0.5
eta_p = 1.2

# `_cli` settings: the code consuming them is not visible here; presumably a
# gradient-clipping variant -- TODO confirm.
alpha_x_cli = 1e-3
alpha_f_cli = 1e-3
eta = 5

## 2. Implement a Sequential adder using the Elman Recurrent Neural Network (RNN)

In [4]:
def forward(X, Vx, Vf):
    '''
    Forward propagation of the linear recurrent network over every sequence.

    Starting from a zero state, each time step applies
    ``state = Vf * state + Vx * X[:, step]``.

    Returns a tuple ``(output, history)``:
      * output  -- the state after the last step, reshaped to (X.shape[0], 1)
      * history -- array shaped like X whose column ``step`` holds the state
                   *before* that step was applied (column 0 is all zeros)
    '''
    n_seq = X.shape[0]
    n_steps = X.shape[1]
    history = np.zeros(X.shape)
    state = np.zeros(n_seq)
    for step in range(n_steps):
        # Record the incoming state first, then advance the recurrence.
        history[:, step] = state
        state = Vx * X[:, step] + Vf * state
    return state.reshape(n_seq, 1), history

### Backpropagation

In [5]:
def sse(Y, predictions):
    '''
    Halved sum of squared errors between ``predictions`` and the targets ``Y``.
    '''
    residual = predictions - Y
    return 0.5 * np.sum(residual * residual)

def compute_dE(X, F, Y, predictions, Vx, Vf):
    '''
    Gradients of the halved sum of squared errors with respect to Vx and Vf.

    For the recurrence ``state = Vf * state + Vx * X[:, t]`` unrolled over
    T steps, the contribution of step ``t`` (0-indexed) to the final output
    is scaled by ``Vf ** (T - 1 - t)``. Hence, with
    ``r_i = predictions[i] - Y[i]``:

        dE/dVx = sum_i r_i * sum_t Vf ** (T - 1 - t) * X[i, t]
        dE/dVf = sum_i r_i * sum_t Vf ** (T - 1 - t) * F[i, t]

    Parameters:
      X           -- inputs, shape (I, T)
      F           -- states seen *before* each step, shape (I, T)
      Y           -- targets, shape (I, 1) or (I,)
      predictions -- final network outputs, shape (I, 1) or (I,)
      Vx          -- input weight (unused by the formula, kept for the
                     existing call signature)
      Vf          -- recurrent weight

    Returns a tuple ``(dEVx, dEVf)`` of Python floats.
    '''
    X = np.asarray(X)
    F = np.asarray(F)
    T = X.shape[1]
    # Flatten both operands first so (I, 1) and (I,) inputs never broadcast
    # against each other into an (I, I) matrix.
    residual = np.ravel(predictions) - np.ravel(Y)
    # Weight of each time step: Vf^(T-1) for the first step ... Vf^0 for the last.
    decay = np.ravel(np.power(Vf, np.arange(T - 1, -1, -1, dtype=float)))
    dEVx = float(residual @ (X @ decay))
    dEVf = float(residual @ (F @ decay))
    return dEVx, dEVf


def backward(X, F, Y, predictions, Vx, Vf, alphax, alphaf):
    '''
    Single plain gradient-descent update of the two weights.

    Obtains the gradients from compute_dE, moves Vx and Vf against them
    (scaled by alphax and alphaf respectively), prints the gradients and the
    updated weights, and returns ``(Vx, Vf, dEVx, dEVf)``.
    '''
    grad_x, grad_f = compute_dE(X, F, Y, predictions, Vx, Vf)
    step_x = alphax * grad_x
    step_f = alphaf * grad_f
    Vx -= step_x
    Vf -= step_f
    print("dEVx :", grad_x, "dEVf :", grad_f)
    print("Vx :", Vx, "Vf :", Vf)
    return Vx, Vf, grad_x, grad_f

In [7]:
def train_backward(X, Y, alphax, alphaf, epsilon, Vx, Vf, max_iter=400):
    '''
    Train the network with plain backpropagation (gradient descent).

    Each iteration runs a forward pass, records the SSE, applies one
    ``backward`` update and records the new weights and gradients.

    Parameters:
      X, Y           -- training inputs and targets
      alphax, alphaf -- learning rates for Vx and Vf
      epsilon        -- only seeds the first printed "Diff SSE" value
                        (epsilon + 1); it does not stop training
      Vx, Vf         -- initial weights
      max_iter       -- number of training iterations (default 400)

    Returns ``(Vx_evo, Vf_evo, dEVx_evo, dEVf_evo, SSE_arr)``: the histories
    of both weights, both gradients and the SSE, one entry per iteration.

    NOTE(review): no convergence-based early stopping is implemented; the
    loop always runs ``max_iter`` iterations.
    '''
    SSE_arr = []   # SSE before each update
    Vx_evo = []    # Vx after each update
    Vf_evo = []    # Vf after each update
    dEVx_evo = []  # gradient w.r.t. Vx at each iteration
    dEVf_evo = []  # gradient w.r.t. Vf at each iteration

    diff_error = epsilon + 1
    prev_error = 0

    n_iter = 0
    for n_iter in range(1, max_iter + 1):
        predictions, F = forward(X, Vx, Vf)
        cur_error = sse(Y, predictions)
        SSE_arr = np.append(SSE_arr, cur_error)

        # backward returns the weights in (Vx, Vf) order.
        Vx, Vf, dEVx, dEVf = backward(X, F, Y, predictions, Vx, Vf, alphax, alphaf)
        # np.append copies the values, so later in-place weight updates
        # cannot alter the recorded history.
        Vx_evo = np.append(Vx_evo, Vx)
        Vf_evo = np.append(Vf_evo, Vf)
        dEVx_evo = np.append(dEVx_evo, dEVx)
        dEVf_evo = np.append(dEVf_evo, dEVf)

        # The error difference is only defined from the second iteration on.
        if n_iter > 1:
            diff_error = abs(cur_error - prev_error)
        prev_error = cur_error
        print("Training #",n_iter," Diff SSE: ",diff_error, " SSE : ",cur_error)
        print("dEVx :",dEVx,"dEVf :",dEVf)
        print("Vx :",Vx,"Vf :",Vf)
        print("__________________________________________________________________")

    print("Finished after {} iterations, finals weights Vx {} Vf {}".format(n_iter,Vx,Vf))
    return Vx_evo,Vf_evo,dEVx_evo,dEVf_evo,SSE_arr

### Resilient Propagation

In [None]:
# Training a resilient backpropagation model
# Return: Outputs the optimal weights and the errors during training 
def train_resilient(X, y, delta_init, eta_p, eta_n, epsilon, Vx, Vf):
    SSE = []    #Array to store all SSE over time
    Vx_evo = [] #Array to store all Vx over time
    Vf_evo = [] #Array to store all Vf over time
    dEVx_evo = [] #Array to store all dEVx over time
    dEVf_evo = [] #Array to store all dEVf over time
    
    delta_f = delta_init
    delta_x = delta_init
    diff_error = 0.0
    prev_diff_error = diff_error
    prev_error = 0
    cur_error = 0
    dEVx,dEVf = 0,0
    
    stop_loop=0
    n_iter=0
    
    for p in range(400):
        # Loop while we don't have three consecutive error differences lesser than epsilon, meaning we converged 
        
        n_iter += 1
        predictions, F = forward(X, Vx, Vf)
        
        # Save previous derivative for sign comparison
        prev_dEVx = dEVx
        prev_dEVf=dEVf
        
        dEVx,dEVf = compute_dE(X,F,Y,predictions,Vx,Vf)
        
        if np.sign()