<a href="https://colab.research.google.com/github/Jency07/Deep-learning-lab/blob/main/20MAI0026_RNN_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

***1. Importing libraries:***

In [16]:
# Imports
%matplotlib notebook

import sys
import numpy as np  # Matrix and vector computation package
import matplotlib

# Seed NumPy's global random generator so that the np.random.rand draws
#  used to build the dataset give the same sequences on every fresh run.
np.random.seed(seed=1)


***2. Creating Dataset:***

In [23]:
# Build a toy dataset of random binary sequences
nb_of_samples = 20
sequence_len = 10
# Draw one sequence at a time: uniform samples in [0, 1) rounded to 0 or 1,
#  stacked into a float matrix with one row per sequence.
X = np.array(
    [np.around(np.random.rand(sequence_len)).astype(int)
     for _ in range(nb_of_samples)],
    dtype=float)
# The target of each sequence is the number of ones it contains
t = X.sum(axis=1)
print("20MAI0026")
print("******************")
print("\n")
print("\nDataset: \n")
print(X)
print("\nTarget: \n")
print(t)


20MAI0026
******************



Dataset: 

[[1. 1. 1. 1. 0. 0. 0. 1. 0. 1.]
 [1. 0. 0. 0. 0. 1. 0. 1. 1. 0.]
 [0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]
 [0. 1. 0. 1. 0. 0. 1. 1. 0. 0.]
 [0. 0. 0. 1. 1. 1. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 1. 0. 0. 1. 0.]
 [0. 1. 1. 1. 0. 1. 1. 0. 0. 1.]
 [0. 1. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 1. 1. 0. 1. 1. 0. 0. 0. 1.]
 [1. 1. 0. 1. 1. 1. 0. 1. 1. 0.]
 [1. 0. 1. 1. 0. 0. 1. 1. 0. 1.]
 [0. 0. 0. 0. 0. 1. 1. 1. 1. 1.]
 [1. 1. 1. 0. 0. 0. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 0. 1. 1. 0. 1.]
 [1. 1. 1. 0. 1. 1. 1. 1. 0. 1.]
 [0. 1. 1. 0. 0. 1. 1. 0. 1. 1.]
 [1. 1. 0. 0. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 0. 1. 0. 1. 1. 0.]
 [0. 0. 0. 1. 1. 0. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 1. 1. 1. 1. 1.]]

Target: 

[6. 4. 4. 4. 4. 3. 6. 2. 6. 7. 6. 5. 5. 5. 8. 6. 8. 4. 3. 7.]


***3. RNN Implementation***

In [10]:
# Define the forward step functions

def update_state(xk, sk, wx, wRec):
    """
    Single step of the linear recurrence: combine the current input 
    (xk), scaled by the input weight (wx), with the previous state 
    (sk), scaled by the recursive weight (wRec), and return their sum 
    as the new state.
    """
    input_term = xk * wx
    recurrent_term = sk * wRec
    return input_term + recurrent_term


def forward_states(X, wx, wRec):
    """
    Unfold the network and compute all state activations 
    given the input X (one sequence per row, one timestep per column), 
    input weights (wx), and recursive weights (wRec). Return the state 
    activations in a matrix of shape (X.shape[0], X.shape[1]+1): column 0 
    is the initial state, and the last column S[:,-1] contains the final 
    activations.
    """
    # Work on a plain ndarray. Column slices of an np.matrix stay 2-D, 
    #  so X[:,k] would have shape (n, 1) and broadcast against the 1-D 
    #  state column to (n, n), which cannot be stored in S[:,k+1] 
    #  whenever there is more than one row.
    X = np.asarray(X)
    # Initialise the matrix that holds all states for all 
    #  input sequences. The initial state s0 is set to 0.
    S = np.zeros((X.shape[0], X.shape[1]+1))
    # Use the recurrence relation defined by update_state to update 
    #  the states through time.
    for k in range(0, X.shape[1]):
        # S[k+1] = S[k] * wRec + X[k] * wx
        S[:,k+1] = update_state(X[:,k], S[:,k], wx, wRec)
    return S


def loss(y, t): 
    """Mean of the squared differences between targets t and outputs y."""
    residual = t - y
    return np.mean(residual**2)

def output_gradient(y, t):
    """
    Element-wise derivative of the squared error (y - t)**2 with 
    respect to the output y, i.e. 2 * (y - t). No averaging over 
    samples is applied here.
    """
    error = y - t
    return 2. * error


def backward_gradient(X, S, grad_out, wRec):
    """
    Backpropagate the gradient computed at the output (grad_out) 
    through the unfolded network. At every timestep the gradients for 
    wx and wRec are averaged over the samples and accumulated by 
    addition. 

    Args:
        X: input sequences, one per row.
        S: state activations with one more column than X (column 0 
           being the initial state).
        grad_out: gradient at the final state, one value per sample.
        wRec: recursive weight used in the forward pass.
    Returns:
        ((wx_grad, wRec_grad), grad_over_time): the accumulated 
        parameter gradients as a tuple, and the gradient with respect 
        to every state (same shape as S).
    """
    # Initialise the array that stores the gradients of the loss with 
    #  respect to the states; the last column is the output gradient.
    grad_over_time = np.zeros((X.shape[0], X.shape[1]+1))
    grad_over_time[:,-1] = grad_out
    # Set the gradient accumulations to 0
    wx_grad = 0
    wRec_grad = 0
    for k in range(X.shape[1], 0, -1):
        # Average the per-sample parameter gradients of this timestep 
        #  and accumulate them. State k was computed from input k-1 
        #  and state k-1, hence the shifted column indices.
        wx_grad += np.mean(grad_over_time[:,k] * X[:,k-1], axis=0)
        wRec_grad += np.mean(grad_over_time[:,k] * S[:,k-1], axis=0)
        # Compute the gradient at the output of the previous layer
        grad_over_time[:,k-1] = grad_over_time[:,k] * wRec
    return (wx_grad, wRec_grad), grad_over_time
  
# Define Rprop optimisation function
def update_rprop(X, t, W, W_prev_sign, W_delta, eta_p, eta_n):
    """
    Run one Rprop iteration: recompute the gradient signs and rescale 
    the per-weight update values accordingly.
    Args:
        X: input data.
        t: targets.
        W: Current weight parameters, [wx, wRec].
        W_prev_sign: Sign of the W gradient from the previous iteration.
        W_delta: Rprop update values (Delta); rescaled in place.
        eta_p, eta_n: Factors applied to Delta when the gradient sign 
                      is unchanged (eta_p) or has changed (eta_n).
    Returns:
        (W_delta, W_sign): Rescaled update values and sign of the 
                           current weight gradient.
    """
    wx, wRec = W[0], W[1]
    # Forward pass, then backward pass, to obtain the weight gradients
    states = forward_states(X, wx, wRec)
    out_grad = output_gradient(states[:,-1], t)
    grads, _ = backward_gradient(X, states, out_grad, wRec)
    # Only the sign of each gradient is used by Rprop
    W_sign = np.sign(grads)
    # Rescale the update value of every weight independently
    for idx in range(len(W)):
        factor = eta_p if W_sign[idx] == W_prev_sign[idx] else eta_n
        W_delta[idx] *= factor
    return W_delta, W_sign

***4. Training the model***

In [24]:

print("20MAI0026")
print("******************")
print("\n")

# Perform gradient checking: compare the backprop gradients with 
#  central finite differences of the loss before optimising with them.
# Set the weight parameters used during gradient checking
params = [1.2, 1.2]  # [wx, wRec]
# Set the small change to compute the numerical gradient
eps = 1e-7
# Compute the backprop gradients
S = forward_states(X, params[0], params[1])
grad_out = output_gradient(S[:,-1], t)
backprop_grads, grad_over_time = backward_gradient(
    X, S, grad_out, params[1])
# Compute the numerical gradient for each parameter in the layer
for p_idx, _ in enumerate(params):
    grad_backprop = backprop_grads[p_idx]
    # Perturb copies of the parameters so that `params` itself is never 
    #  modified and cannot pick up rounding error from +eps / -eps.
    params_plus = list(params)
    params_plus[p_idx] += eps
    params_min = list(params)
    params_min[p_idx] -= eps
    plus_loss = loss(
        forward_states(X, params_plus[0], params_plus[1])[:,-1], t)
    min_loss = loss(
        forward_states(X, params_min[0], params_min[1])[:,-1], t)
    # calculate numerical gradient (central difference)
    grad_num = (plus_loss - min_loss) / (2*eps)
    # Raise error if the numerical gradient is not close to 
    #  the backprop gradient
    if not np.isclose(grad_num, grad_backprop):
        raise ValueError((
            f'Numerical gradient of {grad_num:.6f} is not close to '
            f'the backpropagation gradient of {grad_backprop:.6f}!'))
print('No gradient errors found')

# Perform Rprop optimisation

# Set hyperparameters
eta_p = 1.2  # Growth factor for Delta when the gradient sign is unchanged
eta_n = 0.5  # Shrink factor for Delta when the gradient sign has changed
nb_iterations = 500  # Number of Rprop iterations

# Set initial parameters
W = [-1.5, 2]  # [wx, wRec]
W_delta = [0.001, 0.001]  # Update values (Delta) for W
W_sign = [0, 0]  # Previous sign of W

ls_of_ws = [(W[0], W[1])]  # List of weights to plot
for iteration in range(nb_iterations):
    # Get the update values and sign of the last gradient
    W_delta, W_sign = update_rprop(
        X, t, W, W_sign, W_delta, eta_p, eta_n)
    # Update each weight parameter separately: step against the sign of 
    #  its gradient by its own Delta. A distinct index name keeps the 
    #  outer iteration variable from being overwritten.
    for w_idx in range(len(W)):
        W[w_idx] -= W_sign[w_idx] * W_delta[w_idx]
    ls_of_ws.append((W[0], W[1]))  # Add weights to list to plot

print(f'Final weights are: wx = {W[0]:.4f},  wRec = {W[1]:.4f}')

20MAI0026
******************


No gradient errors found
Final weights are: wx = 1.0009,  wRec = 0.9998


***5. Testing the model***

In [25]:

print("20MAI0026")
print("******************")
print("\n")

# A single test sequence (one row) that is longer than the training 
#  sequences. A plain 2-D ndarray is used rather than np.matrix, so 
#  column slices are 1-D like the state columns in forward_states.
test_inpt = np.asarray([[0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1]])
# The final state (last column) is the model's output for the sequence
test_outpt = forward_states(test_inpt, W[0], W[1])[:,-1]
# The expected output is the number of ones in the input
sum_test_inpt = test_inpt.sum()
print((
    f'Target output: {sum_test_inpt:d} vs Model output: '
    f'{test_outpt[0]:.2f}'))


20MAI0026
******************


Target output: 5 vs Model output: 5.00
