This notebook tests each part of the RNN implementation as it is built. <br>
There are two main parts: the forward pass and the backward pass. <br>
The forward pass is straightforward, but the backward pass requires more work. <br>
The forward pass is implemented first.

The formulas used here are taken from https://medium.com/@thisislong/building-a-recurrent-neural-network-from-scratch-ba9b27a42856

In [2]:
import numpy as np

In [4]:
def forward_computation(xt, a_prev, parameters):
    """Run one RNN cell step: previous hidden state + input -> next state and output.

    Args:
        xt: Input for the current time step.
        a_prev: Hidden state from the previous time step.
        parameters: Dict holding the weights "Waa", "Wax", "Wya" and the
            biases "ba", "by".

    Returns:
        A tuple ``(a_next, y, cache)`` where ``a_next`` is the new hidden
        state, ``y`` is the output of this cell, and ``cache`` is the tuple
        ``(a_next, a_prev, xt, parameters)`` kept for backpropagation.
    """
    # Pull all five weights/biases out of the dict in one unpacking.
    Waa, Wax, Wya, ba, by = [
        parameters[key] for key in ("Waa", "Wax", "Wya", "ba", "by")
    ]

    # Hidden-state update: a_next = tanh(Waa . a_prev + Wax . xt + ba)
    recurrent_term = Waa @ a_prev
    input_term = Wax @ xt
    a_next = np.tanh(recurrent_term + input_term + ba)

    # Output of the cell: y = Wya . a_next + by
    y = Wya @ a_next + by

    # Everything the backward pass needs for this step.
    return a_next, y, (a_next, a_prev, xt, parameters)

In [5]:
def forward_pass(x, a0, parameters):
    """Unroll the RNN cell over every time step of an input sequence.

    Args:
        x: Mini-batch of input sequences; its shape is unpacked as
            (input size, batch size, number of time steps).
        a0: Initial hidden state fed into the first time step.
        parameters: Dict of weights and biases passed through to
            ``forward_computation``; the shape of "Wya" gives the output size
            and the hidden size.

    Returns:
        A tuple ``(a, y, caches)`` where ``a`` is an array of shape
        (hidden size, batch size, time steps) holding every hidden state,
        ``y`` is an array of shape (output size, batch size, time steps)
        holding every output, and ``caches`` is the tuple
        ``(list of per-step caches, x)`` kept for backpropagation.
    """
    # Dimensions: batch size and sequence length from x, layer sizes from Wya.
    _, batch_size, num_steps = x.shape
    num_outputs, num_hidden = parameters["Wya"].shape

    # Pre-allocate storage for the hidden states and outputs of all steps.
    hidden_states = np.zeros((num_hidden, batch_size, num_steps))
    outputs = np.zeros((num_outputs, batch_size, num_steps))
    step_caches = []

    # The recurrence starts from the supplied initial hidden state.
    hidden = a0
    for step in range(num_steps):
        # Advance one time step, feeding in the hidden state from the previous one.
        hidden, step_output, step_cache = forward_computation(
            x[:, :, step], hidden, parameters
        )
        hidden_states[:, :, step] = hidden
        outputs[:, :, step] = step_output
        step_caches.append(step_cache)

    # Bundle the per-step caches together with the full input sequence.
    return hidden_states, outputs, (step_caches, x)