In [1]:
import keras
import tensorflow as tf

## Simple RNN

**1. Simple RNN Memory Cell**

$$
\hat{Y}_t = \phi(X_t W_x + \hat{Y}_{t-1} W_{\hat{y}} + b)
$$

Where:

- $\hat{Y}_t$ is an $m \times n_{\text{neurons}}$ matrix containing the layer’s outputs at time step $t$ for each instance in the mini-batch ($m$ is the number of instances in the mini-batch and $n_{\text{neurons}}$ is the number of neurons).  
- $X_t$ is an $m \times n_{\text{inputs}}$ matrix containing the inputs for all instances ($n_{\text{inputs}}$ is the number of input features).  
- $W_x$ is an $n_{\text{inputs}} \times n_{\text{neurons}}$ matrix containing the connection weights for the inputs of the current time step.  
- $W_{\hat{y}}$ is an $n_{\text{neurons}} \times n_{\text{neurons}}$ matrix containing the connection weights for the outputs of the previous time step.  
- $b$ is a vector of size $n_{\text{neurons}}$ containing each neuron’s bias term.  
- $\phi$ is the activation function (`tanh` in the cell implementation below).  

In [2]:
@tf.function
def rnn_cell(x_t, h_t_1, w_x, w_h, b):
    """Compute one simple-RNN step: tanh(x_t @ w_x + h_t_1 @ w_h + b).

    Args:
        x_t: inputs at the current time step; (batch_size, n_features) in
            this notebook.
        h_t_1: hidden state from the previous time step; (batch_size, units)
            in this notebook.
        w_x: input-to-hidden weights.
        w_h: hidden-to-hidden (recurrent) weights.
        b: bias term added to every row of the pre-activation.

    Returns:
        The new hidden state, same shape as `h_t_1`.
    """
    input_term = tf.matmul(x_t, w_x)
    recurrent_term = tf.matmul(h_t_1, w_h)
    # Keep the summation order (input + recurrent, then bias) unchanged.
    pre_activation = input_term + recurrent_term + b
    return tf.tanh(pre_activation)

In [3]:
# Problem dimensions used throughout the notebook.
batch_size = 32
units = 8
n_features = 12
timesteps = 16

# Seeded initializers so the manual implementation can be compared against Keras.
kernel_init = keras.initializers.glorot_uniform(seed=42)  # common choice for tanh activations
recurrent_init = keras.initializers.orthogonal(seed=42)  # common choice for recurrent weights

w_x = kernel_init(shape=(n_features, units))
w_h = recurrent_init(shape=(units, units))
# One bias per neuron. `(units,)` is a real 1-tuple; `(units)` is just the int.
b = tf.zeros(shape=(units,))
# Initial hidden state is all zeros. A stateful RNN would instead start from
# the last hidden state of the previous sequence.
h_init = tf.zeros(shape=(batch_size, units))

In [4]:
# Fix the global seed so the random input is reproducible across runs.
tf.random.set_seed(42)

# Typical 3D RNN input: (batch_size, timesteps, n_features).
input_sequence = tf.random.normal(
    shape=(batch_size, timesteps, n_features),
)

In [5]:
# Run the cell on the first time step only.
# Each x(t) slice has shape (batch_size, n_features).
x_t = input_sequence[:, 0]

# One hidden state per sample and neuron: (batch_size, units).
h_t = rnn_cell(x_t, h_init, w_x, w_h, b)

# In a simple RNN the output at step t is the hidden state itself.
y_t = h_t
y_t.shape

TensorShape([32, 8])

In [6]:
# Unroll the cell over time: each step's hidden state feeds the next step.
hiddens_states = []
h_t_1 = h_init

for t in range(timesteps):
    x_t = input_sequence[:, t]
    h_t = rnn_cell(x_t, h_t_1, w_x, w_h, b)
    h_t_1 = h_t  # carry the state forward
    hiddens_states.append(h_t)

# Stack along axis=1 (time) to get (batch_size, timesteps, units).
outputs = tf.stack(hiddens_states, axis=1)
outputs.shape

TensorShape([32, 16, 8])

In [7]:
# let's wrap everything in a function

def simple_rnn(input_sequence, units, return_sequences=True, seed=42):
    """Run a from-scratch simple RNN (tanh) over a batch of sequences.

    Fresh weights are created on every call: a Glorot-uniform input kernel,
    an orthogonal recurrent kernel (both seeded with `seed`), a zero bias and
    a zero initial hidden state.

    Args:
        input_sequence: 3D tensor of shape (batch_size, timesteps, n_features).
        units: number of neurons, i.e. the size of the hidden state.
        return_sequences: if True, return the hidden state of every time
            step; otherwise return only the last one.
        seed: seed passed to both weight initializers.

    Returns:
        A (batch_size, timesteps, units) tensor when `return_sequences` is
        True, otherwise a (batch_size, units) tensor holding the last hidden
        state.

    Raises:
        AssertionError: if `input_sequence` is not 3-dimensional.
    """
    assert input_sequence.ndim == 3, "RNN expects 3D input"
    batch_size, timesteps, n_features = input_sequence.shape

    kernel_init = keras.initializers.glorot_uniform(seed=seed)
    recurrent_init = keras.initializers.orthogonal(seed=seed)
    w_x = kernel_init(shape=(n_features, units))
    w_h = recurrent_init(shape=(units, units))
    b = tf.zeros(shape=(units,))  # 1-tuple, not a bare int
    h_t_1 = tf.zeros(shape=(batch_size, units))  # initial hidden state

    hiddens_states = []
    for t in range(timesteps):
        x_t = input_sequence[:, t, :]
        h_t_1 = rnn_cell(x_t, h_t_1, w_x, w_h, b)
        # Only keep the per-step states when the caller asked for them.
        if return_sequences:
            hiddens_states.append(h_t_1)

    if not return_sequences:
        # The running state already is the last time step's output, so there
        # is no need to stack the whole sequence and slice it.
        return h_t_1

    return tf.stack(hiddens_states, axis=1)

In [8]:
# Run the from-scratch implementation and the Keras layer on the same input,
# using the same seeded initializers, and check that they agree.
my_simple_rnn_outputs = simple_rnn(input_sequence, units, return_sequences=True, seed=42)

keras_simple_rnn = keras.layers.SimpleRNN(
    units,
    return_sequences=True,
    kernel_initializer=kernel_init,
    recurrent_initializer=recurrent_init,
)

keras_simple_rnn_outputs = keras_simple_rnn(input_sequence)

# Compare within a small tolerance instead of exact `==`: two float32
# implementations may differ in the last bits (op fusion, device, library
# version) while still being numerically equivalent.
same_shape = tuple(my_simple_rnn_outputs.shape) == tuple(keras_simple_rnn_outputs.shape)
max_abs_diff = (
    tf.reduce_max(tf.abs(my_simple_rnn_outputs - keras_simple_rnn_outputs))
    if same_shape
    else float("inf")
)
outputs_match = same_shape and bool(max_abs_diff <= 1e-6)

print(f"Are my results same as keras layer? {outputs_match}")

Are my results same as keras layer? True


Yay!

## LSTM

## GRU

## Attention (Concatenative)

## Attention (Luong)