<a href="https://colab.research.google.com/github/DrAlexSanz/nlpv2-course/blob/master/RNN_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Recap of the size notation:

* N = Number of samples
* T = Sequence length
* D = Number of input features
* M = Number of hidden units
* K = Number of output units

In [33]:
# Make the data: a single batch of random sequences with shape (N, T, D).
# The generator is seeded so that Restart & Run All always produces the same X.

SEED = 42

N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units
X = np.random.default_rng(SEED).standard_normal((N, T, D))

In [34]:
# Build the network: Input (T x D) -> SimpleRNN (M hidden units) -> Dense (K outputs).
# The SimpleRNN output feeding the Dense layer has shape (None, M), i.e. one
# hidden vector per sample rather than one per time step.

M = 5

inputs = Input(shape=(T, D))
hidden = SimpleRNN(units=M)(inputs)
outputs = Dense(units=K)(hidden)

model = Model(inputs=inputs, outputs=outputs)

In [35]:
# Make a useless prediction: both the weights and the input are still random, so the values
# themselves mean nothing. The output shape is N x K = 1 x 2.

y_hat_rnn = model.predict(X)
print(y_hat_rnn)

[[-0.31913444  0.56399417]]


In [36]:
# Layer-by-layer overview of the model. Expected parameter counts:
#   SimpleRNN: D*M + M*M + M = 3*5 + 5*5 + 5 = 45
#   Dense:     M*K + K       = 5*2 + 2       = 12
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 5)                 45        
                                                                 
 dense_3 (Dense)             (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [37]:
# Let's see the weights of the SimpleRNN layer
# (layers[0] is the InputLayer, layers[1] the SimpleRNN, layers[2] the Dense layer)

model.layers[1].get_weights()

[array([[-0.11042243,  0.5164575 ,  0.06171751,  0.71425337,  0.7961108 ],
        [ 0.6931971 , -0.6224046 ,  0.7474802 ,  0.6555843 ,  0.4002568 ],
        [ 0.4487353 , -0.7945589 ,  0.363599  ,  0.84450704,  0.8397271 ]],
       dtype=float32),
 array([[-0.0624187 ,  0.9722994 , -0.06729289,  0.20409392, -0.06749175],
        [-0.600134  ,  0.09844292,  0.3165816 , -0.6539168 , -0.31987035],
        [-0.6636033 , -0.10603163, -0.6895978 ,  0.14918658,  0.22491357],
        [-0.38153195, -0.17004135,  0.4268004 ,  0.71192306, -0.36949456],
        [-0.22362275,  0.0691817 ,  0.4873847 ,  0.04058766,  0.8402473 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [38]:
# The SimpleRNN layer holds three arrays. Check that their shapes match the theory:
#   w_in  : input  --> hidden (Wx), D x M
#   w_rec : hidden --> hidden (Wh), M x M
#   bias  : hidden bias (bh), length M

w_in, w_rec, bias = model.layers[1].get_weights()

print(*(w.shape for w in (w_in, w_rec, bias)))

(3, 5) (5, 5) (5,)


In [39]:
# Give the weights the names used in the equations:
#   Wx, Wh, bh belong to the recurrent layer; Wo, bo belong to the output (Dense) layer.

rnn_layer, dense_layer = model.layers[1], model.layers[2]

Wx, Wh, bh = rnn_layer.get_weights()
Wo, bo = dense_layer.get_weights()

In [40]:
# And let's see if I can reproduce the RNN manually:
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y_t = h_t . Wo + bo

h_last = np.zeros(M)  # the recurrence starts from an all-zero hidden state
x_seq = X[0]          # the first (and only) sequence in the batch: T x D
y_hats = []

for t in range(T):
    h = np.tanh(x_seq[t].dot(Wx) + h_last.dot(Wh) + bh)
    y = h.dot(Wo) + bo
    y_hats.append(y)

    h_last = h # For the RNN, the first is 0, then I have values

print(y_hats[-1]) # These two should be the same
print(y_hat_rnn)

# Enforce the claim instead of only eyeballing it. The tolerance is loose because
# the weights are float32 while this loop runs in float64, so small rounding
# differences against the Keras result are expected.
np.testing.assert_allclose(y_hats[-1], y_hat_rnn[0], rtol=1e-3, atol=1e-3)

[-0.31913447  0.56399408]
[[-0.31913444  0.56399417]]
