In [1]:
# Install TensorFlow
# !pip install -q tensorflow-gpu==2.0.0-beta1

try:
    %tensorflow_version 2.x  # Colab only.
except Exception:
    pass

import tensorflow as tf
print(tf.__version__)

2.3.1


In [2]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')

Things you should automatically know and have memorized:
- N = number of samples
- T = sequence length
- D = number of input features
- M = number of hidden units
- K = number of output units

In [3]:
# Make some data: one random sequence, laid out as (N, T, D)
SEED = 0  # arbitrary value; fixed so that a re-run feeds the same X to the model
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# A private generator instead of np.random.seed(): the draw is repeatable
# without touching NumPy's global random state. This pins the input data only.
rng = np.random.RandomState(SEED)
X = rng.randn(N, T, D)

In [4]:
# Make an RNN: input sequence -> SimpleRNN -> Dense
M = 5 # number of hidden units

# Build the layers first (same creation order as before), then wire them up
i = Input(shape=(T, D))
rnn_layer = SimpleRNN(M)
dense_layer = Dense(K)

# x ends up as the output of the Dense layer applied to the RNN's result
x = dense_layer(rnn_layer(i))

model = Model(inputs=i, outputs=x)

In [5]:
# Get the output
# This is the reference prediction that the manual forward pass further down
# is meant to reproduce; with N = 1 it comes back as a single row of K values.
Yhat = model.predict(X)
print(Yhat)

[[-0.8550651  0.283166 ]]


In [6]:
# See if we can replicate this output
# Get the weights first
# The summary lists the layers in order (input, SimpleRNN, Dense) together with
# their parameter counts, which shows which layers carry weights to pull out.
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 5)                 45        
_________________________________________________________________
dense (Dense)                (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [7]:
# See what's returned
# model.layers[1] is the SimpleRNN layer (index 0 is the input layer);
# get_weights() hands back its parameters as a list of NumPy arrays.
model.layers[1].get_weights()

[array([[ 0.12038988, -0.178244  , -0.5977818 , -0.35886657, -0.5249387 ],
        [-0.5009583 , -0.7376007 , -0.85453355,  0.84260637, -0.58272743],
        [ 0.25938445,  0.6050146 , -0.10853481, -0.67347467, -0.21369576]],
       dtype=float32),
 array([[-0.66141057,  0.5249954 , -0.53215104,  0.05715685,  0.02154645],
        [ 0.17171894, -0.20291324, -0.5056243 , -0.68780315, -0.44789484],
        [ 0.0875669 , -0.42093942, -0.46218592,  0.7013838 , -0.33103874],
        [ 0.07061481, -0.35638615, -0.4175311 , -0.10806693,  0.8258279 ],
        [ 0.72138363,  0.6156337 , -0.27057582,  0.14156973,  0.08571783]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [8]:
# Check their shapes
# Should make sense
# a: input > hidden weights, expected (D, M)
# b: hidden > hidden weights, expected (M, M)
# c: bias term, a vector of length M
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [9]:
# Name the two weight-carrying layers, then unpack their parameters
recurrent = model.layers[1]  # the SimpleRNN layer
readout = model.layers[2]    # the Dense layer

# Wx: input > hidden, Wh: hidden > hidden, bh: hidden bias
Wx, Wh, bh = recurrent.get_weights()
# Wo, bo: weights and bias of the Dense layer that maps the hidden state to the output
Wo, bo = readout.get_weights()

In [10]:
# Manual forward pass: step through the sequence with the weights pulled out of
# the Keras model and check that the last output matches model.predict
x = X[0] # the one and only sample
Yhats = [] # one output per timestep
h_last = np.zeros(M) # the hidden state starts at zero

for t, x_t in enumerate(x):
    # new hidden state = tanh(input contribution + recurrent contribution + bias)
    from_input = x_t.dot(Wx)
    from_state = h_last.dot(Wh)
    h = np.tanh(from_input + from_state + bh)

    # output for this step; only the final one is compared against Keras
    y = h.dot(Wo) + bo
    Yhats.append(y)

    # carry the state forward, otherwise every step would start from zeros
    h_last = h

# print the final output
print(Yhats[-1])

[-0.85506506  0.28316603]


In [11]:
# Bonus exercise: calculate the output for multiple samples at once (N > 1)