Imports

In [1]:
import tensorflow as tf

from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

**Things you should automatically know and have memorized**

N = number of samples

T = sequence length

D = number of input features

M = number of hidden units

K = number of output units

Make some data

In [2]:
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Draw the input from a seeded generator so that Restart & Run All always
# produces the same X (the unseeded global RNG gave different data each run).
rng = np.random.default_rng(0)
X = rng.standard_normal((N, T, D))

Make an RNN

In [3]:
M = 5  # hidden-state size of the recurrent layer

# Build the layers first, then wire them together: (T, D) sequence in,
# final hidden state of size M out of the RNN, K values out of the Dense layer.
i = Input(shape=(T, D))
rnn_layer = SimpleRNN(units=M)
output_layer = Dense(units=K)
x = output_layer(rnn_layer(i))

model = Model(inputs=i, outputs=x)

Get the output

In [6]:
# Run the Keras model on X; this prediction is the reference value that the
# manual NumPy forward pass further down tries to reproduce.
Yhat = model.predict(X)
print(Yhat)

[[0.71072257 0.34516236]]


See if we can replicate this output

Get the weights

In [7]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


See what's returned

In [8]:
# layers[0] is the InputLayer, so layers[1] is the SimpleRNN; show its weight arrays
model.layers[1].get_weights()

[array([[-0.7808734 , -0.35816908, -0.28981602,  0.82273656,  0.44364494],
        [-0.4231303 , -0.54131913,  0.6232963 ,  0.57048005, -0.27552414],
        [ 0.46933192,  0.4955668 ,  0.41336042, -0.36355293, -0.16271383]],
       dtype=float32),
 array([[ 8.1216431e-01, -4.2581198e-01,  1.3863292e-01,  3.2673758e-01,
          1.8192506e-01],
        [ 3.3788252e-01,  5.5323035e-01, -7.2476983e-01,  6.3585214e-02,
          2.2458245e-01],
        [-3.1106126e-01,  2.4277648e-01,  2.8223547e-01,  6.3525778e-01,
          6.0090876e-01],
        [-4.9630113e-02, -2.5916338e-01, -4.3625426e-02, -6.1081415e-01,
          7.4523431e-01],
        [ 3.5637367e-01,  6.2170279e-01,  6.1149788e-01, -3.3548957e-01,
          7.5756386e-04]], dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

Check their shapes

Should make sense

First output is input > hidden

Second output is hidden > hidden

Third output is bias term (vector of length M)

In [9]:
# Unpack the three SimpleRNN weight arrays and show the shape of each one
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights
print(*(w.shape for w in rnn_weights))

(3, 5) (5, 5) (5,)


In [10]:
# SimpleRNN weights: input->hidden (D, M), hidden->hidden (M, M), hidden bias (M,)
Wx, Wh, bh = model.layers[1].get_weights()
# Dense weights: hidden->output matrix and output bias
Wo, bo = model.layers[2].get_weights()

In [11]:
# Manual forward pass of the SimpleRNN + Dense model for the single sample.
x = X[0]  # the one and only sample, a (T, D) sequence
h_last = np.zeros(M)  # hidden state before the first time step
Yhats = []  # output computed at every time step

for t, x_t in enumerate(x):
  # recurrence: new hidden state from the current input and previous state
  pre_activation = np.dot(x_t, Wx) + np.dot(h_last, Wh) + bh
  h = np.tanh(pre_activation)

  # output projection; we only care about this value on the last iteration
  y = np.dot(h, Wo) + bo
  Yhats.append(y)

  # important: carry the hidden state over to the next step
  h_last = h

# the last entry is the model's prediction for the whole sequence
print(Yhats[-1])

[0.7107225  0.34516236]


# Exercise

Calculate the output for multiple samples at once (N > 1)

Make some data

In [12]:
N = 15  # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Draw the input from a seeded generator so that Restart & Run All always
# produces the same X (the unseeded global RNG gave different data each run).
rng = np.random.default_rng(0)
X = rng.standard_normal((N, T, D))

Make an RNN

In [13]:
M = 5  # hidden-state size of the recurrent layer

# Build the layers first, then wire them together: (T, D) sequence in,
# final hidden state of size M out of the RNN, K values out of the Dense layer.
i = Input(shape=(T, D))
rnn_layer = SimpleRNN(units=M)
output_layer = Dense(units=K)
x = output_layer(rnn_layer(i))

model = Model(inputs=i, outputs=x)

Get the output

In [14]:
# Run the Keras model on all N samples; each row of Yhat is one sample's
# prediction and serves as the reference for the manual computation below.
Yhat = model.predict(X)
print(Yhat)

[[ 3.2107878e-01  8.1672513e-01]
 [-7.7232283e-01 -2.6136041e-01]
 [ 6.2023675e-01 -7.0610940e-01]
 [ 8.6666737e-03  1.8107887e-01]
 [-7.3547900e-01  6.4536732e-01]
 [-3.4864524e-01  1.5925911e-01]
 [-1.0270030e+00 -6.4803934e-01]
 [ 4.0537742e-01  8.9070159e-01]
 [-1.2404529e+00 -5.9781796e-01]
 [ 4.5988134e-01  7.4118952e-04]
 [-8.4295309e-01 -2.9028243e-01]
 [-1.8065454e-01  6.7914057e-01]
 [-1.1858099e+00 -2.9714823e-01]
 [-1.3966591e+00  2.0128535e-01]
 [ 6.9257420e-01  3.2283112e-01]]


See if we can replicate this output

Get the weights

In [15]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 5)                 45        
                                                                 
 dense_1 (Dense)             (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


See what's returned

In [16]:
# layers[0] is the InputLayer, so layers[1] is the SimpleRNN; show its weight arrays
model.layers[1].get_weights()

[array([[ 0.48419517,  0.6758731 ,  0.10928971,  0.01300222,  0.7184188 ],
        [ 0.76923186,  0.25727147, -0.45567143,  0.36427397,  0.854103  ],
        [ 0.63049155, -0.7312596 , -0.01670563,  0.7684997 ,  0.32615918]],
       dtype=float32),
 array([[ 0.45721662, -0.13795955, -0.47022352,  0.6705499 , -0.3180767 ],
        [-0.5136211 ,  0.48249927, -0.70328104, -0.07209629, -0.05987962],
        [ 0.5092287 ,  0.18389048, -0.13518071, -0.64929456, -0.5167334 ],
        [-0.49635115, -0.61030716,  0.00366774, -0.07522392, -0.6127713 ],
        [ 0.14651981, -0.5846973 , -0.5157501 , -0.34339923,  0.5027323 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

Check their shapes

Should make sense

First output is input > hidden

Second output is hidden > hidden

Third output is bias term (vector of length M)

In [17]:
# Unpack the three SimpleRNN weight arrays and show the shape of each one
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights
print(*(w.shape for w in rnn_weights))

(3, 5) (5, 5) (5,)


In [18]:
# SimpleRNN weights: input->hidden (D, M), hidden->hidden (M, M), hidden bias (M,)
Wx, Wh, bh = model.layers[1].get_weights()
# Dense weights: hidden->output matrix and output bias
Wo, bo = model.layers[2].get_weights()

In [19]:
# Manual forward pass for all N samples at once.
#
# Rather than looping over samples in Python, keep one hidden-state row per
# sample and advance every sequence together, one time step at a time. This
# also avoids reusing `i` as a loop index, which would overwrite the Keras
# Input tensor of the same name.
H = np.zeros((N, M))  # initial hidden state, one row per sample

for t in range(T):
  # X[:, t] is the (N, D) slice with every sample's input at step t;
  # bh (length M) broadcasts across the N rows.
  H = np.tanh(X[:, t].dot(Wx) + H.dot(Wh) + bh)

# Only the final hidden state feeds the Dense layer, so project once after
# the loop instead of on every time step.
Yhat_manual = H.dot(Wo) + bo  # (N, K), kept separate from the Keras `Yhat`

# print the final output, one row per sample
for y in Yhat_manual:
  print(y)

[0.32107889 0.8167251 ]
[-0.77232276 -0.26136049]
[ 0.62023675 -0.70610934]
[0.00866652 0.18107895]
[-0.73547899  0.6453673 ]
[-0.34864536  0.15925922]
[-1.02700314 -0.64803931]
[0.40537726 0.89070166]
[-1.24045312 -0.59781794]
[0.45988131 0.00074118]
[-0.84295307 -0.29028233]
[-0.18065448  0.67914051]
[-1.1858098  -0.29714827]
[-1.39665917  0.20128526]
[0.69257413 0.32283114]
