In [1]:
from tensorflow.keras.layers import Input,Dense,SimpleRNN, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD,Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
##Things you should automatically  know and have memorized
#N = number of samples
#T = sequence length
#D = Number of input features
#M = Number of hidden units
#K =Number of output units

In [3]:
#Make some data
N = 1
T = 10
D = 3
K = 2
X =np.random.randn(N,T,D)

In [4]:
X

array([[[-0.05137207,  0.44801364,  1.60180604],
        [-0.06319191,  0.20798733,  0.96020624],
        [ 0.91928665, -1.04723045,  0.71334942],
        [ 0.90963896, -0.83005337, -0.43358196],
        [ 0.44530591, -1.5153067 , -0.25032755],
        [-2.16030545, -0.37537482, -1.36430583],
        [-0.991054  ,  0.59199417, -0.48505332],
        [ 1.32058217, -0.54827316, -0.76771184],
        [-0.22489631, -1.52472617,  2.36305336],
        [-1.63121219, -0.50763436, -0.83402387]]])

In [6]:
#Make an RNN
M = 5 #number of hidden units
i = Input(shape =(T,D))
x = SimpleRNN(M)(i)
x = Dense(K)(x)

model =Model(i, x)

In [21]:
#Get the output
Yhat = model.predict(X)
print(Yhat)

[[-0.79327434  0.01528658]]


In [22]:
#See if we can replicate this output
#Get the weights first
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [23]:
#See what's returned
model.layers[1].get_weights()

[array([[-0.6199136 , -0.41290352,  0.61526257, -0.65031916,  0.563682  ],
        [ 0.5577951 ,  0.6126407 ,  0.7336349 , -0.6990365 , -0.38740078],
        [-0.05981624, -0.05361247,  0.81018096, -0.83001095,  0.04288495]],
       dtype=float32),
 array([[ 0.7505976 , -0.55651826,  0.26421338,  0.22888437,  0.06851096],
        [-0.31120002,  0.13835776,  0.8262481 ,  0.30453297,  0.32952288],
        [ 0.3445085 ,  0.3030033 ,  0.339734  , -0.8149803 ,  0.09945287],
        [ 0.45883206,  0.7548065 , -0.0462339 ,  0.4346481 , -0.16936249],
        [ 0.10268752,  0.09798254, -0.36048377,  0.04194967,  0.9209486 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [24]:
#Check their shapes
#Should make sense
#First output is input > hidden
#Second output is hidden > hidden
#Third output is bias term( vector of length M)
a,b,c = model.layers[1].get_weights()
print(a.shape, b.shape, c.shape)

(3, 5) (5, 5) (5,)


In [25]:
Wx, Wh, bh = model.layers[1].get_weights() #hidden layer
Wo, bo = model.layers[2].get_weights() # output layer

In [26]:
h_last = np.zeros(M) #initial hidden state
x= X[0] #where we store the outputs
Yhats = []

for t in range(T):
  h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
  y = h.dot(Wo) + bo #We only care about this value on the last iteration
  Yhats.append(y)

  #important: assign h to h_last
  h_last = h

#print the final ouput
print(Yhats[-1])

[-0.79327438  0.01528642]
