In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

2024-07-18 11:00:30.956804: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-18 11:00:31.062887: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

## Make some data

In [3]:
N = 10
T = 10
D = 3
K = 2
X = np.random.randn(N,T,D)

In [4]:
X

array([[[ 6.80892133e-01, -1.81218014e+00,  1.31643101e+00],
        [-1.62084512e+00,  4.18163033e-01, -1.27651966e-01],
        [ 1.44434476e+00,  8.19397057e-01,  1.49047386e-01],
        [-2.45397615e-01,  6.88907259e-01, -9.93367961e-01],
        [ 1.62774036e+00, -2.32844830e-01, -8.95853854e-01],
        [-3.77126965e-01, -2.60229899e-01,  1.43042767e-01],
        [-1.29682010e+00, -2.06536800e-01,  1.49363284e+00],
        [-7.82836107e-01, -2.29982089e-01, -6.01258279e-01],
        [-7.83215704e-01, -1.47299803e+00,  1.89134997e-01],
        [-4.85465118e-01, -5.40832025e-01, -1.67319371e+00]],

       [[-7.04758216e-01,  2.83218970e-01,  1.08471644e+00],
        [-5.74729431e-01,  2.05754087e-01, -1.43763236e-01],
        [-1.11982155e+00, -2.68352887e-02,  5.33907165e-01],
        [-1.38301261e+00, -1.44117224e-01,  7.46450427e-02],
        [ 5.22888813e-01,  3.51875416e-02,  2.37998593e+00],
        [ 1.89717120e+00,  5.97589336e-01,  7.88384087e-01],
        [ 8.35194941e-

## Make an RNN

In [5]:
M = 5    # number of hidden units

i = Input(shape=(T, D))
x = SimpleRNN(M)(i)
x = Dense(K)(x)

model = Model(i, x)

2024-07-18 11:00:37.161713: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
# Get the output
Yhat = model.predict(X)
print(Yhat)

[[-0.5289109   1.6421328 ]
 [-0.48574176  0.9382361 ]
 [ 0.18438049  0.3460279 ]
 [ 0.02113365 -0.6390489 ]
 [ 0.06394221 -1.0477499 ]
 [ 1.3379334   0.36301115]
 [ 0.49630532 -1.1265892 ]
 [ 1.2316301  -0.97678185]
 [-1.3719473  -0.4308273 ]
 [-0.07336175 -0.5118206 ]]


In [7]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57 (228.00 Byte)
Trainable params: 57 (228.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# See what's returned
model.layers[1].get_weights()

[array([[-0.8273862 , -0.7702465 ,  0.46701604,  0.20238537,  0.47578102],
        [-0.7400462 , -0.5104808 , -0.6698031 , -0.03079146,  0.81291586],
        [-0.30824572, -0.27493382, -0.43298814,  0.57565325,  0.5246926 ]],
       dtype=float32),
 array([[ 0.3747121 ,  0.8115034 ,  0.08870562, -0.39028996,  0.2021338 ],
        [-0.05539299, -0.14604956,  0.16138628,  0.1761847 ,  0.95839167],
        [ 0.87945676, -0.27299482, -0.32397497,  0.21560796,  0.02414803],
        [-0.2859345 ,  0.19416578, -0.9232948 ,  0.00605699,  0.16742525],
        [ 0.0363282 , -0.45596626, -0.09302066, -0.87755865,  0.10960374]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [9]:
# Check their shapes
# Should make sense
# First output is input > hidden
# Second output is hidden > hidden
# Third output is bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(a.shape, b.shape, c.shape)

(3, 5) (5, 5) (5,)


In [10]:
Wx, Wh, bh = model.layers[1].get_weights()
Wo, bo = model.layers[2].get_weights()

In [11]:
Wx.shape, Wh.shape, bh.shape

((3, 5), (5, 5), (5,))

In [12]:
Wo.shape, bo.shape

((5, 2), (2,))

In [13]:
h_last = np.zeros(M) # initial hidden state
# x = X[0] # the one and only sample
Yhats = [] # where we store the outputs

for i in range(len(X)):
    x = X[i]
    for t in range(T):
        h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
        y = h.dot(Wo) + bo # we only care about this value on the last iteration
        
        # important: assign h to h_last
        h_last = h
    Yhats.append(y)

    
# print the final output
print(Yhats)

[array([-0.52891097,  1.64213284]), array([-0.48423531,  0.93804988]), array([0.18084086, 0.3866797 ]), array([ 0.03302102, -0.63677746]), array([ 0.05912491, -1.03611303]), array([1.33970671, 0.3498177 ]), array([ 0.49655873, -1.12667478]), array([ 1.23056202, -0.97833903]), array([-1.37195009, -0.43095558]), array([-0.0698414 , -0.52032083])]
