In [125]:
import numpy as np

In [126]:
# Fix the state of NumPy's global random generator so the draws below repeat on every run.
np.random.seed(seed=42)

In [127]:
# Toy tensors for the stacking demo, drawn in a fixed order from the global generator:
# a 3x2 hidden-to-hidden matrix, a 3x3 input-to-hidden matrix,
# a 2x1 previous hidden state and a 3x1 input column.
w_hh = np.random.standard_normal(size=(3, 2))
w_hx = np.random.standard_normal(size=(3, 3))
h_t_prev = np.random.standard_normal(size=(2, 1))
x_t = np.random.standard_normal(size=(3, 1))

In [128]:
# Place the two weight matrices side by side (joined along columns) ...
stack_1 = np.concatenate((w_hh, w_hx), axis=1)

# ... and the previous hidden state on top of the input (joined along rows).
stack_2 = np.concatenate((h_t_prev, x_t), axis=0)

In [129]:
# The product built inline and the one built from the pre-computed stacks
# are printed back to back so they can be compared by eye.
print(np.hstack((w_hh, w_hx)) @ np.vstack((h_t_prev, x_t)))
print(stack_1 @ stack_2)
# The stacked column, next to the same column built with np.concatenate (default axis 0).
print(stack_2)
print(np.concatenate([h_t_prev, x_t]))

[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]


In [130]:
import numpy as np
from numpy import random
from time import perf_counter
import tensorflow as tf

In [131]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), computed without overflow.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here the exponential is only ever
    taken of a non-positive number, so it stays within (0, 1]:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp(-|x|) can underflow to 0 but never overflow
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)

In [132]:
# Random seed, so your results match ours. Every draw below depends on this
# seed and on the order in which the arrays are generated.
random.seed(10)

emb = 128    # Embedding size
T = 256      # Length of sequence
h_dim = 16   # Hidden state dimension

# Initial hidden state: a column of zeros.
h_0 = np.zeros(shape=(h_dim, 1))

# Weight matrices are drawn first (w1, w2, w3), then the bias columns (b1, b2, b3).
w1, w2, w3 = (random.standard_normal(size=(h_dim, emb + h_dim)) for _ in range(3))
b1, b2, b3 = (random.standard_normal(size=(h_dim, 1)) for _ in range(3))

# Input sequence: T column vectors of length emb.
X = random.standard_normal(size=(T, emb, 1))

# The vanilla cell uses w1/b1 directly; the GRU list holds independent copies of them.
weights_vanilla = [w1, b1]
weights_GRU = [w1.copy(), w2, w3, b1.copy(), b2, b3]

In [133]:
def forward_RNN(inputs, weights):
    """Advance the vanilla RNN cell by one step.

    Args:
        inputs: Pair ``(x, h_prev)`` - the current input and the previous hidden state.
        weights: Pair ``(W, b)`` - the weight matrix and the bias.

    Returns:
        ``(y, h)``, where both entries are the same new hidden state
        ``sigmoid(W @ [h_prev; x] + b)``.
    """
    x, h_prev = inputs
    w, b = weights

    # The previous state goes on top of the input, matching the column layout of W.
    stacked = np.vstack((h_prev, x))
    h_next = sigmoid(np.matmul(w, stacked) + b)

    # The cell's output is its hidden state.
    return h_next, h_next




In [134]:
def forward_GRU_RNN(inputs, weights):
    """Advance the GRU cell by one step.

    Args:
        inputs: Pair ``(x, h_prev)`` - the current input and the previous hidden state.
        weights: Six entries in the order
            ``(W_update, W_reset, W_candidate, b_update, b_reset, b_candidate)``.

    Returns:
        ``(y, h)``, where both entries are the same new hidden state.
    """
    x, h_prev = inputs
    w_update, w_reset, w_cand, b_update, b_reset, b_cand = weights

    # Both gates read the same stacked [h_prev; x] column.
    stacked = np.vstack((h_prev, x))
    reset = sigmoid(np.matmul(w_reset, stacked) + b_reset)
    update = sigmoid(np.matmul(w_update, stacked) + b_update)

    # The candidate state sees the previous state scaled by the reset gate.
    gated = np.concatenate([reset * h_prev, x])
    candidate = np.tanh(np.matmul(w_cand, gated) + b_cand)

    # The update gate blends the candidate with the previous state.
    h_next = update * candidate + (1 - update) * h_prev
    return h_next, h_next


In [135]:
# One GRU step on the element at index 1 of X, starting from the zero state;
# prints the (y, h) pair returned by the cell.
print(forward_GRU_RNN(inputs=[X[1], h_0], weights=weights_GRU))

(array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]), array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]))


In [139]:
def scan(function, elems, weights, initializer=None):
    """Fold ``function`` over ``elems``, threading a state through every step.

    Args:
        function: Callable invoked as ``function([x, state], weights)`` that
            returns a ``(y, new_state)`` pair.
        elems: Iterable of per-step inputs.
        weights: Passed unchanged to ``function`` on every step.
        initializer: State used for the first step. When omitted (``None``),
            the notebook-level ``h_0`` is used.

    Returns:
        ``(ys, final_state)``: the list of per-step outputs and the state after
        the last step. For empty ``elems`` this is ``([], initializer)``.
    """
    if initializer is None:
        # Resolve the default at call time instead of definition time, so this
        # cell can be defined before h_0 exists and never keeps a stale h_0
        # after the configuration cell is re-run.
        initializer = h_0

    state = initializer
    outputs = []
    for x in elems:
        y, state = function([x, state], weights)
        outputs.append(y)
    return outputs, state

In [138]:
# Size of X along its first axis, i.e. how many elements scan will iterate over.
print(X.shape[0])

256


In [141]:
# Run the vanilla cell over the whole sequence, then report what came back.
ys, h_T = scan(function=forward_RNN, elems=X, weights=weights_vanilla, initializer=h_0)

print(
    f"Length of ys: {len(ys)}",
    f"Shape of each y within ys: {ys[0].shape}",
    f"Shape of h_T: {h_T.shape}",
    sep="\n",
)

Length of ys: 256
Shape of each y within ys: (16, 1)
Shape of h_T: (16, 1)


In [142]:
# Wall-clock time of one full pass of the vanilla cell over X (a single run).
tic = perf_counter()
ys, h_T = scan(function=forward_RNN, elems=X, weights=weights_vanilla, initializer=h_0)
toc = perf_counter()

RNN_time = (toc - tic) * 1000  # seconds -> milliseconds
print(f"It took {RNN_time:.2f}ms to run the forward method for the vanilla RNN.")

It took 3.80ms to run the forward method for the vanilla RNN.


In [143]:
# Wall-clock time of one full pass of the GRU cell over X (a single run).
tic = perf_counter()
ys, h_T = scan(function=forward_GRU_RNN, elems=X, weights=weights_GRU, initializer=h_0)
toc = perf_counter()

GRU_time = (toc - tic) * 1000  # seconds -> milliseconds
print(f"It took {GRU_time:.2f}ms to run the forward method for the GRU.")

It took 9.05ms to run the forward method for the GRU.


In [145]:
# Three stacked GRU layers followed by a 10-unit dense layer. The first two GRUs
# are configured with return_sequences=True; the third uses the default.
model_GRU = tf.keras.Sequential()
model_GRU.add(tf.keras.layers.GRU(256, return_sequences=True, name='GRU_1_returns_seq'))
model_GRU.add(tf.keras.layers.GRU(128, return_sequences=True, name='GRU_2_returns_seq'))
model_GRU.add(tf.keras.layers.GRU(64, name='GRU_3_returns_last_only'))
model_GRU.add(tf.keras.layers.Dense(10))

In [146]:
# Ask for a summary before the model has been given any input; if that raises,
# show the error message instead of a traceback so the notebook keeps running.
try:
    model_GRU.summary()
except Exception as err:
    print(err)

In [147]:
# Remember these three numbers and follow them further through the notebook
batch_size = 60
sequence_length = 50
word_vector_length = 40

input_data = tf.random.normal([batch_size, sequence_length, word_vector_length])

prediction = model_GRU(input_data)

model_GRU.summary()

In [148]:
# Same architecture as model_GRU, but built from an explicit input shape
# instead of by calling it on data.
model_GRU_2 = tf.keras.Sequential()
model_GRU_2.add(tf.keras.layers.GRU(256, return_sequences=True, name='GRU_1_returns_seq'))
model_GRU_2.add(tf.keras.layers.GRU(128, return_sequences=True, name='GRU_2_returns_seq'))
model_GRU_2.add(tf.keras.layers.GRU(64, name='GRU_3_returns_last_only'))
model_GRU_2.add(tf.keras.layers.Dense(10))

# Only the last dimension is fixed; the first two are left as None.
model_GRU_2.build(input_shape=[None, None, word_vector_length])

model_GRU_2.summary()