In [2]:
import numpy as np
from rnn_utils import *

In [3]:
def rnn_cell_forward(xt, a_prev, parameters):
    """
    Run one forward time step of a basic RNN cell.

    The step evaluates
        a_next  = tanh(Waa . a_prev + Wax . xt + ba)
        yt_pred = softmax(Wya . a_next + by)

    Arguments:
    xt -- input at timestep "t", numpy array of shape (n_x, m)
    a_prev -- hidden state at timestep "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary holding:
                        Wax -- input-to-hidden weights, numpy array of shape (n_a, n_x)
                        Waa -- hidden-to-hidden weights, numpy array of shape (n_a, n_a)
                        Wya -- hidden-to-output weights, numpy array of shape (n_y, n_a)
                        ba --  hidden-state bias, numpy array of shape (n_a, 1)
                        by -- output bias, numpy array of shape (n_y, 1)

    Returns:
    a_next -- hidden state for the next timestep, of shape (n_a, m)
    yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
    cache -- tuple (a_next, a_prev, xt, parameters) kept for the backward pass

    Note: `softmax` is not defined in this cell; it is presumably supplied by
    the `from rnn_utils import *` line at the top of the notebook.
    """

    # Pull the five tensors out of the dictionary in a single unpacking step
    # (a missing key raises KeyError, looked up in this order).
    Wax, Waa, Wya, ba, by = (
        parameters[key] for key in ("Wax", "Waa", "Wya", "ba", "by")
    )

    ### START CODE HERE ###
    # Hidden-state update: recurrent contribution plus input contribution,
    # shifted by the bias (broadcast over the m columns) and squashed by tanh.
    recurrent_term = np.dot(Waa, a_prev)
    input_term = np.dot(Wax, xt)
    a_next = np.tanh(recurrent_term + input_term + ba)

    # Output of the cell: linear read-out of the new hidden state, then softmax.
    output_scores = np.dot(Wya, a_next) + by
    yt_pred = softmax(output_scores)
    ### END CODE HERE ###

    # Everything the backward pass needs, bundled in a fixed order.
    cache = (a_next, a_prev, xt, parameters)

    return a_next, yt_pred, cache

In [4]:
# Seed the legacy global RNG so the toy tensors below are reproducible.
np.random.seed(1)

# Toy batch: n_x = 3 input features, n_a = 5 hidden units, m = 10 examples.
xt_tmp = np.random.randn(3, 10)
a_prev_tmp = np.random.randn(5, 10)

# The dict values are evaluated top to bottom, so the random draws happen in
# the order Waa, Wax, Wya, ba, by (n_y = 2 output units).
parameters_tmp = {
    'Waa': np.random.randn(5, 5),
    'Wax': np.random.randn(5, 3),
    'Wya': np.random.randn(2, 5),
    'ba': np.random.randn(5, 1),
    'by': np.random.randn(2, 1),
}

In [10]:
# Dump both toy inputs (label, values, shape) with a separator line between
# them; sep='\n' puts every item on its own line, as separate prints would.
print(
    'a_prev',
    a_prev_tmp,
    a_prev_tmp.shape,
    '=============',
    'xt',
    xt_tmp,
    xt_tmp.shape,
    sep='\n',
)

a_prev
[[-0.69166075 -0.39675353 -0.6871727  -0.84520564 -0.67124613 -0.0126646
  -1.11731035  0.2344157   1.65980218  0.74204416]
 [-0.19183555 -0.88762896 -0.74715829  1.6924546   0.05080775 -0.63699565
   0.19091548  2.10025514  0.12015895  0.61720311]
 [ 0.30017032 -0.35224985 -1.1425182  -0.34934272 -0.20889423  0.58662319
   0.83898341  0.93110208  0.28558733  0.88514116]
 [-0.75439794  1.25286816  0.51292982 -0.29809284  0.48851815 -0.07557171
   1.13162939  1.51981682  2.18557541 -1.39649634]
 [-1.44411381 -0.50446586  0.16003707  0.87616892  0.31563495 -2.02220122
  -0.30620401  0.82797464  0.23009474  0.76201118]]
(5, 10)
xt
[[ 1.62434536 -0.61175641 -0.52817175 -1.07296862  0.86540763 -2.3015387
   1.74481176 -0.7612069   0.3190391  -0.24937038]
 [ 1.46210794 -2.06014071 -0.3224172  -0.38405435  1.13376944 -1.09989127
  -0.17242821 -0.87785842  0.04221375  0.58281521]
 [-1.10061918  1.14472371  0.90159072  0.50249434  0.90085595 -0.68372786
  -0.12289023 -0.93576943 -0.26788

In [18]:
# Manual cross-check: evaluate the hidden-state update expression directly on
# the toy tensors. The displayed array should match what rnn_cell_forward
# returns as a_next for the same inputs.
np.tanh(
    np.dot(parameters_tmp['Waa'], a_prev_tmp)
    + np.dot(parameters_tmp['Wax'], xt_tmp)
    + parameters_tmp['ba']
)

array([[ 0.95442347, -0.97959841, -0.77682357, -0.85960496,  0.2996283 ,
        -0.72828789,  0.70341981,  0.396781  ,  0.64215271, -0.68720152],
       [-0.77817006, -0.96939535, -0.90158668, -0.89269334, -0.94794605,
        -0.62569074, -0.7847199 ,  0.73807292,  0.40638533, -0.49874722],
       [ 0.34337788, -0.99997631, -0.99692205, -0.98133709, -0.93123291,
        -0.99802557, -0.99662894, -0.93641136, -0.25153222,  0.54770565],
       [-0.85404662,  0.97190276,  0.60516394,  0.65999969, -0.68038654,
         0.09222782,  0.34729991,  0.41705046, -0.44431726,  0.74395075],
       [ 0.59584544,  0.18141802,  0.61311866,  0.99808218,  0.85016201,
         0.99980978, -0.18887155,  0.99815551,  0.6531151 ,  0.82872037]])

In [19]:
# Run a single RNN step on the toy inputs and keep all three results around
# for later cells.
a_next_tmp, yt_pred_tmp, cache_tmp = rnn_cell_forward(
    xt=xt_tmp, a_prev=a_prev_tmp, parameters=parameters_tmp
)

# Report one row and the overall shape of each output; every heading is
# printed followed by its value, exactly one print call per pair.
for _label, _shown in (
    ("a_next[4] = \n", a_next_tmp[4]),
    ("a_next.shape = \n", a_next_tmp.shape),
    ("yt_pred[1] =\n", yt_pred_tmp[1]),
    ("yt_pred.shape = \n", yt_pred_tmp.shape),
):
    print(_label, _shown)

a_next[4] = 
 [ 0.59584544  0.18141802  0.61311866  0.99808218  0.85016201  0.99980978
 -0.18887155  0.99815551  0.6531151   0.82872037]
a_next.shape = 
 (5, 10)
yt_pred[1] =
 [0.9888161  0.01682021 0.21140899 0.36817467 0.98988387 0.88945212
 0.36920224 0.9966312  0.9982559  0.17746526]
yt_pred.shape = 
 (2, 10)


In [21]:
# Display the dimensions of the hidden state returned by the forward step.
np.shape(a_next_tmp)

(5, 10)