In [1]:
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [2]:
from rnn_utils import *
import numpy as np

In [8]:
### Implementing basic RNN

def rnn_cell_forward(xt, a_prev, parameters):
    """
    xt= input data at timestep "t", numpy array of shape (n_x, m).
    a_prev= Hidden state at timestep "t-1", numpy array of shape (n_a, m)
    parameters= python dictionary containing:
                        Wax -- Weight matrix multiplying the input, numpy array of shape (n_a, n_x)
                        Waa -- Weight matrix multiplying the hidden state, numpy array of shape (n_a, n_a)
                        Wya -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        ba --  Bias, numpy array of shape (n_a, 1)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)
    """
    
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    a_next_timestep=np.tanh(np.dot(Wax,xt) + np.dot(Waa,a_prev) + ba)
    pred_timestep= softmax(np.dot(Wya,a_next_timestep)+by)
    cache = (a_next_timestep, a_prev, xt, parameters)
    
    return a_next_timestep, pred_timestep, cache
np.random.seed(1)
xt=np.random.randn(3,10)
a_prev = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a_next, pred_timestep, cache= rnn_cell_forward(xt,a_prev,parameters)
a_next.shape, pred_timestep.shape

((5, 10), (2, 10))

In [14]:
def rnn_forward(x, a0, parameters):
    """
    Arguments:
    x = Input data for every time-step, of shape (n_x, m, T_x).
    a0 = Initial hidden state, of shape (n_a, m)
    """
    caches = []                # Initialize "caches" which will contain the list of all caches

    n_x, m, T_x = x.shape      # Retrieve dimensions from shapes of x and parameters["Wya"]
    n_y, n_a = parameters["Wya"].shape
    
    a_next=a0
    a=np.zeros((n_a,m,T_x))
    predictions=np.zeros((n_y,m,T_x))

    for t in range(T_x):
      a_next, pred_timestep, cache= rnn_cell_forward(x[:,:,t],a_next,parameters)
      a[:,:,t]=a_next
      predictions[:,:,t]=pred_timestep
      caches.append(cache)
    caches = (caches, x)
    
    return a, predictions, caches

np.random.seed(1)
x = np.random.randn(3,10,4)
a0 = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward(x, a0, parameters)
a.shape, y_pred.shape,a[1][1][1] #first dimension of activation for first training example at the first time step

((5, 10, 4), (2, 10, 4), -0.9980852070002562)

In [20]:
### Implementing basic LSTM structure

def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    parameters -- python dictionary containing:
                        Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
                        bf -- Bias of the forget gate, numpy array of shape (n_a, 1)
                        Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
                        bi -- Bias of the update gate, numpy array of shape (n_a, 1)
                        Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x)
                        bc --  Bias of the first "tanh", numpy array of shape (n_a, 1)
                        Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
                        bo --  Bias of the output gate, numpy array of shape (n_a, 1)
                        Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)

    """
    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]
    
    # Retrieve dimensions from shapes of xt and Wy
    n_x, m = xt.shape   #3,10
    n_y, n_a = Wy.shape #2,5

    concat=np.zeros([n_a+n_x,m])
    concat[:n_a,:]=a_prev
    concat[n_a:,:]=xt

    forget_gate=sigmoid(np.dot(Wf,concat)+bf)
    update_gate=sigmoid(np.dot(Wi,concat)+bi)
    c_tilda=np.tanh(np.dot(Wc,concat)+bc)
    output_gate=sigmoid(np.dot(Wo,concat)+bo)

    cell_state=forget_gate*c_prev + update_gate*c_tilda
    a_next=update_gate*np.tanh(cell_state)

    pred_next=softmax(np.dot(Wy,a_next)+by)
    cache = (a_next, cell_state, a_prev, c_prev, forget_gate, update_gate, c_tilda, output_gate, xt, parameters)

    return a_next, cell_state, pred_next, cache

np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
c_prev = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}

a_next, c_next, yt, cache = lstm_cell_forward(xt, a_prev, c_prev, parameters)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", c_next.shape)
print("c_next[2] = ", c_next[2])
print("c_next.shape = ", c_next.shape)
print("yt[1] =", yt[1])
print("yt.shape = ", yt.shape)
print("cache[1][3] =", cache[1][3])
print("len(cache) = ", len(cache))


a_next[4] =  [-0.76386258  0.26723682  0.00566367  0.00286592 -0.22309686  0.75466609
  0.79208484  0.07053749 -0.12783557  0.09979419]
a_next.shape =  (5, 10)
c_next[2] =  [ 0.63267805  1.00570849  0.35504474  0.20690913 -1.64566718  0.11832942
  0.76449811 -0.0981561  -0.74348425 -0.26810932]
c_next.shape =  (5, 10)
yt[1] = [0.83783903 0.11489852 0.28282751 0.31999615 0.73845617 0.02930639
 0.0087668  0.30423149 0.52150241 0.32567821]
yt.shape =  (2, 10)
cache[1][3] = [-0.16263996  1.03729328  0.72938082 -0.54101719  0.02752074 -0.30821874
  0.07651101 -1.03752894  1.41219977 -0.37647422]
len(cache) =  10


In [25]:
def lstm_forward(x, a0, parameters):
    caches=[]
    n_x,m,T_x=x.shape
    caches = []                # Initialize "caches" which will contain the list of all caches

    n_x, m, T_x = x.shape      # Retrieve dimensions from shapes of x and parameters["Wya"]
    n_y, n_a = parameters["Wy"].shape
    
    a_next=a0
    c=np.zeros([n_a,m,T_x])
    a=np.zeros([n_a,m,T_x])
    predictions=np.zeros([n_y,m,T_x])
    cell_state=c[:,:,0]

    for t in range(T_x):
      a_next, cell_state, pred_next, cache= lstm_cell_forward(x[:,:,t],a_next,cell_state,parameters)
      a[:,:,t]=a_next
      predictions[:,:,t]=pred_next
      caches.append(cache)
      c[:,:,t]=cell_state

    caches = (caches, x)
    
    return a, predictions,c, caches
np.random.seed(1)
x = np.random.randn(3,10,7)
a0 = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}

a, y, c, caches = lstm_forward(x, a0, parameters)
print("a[4][3][6] = ", a[4][3][6])
print("a.shape = ", a.shape)
print("y[1][4][3] =", y[1][4][3])
print("y.shape = ", y.shape)
print("caches[1][1[1]] =", caches[1][1][1])
print("c[1][2][1]", c[1][2][1])
print("len(caches) = ", len(caches))



a[4][3][6] =  0.6951498862664757
a.shape =  (5, 10, 7)
y[1][4][3] = 0.9705921702877582
y.shape =  (2, 10, 7)
caches[1][1[1]] = [ 0.82797464  0.23009474  0.76201118 -0.22232814 -0.20075807  0.18656139
  0.41005165]
c[1][2][1] -0.8577763469467968
len(caches) =  2
