In [0]:
import numpy as np

In [0]:
def softmax(x):
  e_x = np.exp(x-np.max(x))
  return e_x/e_x.sum(axis=0)

def sigmoid(x):
  return 1/(1+np.exp(-x))

In [0]:
def initialize_adam(parameters):
  L = len(parameters)  #no. of layers in neural network
  v = {}    # pyhton dictionary contains weighted average of gradient
  s = {}    # python dictionary contains weighted average of squared gradient

  for l in range(L):
    v["dW" + str(l+1)] = np.zeros(parameters("W" + str(l+1)).shape)
    v["db" + str(l+1)] = np.zeros(parameters("b" + str(l+1)).shape)
    s["dW" + str(l+1)] = np.zeros(parameters("W" + str(l+1)).shape)
    s["db" + str(l+1)] = np.zeros(parameters("b" + str(l+1)).shape)

    return v,s 
  def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01, beta1=0.9, beta2 = 0.999, epsilon = 1e-8):
    L = len(parameters)
    v_corrected = {}
    s_corrected = {}

    for l in range (L):
      v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1-beta1) * v["dW" + str(l+1)]
      v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1-beta1) * v["db" + str(l+1)]

      v_corrected = v["dW" + str(l+1)] / (1-beta1 ** t)
      v_corrected = v["db" + str(l+1)] / (1-beta1 ** t)

      s["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1-beta2) * (grads["dW" + str(l+1) ** 2])
      s["db" + str(l+1)] = beta2 * s["db" + str(l+1)] + (1-beta2) * (grads["db" + str(l+1) ** 2])

      s_corrected = s["dW" + str(l+1)] / (1 - beta2 ** t)
      s_corrected = s["db" + str(l+1)] / (1 - beta2 ** t)

      parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * v_corrected["dW" + str(l+1)] / np.sqrt(s_corrected["dW" + str(l+1)] + epsilon)
      parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * v_corrected["dW" + str(l+1)] / np.sqrt(s_corrected["db" + str(l+1)] + epsilon)

      return parameters, v, s 

  


### **Forward propagation for the basic Recurrent Neural Network**

In [0]:
def rnn_cell_forward(xt, a_prev, parameters):
  Wax = parameters["Wax"]
  Waa = parameters["Waa"]
  Wya = parameters["Wya"]
  ba = parameters["ba"]
  by = parameters["by"]

  a_next = np.tanh((np.dot(Waa,a_prev) + np.dot(Wax,xt) + ba))
  yt_pred = softmax(np.dot(Wya,a_next) + by)

  cache = (a_next, a_prev, xt, parameters)  # for backward propagation

  return a_next, yt_pred, cache

In [5]:
np.random.seed(1)
xt_temp = np.random.randn(3,10)
a_prev_temp = np.random.randn(5,10)

parameters_temp ={}
parameters_temp['Waa'] = np.random.randn(5,5)
parameters_temp['Wax'] = np.random.randn(5,3)
parameters_temp['Wya'] = np.random.randn(2,5)
parameters_temp['ba'] = np.random.randn(5,1)
parameters_temp['by'] = np.random.randn(2,1)

a_next_temp, yt_pred_temp, cache_temp = rnn_cell_forward(xt_temp, a_prev_temp, parameters_temp)
print("\n", a_next_temp)
print("\n", a_next_temp.shape)
print("\n", yt_pred_temp)
print("\n", yt_pred_temp.shape)


 [[ 0.95442347 -0.97959841 -0.77682357 -0.85960496  0.2996283  -0.72828789
   0.70341981  0.396781    0.64215271 -0.68720152]
 [-0.77817006 -0.96939535 -0.90158668 -0.89269334 -0.94794605 -0.62569074
  -0.7847199   0.73807292  0.40638533 -0.49874722]
 [ 0.34337788 -0.99997631 -0.99692205 -0.98133709 -0.93123291 -0.99802557
  -0.99662894 -0.93641136 -0.25153222  0.54770565]
 [-0.85404662  0.97190276  0.60516394  0.65999969 -0.68038654  0.09222782
   0.34729991  0.41705046 -0.44431726  0.74395075]
 [ 0.59584544  0.18141802  0.61311866  0.99808218  0.85016201  0.99980978
  -0.18887155  0.99815551  0.6531151   0.82872037]]

 (5, 10)

 [[0.0111839  0.98317979 0.78859101 0.63182533 0.01011613 0.11054788
  0.63079776 0.0033688  0.0017441  0.82253474]
 [0.9888161  0.01682021 0.21140899 0.36817467 0.98988387 0.88945212
  0.36920224 0.9966312  0.9982559  0.17746526]]

 (2, 10)


In [0]:
def rnn_forward(x, a0, parameters):
  caches =[]
  n_x, m, T_x = x.shape
  n_y, n_a = parameters["Wya"].shape

  a = np.zeros((n_a, m, T_x))
  y_pred = np.zeros((n_y, m, T_x))

  a_next = a0

  for t in range (T_x):
    xt = x[:,:,t]
    a_next, yt_pred, cache = rnn_cell_forward(xt, a0, parameters)
    a[:,:,t] = a_next
    y_pred[:,:,t] = yt_pred
    caches.append(cache)
    caches = (caches, x)
    
    return a, y_pred, caches

In [7]:
np.random.seed(1)
x_tmp = np.random.randn(3,10,4)
a0_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Waa'] = np.random.randn(5,5)
parameters_tmp['Wax'] = np.random.randn(5,3)
parameters_tmp['Wya'] = np.random.randn(2,5)
parameters_tmp['ba'] = np.random.randn(5,1)
parameters_tmp['by'] = np.random.randn(2,1)

a_tmp, y_pred_tmp, caches_tmp = rnn_forward(x_tmp, a0_tmp, parameters_tmp)
print("a[4][1] = \n", a_tmp[4][1])
print("a.shape = \n", a_tmp.shape)
print("y_pred[1][3] =\n", y_pred_tmp[1][3])
print("y_pred.shape = \n", y_pred_tmp.shape)
print("caches[1][1][3] =\n", caches_tmp[1][1][3])
print("len(caches) = \n", len(caches_tmp))

a[4][1] = 
 [-0.99999375  0.          0.          0.        ]
a.shape = 
 (5, 10, 4)
y_pred[1][3] =
 [0.79560373 0.         0.         0.        ]
y_pred.shape = 
 (2, 10, 4)
caches[1][1][3] =
 [-1.1425182  -0.34934272 -0.20889423  0.58662319]
len(caches) = 
 2


### **Long Short-Term Memory (LSTM) Network**

In [0]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
  Wf = parameters["Wf"]  # forget gate weight
  bf = parameters["bf"]
  Wi = parameters["Wi"]  # update gate weight
  bi = parameters["bi"]
  Wo = parameters["Wo"]  # output gate weight
  bo = parameters["bo"]
  Wy = parameters["Wy"]  # prediction weight
  by = parameters["by"]
  Wc = parameters["Wc"]  # candidate value weight
  bc = parameters["bc"]

  n_x, m = xt.shape
  n_y, n_a = Wy.shape

  concat = np.zeros((n_a + n_x, m))
  concat[: n_a,:] = a_prev
  concat[n_a :,:] = xt

  ft = sigmoid(np.dot(Wf,concat) + bf) 
  it = sigmoid(np.dot(Wi, concat) + bi)
  cct = np.tanh(np.dot(Wc, concat) + bc)
  c_next = it * cct + ft*c_prev
  ot = sigmoid(np.dot(Wo, concat) + bo)
  a_next = ot * np.tanh(c_next)

  yt_pred = softmax(np.dot(Wy, c_next) + by)

  cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

  return a_next, c_next, yt_pred, cache




In [9]:
np.random.seed(1)
xt_tmp = np.random.randn(3,10)
a_prev_tmp = np.random.randn(5,10)
c_prev_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Wf'] = np.random.randn(5, 5+3)
parameters_tmp['bf'] = np.random.randn(5,1)
parameters_tmp['Wi'] = np.random.randn(5, 5+3)
parameters_tmp['bi'] = np.random.randn(5,1)
parameters_tmp['Wo'] = np.random.randn(5, 5+3)
parameters_tmp['bo'] = np.random.randn(5,1)
parameters_tmp['Wc'] = np.random.randn(5, 5+3)
parameters_tmp['bc'] = np.random.randn(5,1)
parameters_tmp['Wy'] = np.random.randn(2,5)
parameters_tmp['by'] = np.random.randn(2,1)

a_next_tmp, c_next_tmp, yt_tmp, cache_tmp = lstm_cell_forward(xt_tmp, a_prev_tmp, c_prev_tmp, parameters_tmp)
print("a_next[4] = \n", a_next_tmp[4])
print("a_next.shape = ", c_next_tmp.shape)
print("c_next[2] = \n", c_next_tmp[2])
print("c_next.shape = ", c_next_tmp.shape)
print("yt[1] =", yt_tmp[1])
print("yt.shape = ", yt_tmp.shape)
print("cache[1][3] =\n", cache_tmp[1][3])
print("len(cache) = ", len(cache_tmp))

a_next[4] = 
 [-0.66408471  0.0036921   0.02088357  0.22834167 -0.85575339  0.00138482
  0.76566531  0.34631421 -0.00215674  0.43827275]
a_next.shape =  (5, 10)
c_next[2] = 
 [ 0.63267805  1.00570849  0.35504474  0.20690913 -1.64566718  0.11832942
  0.76449811 -0.0981561  -0.74348425 -0.26810932]
c_next.shape =  (5, 10)
yt[1] = [0.93502843 0.01259329 0.14295783 0.12709192 0.99955699 0.0092636
 0.0017344  0.11317375 0.89929897 0.11418954]
yt.shape =  (2, 10)
cache[1][3] =
 [-0.16263996  1.03729328  0.72938082 -0.54101719  0.02752074 -0.30821874
  0.07651101 -1.03752894  1.41219977 -0.37647422]
len(cache) =  10


In [0]:
def lstm_forward(x, a0, parameters):

  caches = []

  Wy = parameters["Wy"]

  n_x, m, T_x = x.shape
  n_y, n_a = parameters['Wy'].shape

  a = np.zeros((n_a,m,T_x))
  c = np.zeros((n_a,m,T_x))
  y = np.zeros((n_y,m,T_x))

  a_next = a0
  c_next = np.zeros((n_a,m))

  for t in range(T_x):
    xt = x[:,:,t]
    a_next, c_next, yt, cache = lstm_cell_forward(xt, a_next, c_next, parameters)
    a[:,:,t] = a_next
    c[:,:,t] = c_next 
    y[:,:,t] = yt

    caches.append(cache)

  caches = (caches,x)

  return a,y,c,caches

In [11]:
np.random.seed(1)
x_tmp = np.random.randn(3,10,7)
a0_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Wf'] = np.random.randn(5, 5+3)
parameters_tmp['bf'] = np.random.randn(5,1)
parameters_tmp['Wi'] = np.random.randn(5, 5+3)
parameters_tmp['bi']= np.random.randn(5,1)
parameters_tmp['Wo'] = np.random.randn(5, 5+3)
parameters_tmp['bo'] = np.random.randn(5,1)
parameters_tmp['Wc'] = np.random.randn(5, 5+3)
parameters_tmp['bc'] = np.random.randn(5,1)
parameters_tmp['Wy'] = np.random.randn(2,5)
parameters_tmp['by'] = np.random.randn(2,1)

a_tmp, y_tmp, c_tmp, caches_tmp = lstm_forward(x_tmp, a0_tmp, parameters_tmp)
print("a[4][3][6] = ", a_tmp[4][3][6])
print("a.shape = ", a_tmp.shape)
print("y[1][4][3] =", y_tmp[1][4][3])
print("y.shape = ", y_tmp.shape)
print("caches[1][1][1] =\n", caches_tmp[1][1][1])
print("c[1][2][1]", c_tmp[1][2][1])
print("len(caches) = ", len(caches_tmp))

a[4][3][6] =  0.17211776753291672
a.shape =  (5, 10, 7)
y[1][4][3] = 0.9897036469242136
y.shape =  (2, 10, 7)
caches[1][1][1] =
 [ 0.82797464  0.23009474  0.76201118 -0.22232814 -0.20075807  0.18656139
  0.41005165]
c[1][2][1] -0.8555449167181981
len(caches) =  2
