# 1-一步步搭建循环神经网络
## 使用numpy搭建RNN前向传播和LSTM前向传播

In [1]:
import numpy as np
import rnn_utils

### (1) RNN及前向传播

In [2]:
def rnn_cell_forward(xt, a_prev, params):
    """Run a single time step of a basic RNN cell.

    Args:
        xt: Input for the current time step; left-multiplied by ``Wax``
            (presumably shaped (n_x, m) -- confirm against callers).
        a_prev: Hidden state from the previous step; left-multiplied by
            ``Waa`` (presumably shaped (n_a, m) -- confirm against callers).
        params: Dict providing the entries "Wax", "Waa", "Wya", "ba"
            and "by".

    Returns:
        A tuple ``(a_next, yt_pred, cache)`` where ``a_next`` is the new
        hidden state, ``yt_pred`` is ``rnn_utils.softmax`` applied to the
        output projection of ``a_next``, and ``cache`` is the tuple
        ``(a_next, a_prev, xt, params)``.
    """
    # Pull the parameters out in one go (lookup order is kept as-is).
    Wax, Waa, Wya, ba, by = (
        params[key] for key in ("Wax", "Waa", "Wya", "ba", "by")
    )

    # Hidden state: tanh of the recurrent term + input term + bias.
    recurrent_term = np.dot(Waa, a_prev)
    input_term = np.dot(Wax, xt)
    a_next = np.tanh(recurrent_term + input_term + ba)

    # Prediction: softmax over the output projection of the new state.
    output_logits = np.dot(Wya, a_next) + by
    yt_pred = rnn_utils.softmax(output_logits)

    return a_next, yt_pred, (a_next, a_prev, xt, params)

In [3]:
# Test the RNN cell forward pass.
# The seed is fixed so the random draws below are reproducible; the draws
# happen in statement order, so reordering these lines changes the values.
np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
params = {"Waa":Waa,"Wax":Wax,"Wya":Wya,"ba":ba,"by":by}

a_next,yt_pred,cache = rnn_cell_forward(xt,a_prev,params)
# Display the row at index 4 of a_next, the row at index 1 of yt_pred,
# and both shapes.
a_next[4],a_next.shape,yt_pred[1],yt_pred.shape

(array([ 0.59584544,  0.18141802,  0.61311866,  0.99808218,  0.85016201,
         0.99980978, -0.18887155,  0.99815551,  0.6531151 ,  0.82872037]),
 (5, 10),
 array([0.9888161 , 0.01682021, 0.21140899, 0.36817467, 0.98988387,
        0.88945212, 0.36920224, 0.9966312 , 0.9982559 , 0.17746526]),
 (2, 10))

In [4]:
def rnn_forward(x, a0, params):
    """Run the basic RNN over every time step of a sequence.

    Args:
        x: 3-D input array unpacked as (n_x, m, T_x); the last axis is
            iterated as the time axis.
        a0: Initial hidden state fed to the first time step.
        params: Parameter dict forwarded to ``rnn_cell_forward``; the
            shape of ``params["Wya"]`` is read as (n_y, n_a).

    Returns:
        A tuple ``(a, y_pred, caches)`` where ``a`` has shape
        (n_a, m, T_x) and holds the hidden state of every step,
        ``y_pred`` has shape (n_y, m, T_x) and holds the prediction of
        every step, and ``caches`` is the pair
        ``(list of per-step caches, x)``.
    """
    _, m, T_x = x.shape
    n_y, n_a = params["Wya"].shape

    # Output buffers, filled one time slice per iteration.
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))

    step_caches = []
    hidden = a0
    for t in range(T_x):
        # Each step consumes the hidden state produced by the previous one.
        hidden, step_pred, step_cache = rnn_cell_forward(x[:, :, t], hidden, params)
        a[:, :, t] = hidden
        y_pred[:, :, t] = step_pred
        step_caches.append(step_cache)

    # The input is bundled with the per-step caches.
    return a, y_pred, (step_caches, x)

In [5]:
# Test the full RNN forward pass.
# The seed is fixed so the random draws below are reproducible; the draws
# happen in statement order, so reordering these lines changes the values.
np.random.seed(1)
x = np.random.randn(3,10,4)
a0= np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
params = {"Waa":Waa,"Wax":Wax,"Wya":Wya,"ba":ba,"by":by}

a,y_pred,caches = rnn_forward(x,a0,params)

# caches is the pair (per-step caches, x), so caches[1][1][3] indexes into x
# and len(caches) is 2.
a[4][1],a.shape,y_pred[1][3],y_pred.shape,caches[1][1][3],len(caches)

(array([-0.99999375,  0.77911235, -0.99861469, -0.99833267]),
 (5, 10, 4),
 array([0.79560373, 0.86224861, 0.11118257, 0.81515947]),
 (2, 10, 4),
 array([-1.1425182 , -0.34934272, -0.20889423,  0.58662319]),
 2)

### (2) LSTM及前向传播

In [9]:
def lstm_cell_forward(xt, a_prev, c_prev, params):
    """Run a single time step of an LSTM cell.

    Args:
        xt: 2-D input for the current step, unpacked as (n_x, m).
        a_prev: Hidden state from the previous step; written into the
            first n_a rows of the stacked input.
        c_prev: Cell (memory) state from the previous step.
        params: Dict providing the gate weights/biases "Wf", "bf", "Wi",
            "bi", "Wc", "bc", "Wo", "bo" and the output projection
            "Wy", "by". The shape of "Wy" is read as (n_y, n_a).

    Returns:
        A tuple ``(a_next, c_next, yt_pred, cache)`` where ``a_next`` is
        the new hidden state, ``c_next`` the new cell state, ``yt_pred``
        is ``rnn_utils.softmax`` applied to the output projection of
        ``a_next``, and ``cache`` is the 10-tuple
        ``(a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, params)``.
    """
    Wf, bf = params["Wf"], params["bf"]
    Wi, bi = params["Wi"], params["bi"]
    Wc, bc = params["Wc"], params["bc"]
    Wo, bo = params["Wo"], params["bo"]
    Wy, by = params["Wy"], params["by"]

    n_x, m = xt.shape
    _, n_a = Wy.shape

    # Stack a_prev on top of xt so each gate needs a single matrix product.
    stacked = np.zeros((n_a + n_x, m))
    stacked[:n_a, :] = a_prev
    stacked[n_a:, :] = xt

    def gate(weights, bias):
        # Sigmoid gate evaluated on the stacked [a_prev; xt] input.
        return rnn_utils.sigmoid(np.dot(weights, stacked) + bias)

    ft = gate(Wf, bf)  # forget gate
    it = gate(Wi, bi)  # update (input) gate

    # Candidate cell values, then the blended new cell state.
    cct = np.tanh(np.dot(Wc, stacked) + bc)
    c_next = ft * c_prev + it * cct

    ot = gate(Wo, bo)  # output gate
    a_next = ot * np.tanh(c_next)

    # Prediction from the new hidden state.
    yt_pred = rnn_utils.softmax(np.dot(Wy, a_next) + by)

    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, params)
    return a_next, c_next, yt_pred, cache

In [10]:
# Test the LSTM cell forward pass.
# The seed is fixed so the random draws below are reproducible; the draws
# happen in statement order, so reordering these lines changes the values.
np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
c_prev = np.random.randn(5,10)
# Gate weights have 5+3 columns: they act on a_prev (5 rows) stacked on xt (3 rows).
Wf = np.random.randn(5,5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5,5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5,5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5,5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

params = {"Wf":Wf,"Wi":Wi,"Wo":Wo,"Wc":Wc,"Wy":Wy,"bf":bf,"bi":bi,"bo":bo,"bc":bc,"by":by}

a_next,c_next,yt_pred,cache = lstm_cell_forward(xt,a_prev,c_prev,params)

# Display the output shapes and the number of entries in the cache tuple.
a_next.shape,c_next.shape,yt_pred.shape,len(cache)

((5, 10), (5, 10), (2, 10), 10)

In [15]:
def lstm_forward(x, a0, params):
    """Run the LSTM over every time step of a sequence.

    Args:
        x: 3-D input array unpacked as (n_x, m, T_x); the last axis is
            iterated as the time axis.
        a0: Initial hidden state fed to the first time step.
        params: Parameter dict forwarded to ``lstm_cell_forward``; the
            shape of ``params["Wy"]`` is read as (n_y, n_a).

    Returns:
        A tuple ``(a, y, c, caches)`` where ``a`` and ``c`` have shape
        (n_a, m, T_x) and hold the hidden and cell state of every step,
        ``y`` has shape (n_y, m, T_x) and holds the prediction of every
        step, and ``caches`` is the pair
        ``(list of per-step caches, x)``.
    """
    _, m, T_x = x.shape
    n_y, n_a = params["Wy"].shape

    # Output buffers, filled one time slice per iteration.
    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))

    hidden = a0
    # The cell state starts from all zeros.
    memory = np.zeros((n_a, m))

    step_caches = []
    for t in range(T_x):
        # Each step consumes the hidden/cell state produced by the previous one.
        hidden, memory, step_pred, step_cache = lstm_cell_forward(
            x[:, :, t], hidden, memory, params
        )
        a[:, :, t] = hidden
        c[:, :, t] = memory
        y[:, :, t] = step_pred
        step_caches.append(step_cache)

    # The input is bundled with the per-step caches.
    return a, y, c, (step_caches, x)

In [16]:
# Test the full LSTM forward pass.
# The seed is fixed so the random draws below are reproducible; the draws
# happen in statement order, so reordering these lines changes the values.
np.random.seed(1)
x = np.random.randn(3,10,7)
a0 = np.random.randn(5,10)
# Gate weights have 5+3 columns: they act on the hidden state (5 rows)
# stacked on one time slice of x (3 rows).
Wf = np.random.randn(5,5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5,5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5,5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5,5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

params = {"Wf":Wf,"Wi":Wi,"Wo":Wo,"Wc":Wc,"Wy":Wy,"bf":bf,"bi":bi,"bo":bo,"bc":bc,"by":by}

a,y,c,caches = lstm_forward(x,a0,params)

# caches is the pair (per-step caches, x), so len(caches) is 2.
a.shape,y.shape,len(caches)

((5, 10, 7), (2, 10, 7), 2)