# API 调用

In [1]:
import torch
import torch.nn as nn
# Instantiate a unidirectional, single-layer RNN
single_RNN = nn.RNN(input_size=4,hidden_size=3,num_layers=1,\
      batch_first=True)
input_num = torch.randn(1,2,4)# (batch_size, seq_len, feature_size)

output,h_n=single_RNN(input_num)# output holds the hidden state of every time step
print(output)# shape 1*2*3
print(h_n)# shape 1*1*3: the final hidden state

# Bidirectional, single-layer RNN
Bi_RNN = nn.RNN(input_size=4,hidden_size=3,num_layers=1,\
      bidirectional=True,batch_first=True)
output,h_n = Bi_RNN(input_num)
print('-'*100)
print(output)# shape 1*2*6: the two directions' h_t concatenated; the leading half comes from the forward RNN
print(h_n)# shape 2*1*3: one final hidden state per direction (not 1*1*6)

tensor([[[-0.6487, -0.7867,  0.8898],
         [ 0.4890,  0.9394,  0.5705]]], grad_fn=<TransposeBackward1>)
tensor([[[0.4890, 0.9394, 0.5705]]], grad_fn=<StackBackward0>)
----------------------------------------------------------------------------------------------------
tensor([[[-0.7550, -0.4489, -0.2163,  0.0495, -0.1725,  0.8771],
         [-0.7343, -0.6182, -0.3029,  0.2824, -0.9356,  0.6264]]],
       grad_fn=<TransposeBackward1>)
tensor([[[-0.7343, -0.6182, -0.3029]],

        [[ 0.0495, -0.1725,  0.8771]]], grad_fn=<StackBackward0>)


# 单向和双向单层RNN的逐行实现

![%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8A%E5%8D%889.44.46.png](attachment:%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8A%E5%8D%889.44.46.png)

![%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8A%E5%8D%8811.06.25.png](attachment:%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8A%E5%8D%8811.06.25.png)

In [2]:
bs,T = 2,3 # batch_size, seq_len
input_size,hidden_size = 2,3# input feature size, hidden state size
# NOTE: `input` shadows the Python built-in of the same name; later statements in this notebook read this variable.
input = torch.randn(bs,T,input_size)
h_prev = torch.zeros(bs,hidden_size) # initial hidden state

# Call the PyTorch API
rnn = nn.RNN(input_size,hidden_size,num_layers=1,\
      batch_first=True)
rnn_output,state_final = rnn(input,h_prev.unsqueeze(0))# unsqueeze(0) adds the leading dimension: (1, bs, hidden_size)

# print('Pytorch API output:')
# print(rnn_output)
# print(state_final)
# Hand-written rnn_forward function
def rnn_forward(input,weight_ih,weight_hh,bias_ih,bias_hh,h_prev):
    """Run a single-layer, unidirectional tanh RNN over a batch-first sequence.

    For every time step t this computes
        h_t = tanh(x_t @ W_ih^T + b_ih + h_{t-1} @ W_hh^T + b_hh)

    Args:
        input: input sequence of shape (bs, T, input_size).
        weight_ih: input-to-hidden weights of shape (h_dim, input_size).
        weight_hh: hidden-to-hidden weights of shape (h_dim, h_dim).
        bias_ih: input-to-hidden bias of shape (h_dim,).
        bias_hh: hidden-to-hidden bias of shape (h_dim,).
        h_prev: initial hidden state of shape (bs, h_dim).

    Returns:
        A tuple (h_out, h_n): h_out has shape (bs, T, h_dim) and holds the
        hidden state of every step; h_n has shape (1, bs, h_dim) and is the
        hidden state after the last step (the initial state when T == 0).
    """
    bs,T,_ = input.shape
    h_dim = weight_ih.shape[0]
    # Allocate the output with the input's dtype and device. A plain
    # torch.zeros(...) is always default-dtype on CPU, which silently
    # downcasts float64 results when they are written into it.
    h_out = input.new_zeros(bs,T,h_dim)

    for t in range(T):
        x_t = input[:,t,:]  # features of the current step, (bs, input_size)
        # A plain matmul broadcasts the shared weights over the batch, so the
        # weights no longer have to be tiled per batch element on every step.
        h_prev = torch.tanh(x_t @ weight_ih.t() + bias_ih
                            + h_prev @ weight_hh.t() + bias_hh)
        h_out[:,t,:] = h_prev

    return h_out,h_prev.unsqueeze(0)


# Verify correctness against the PyTorch API
# for k,v in rnn.named_parameters():
#     print(k,v)

custom_rnn_output,custom_state_final = rnn_forward(
    input,rnn.weight_ih_l0,rnn.weight_hh_l0,
    rnn.bias_ih_l0,rnn.bias_hh_l0,h_prev)
# print('\n rnn_forward function output:')

# print(custom_rnn_output)
# print(custom_state_final)

# Compare numerically instead of relying on (commented-out) prints, so the
# cell fails loudly if the hand-written RNN diverges from nn.RNN.
assert torch.allclose(rnn_output,custom_rnn_output,atol=1e-6)
assert torch.allclose(state_final,custom_state_final,atol=1e-6)


#bi-forward函数
def bi_rnn_forward(input,weight_ih,weight_hh,
        bias_ih,bias_hh,h_prev,weight_ih_reverse,
        weight_hh_reverse,bias_ih_reverse,bias_hh_reverse,
        h_prev_reverse):
    """Run a single-layer bidirectional tanh RNN built from two rnn_forward passes.

    The forward direction processes `input` as given. The reverse direction
    processes the sequence flipped along the time axis, and its outputs are
    flipped back so that position t of both halves refers to the same step.

    Args:
        input: input sequence of shape (bs, T, input_size).
        weight_ih, weight_hh, bias_ih, bias_hh: forward-direction parameters,
            passed straight through to rnn_forward.
        h_prev: forward-direction initial hidden state, (bs, h_dim).
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse:
            reverse-direction parameters, passed straight through to rnn_forward.
        h_prev_reverse: reverse-direction initial hidden state, (bs, h_dim).

    Returns:
        A tuple (h_out, h_n): h_out has shape (bs, T, 2*h_dim) with the forward
        states in the first h_dim features and the reverse states in the last
        h_dim; h_n has shape (2, bs, h_dim) holding the final forward state
        followed by the final reverse state.
    """
    # Forward direction.
    forward_output,forward_state = rnn_forward(
        input,weight_ih,weight_hh,bias_ih,bias_hh,h_prev)

    # Reverse direction: run over the time-flipped sequence, then flip the
    # per-step outputs back into the original time order.
    backward_output,backward_state = rnn_forward(
        torch.flip(input,[1]),weight_ih_reverse,weight_hh_reverse,
        bias_ih_reverse,bias_hh_reverse,h_prev_reverse)
    backward_output = torch.flip(backward_output,[1])

    # Concatenate instead of copying into a torch.zeros buffer: the result
    # keeps the dtype/device of the per-direction outputs, and the final
    # states returned by rnn_forward are used directly (the reverse pass ends
    # at original position 0), which also works for an empty sequence.
    h_out = torch.cat([forward_output,backward_output],dim=-1)
    h_n = torch.cat([forward_state,backward_state],dim=0)
    return h_out,h_n

# Verify correctness against the PyTorch API
bi_rnn = nn.RNN(input_size,hidden_size,num_layers=1,\
      batch_first=True,bidirectional=True)
h_prev = torch.zeros(2,bs,hidden_size)# initial states for both directions: (2, bs, hidden_size)
bi_rnn_output,bi_state_final = bi_rnn(input,h_prev)
print('Pytorch API output:')
print(bi_rnn_output)
print(bi_state_final)

# for k,v in bi_rnn.named_parameters():
#     print(k,v)
# Feed the nn.RNN parameters to the hand-written version; h_prev[0] and h_prev[1]
# are passed as the forward and reverse initial states respectively.
custom_bi_rnn_output,custom_bi_rnn_state_final = bi_rnn_forward(input,bi_rnn.weight_ih_l0,bi_rnn.weight_hh_l0,\
        bi_rnn.bias_ih_l0,bi_rnn.bias_hh_l0,h_prev[0],bi_rnn.weight_ih_l0_reverse,\
        bi_rnn.weight_hh_l0_reverse,bi_rnn.bias_ih_l0_reverse,bi_rnn.bias_hh_l0_reverse,\
        h_prev[1])

print('\n bi_rnn_forward function output:')
print(custom_bi_rnn_output)
print(custom_bi_rnn_state_final)


Pytorch API output:
tensor([[[-0.7090, -0.8978, -0.9958, -0.7433,  0.0913,  0.3168],
         [ 0.7019,  0.4100, -0.3001, -0.1927,  0.5438,  0.2546],
         [ 0.9576,  0.1520,  0.3943, -0.5487,  0.6656,  0.2121]],

        [[ 0.7799,  0.1739, -0.6690, -0.6177,  0.6237,  0.0070],
         [ 0.9467, -0.0030,  0.5112, -0.0109,  0.3800,  0.6153],
         [-0.4308, -0.8283, -0.9898, -0.7668,  0.5490, -0.2273]]],
       grad_fn=<TransposeBackward1>)
tensor([[[ 0.9576,  0.1520,  0.3943],
         [-0.4308, -0.8283, -0.9898]],

        [[-0.7433,  0.0913,  0.3168],
         [-0.6177,  0.6237,  0.0070]]], grad_fn=<StackBackward0>)

 bi_rnn_forward function output:
tensor([[[-0.7090, -0.8978, -0.9958, -0.7433,  0.0913,  0.3168],
         [ 0.7019,  0.4100, -0.3001, -0.1927,  0.5438,  0.2546],
         [ 0.9576,  0.1520,  0.3943, -0.5487,  0.6656,  0.2121]],

        [[ 0.7799,  0.1739, -0.6690, -0.6177,  0.6237,  0.0070],
         [ 0.9467, -0.0030,  0.5112, -0.0109,  0.3800,  0.6153],
      

# LSTM & LSTMP

![%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8B%E5%8D%888.16.27.png](attachment:%E6%88%AA%E5%B1%8F2022-04-26%20%E4%B8%8B%E5%8D%888.16.27.png)

In [3]:
# LSTM and LSTMP (LSTM with projection)
bs,T,i_size,h_size = 2,3,4,5# batch size, sequence length, input size, hidden size
input = torch.randn(bs,T,i_size)
c0 = torch.randn(bs,h_size)# initial cell state
h0 = torch.randn(bs,h_size)# initial hidden state

lstm_layer = nn.LSTM(input_size=i_size,\
        hidden_size=h_size,batch_first=True)
output,(h_final,c_final) =lstm_layer(input,(h0.unsqueeze(0),\
                  c0.unsqueeze(0)))
print('Pytorch API:')
print(output)# shape bs*seq_len*h_size
print('-'*100)
# List the layer's parameter names and shapes
for k,v in lstm_layer.named_parameters():
    print(k,v.shape)

# Hand-written LSTM
def lstm_forward(input,initial_states,w_ih,w_hh,b_ih,b_hh,w_hr=None):
    """Run a single-layer, unidirectional LSTM (optionally with projection).

    The stacked weights/biases are split into four equal row blocks in the
    order input gate (i), forget gate (f), cell candidate (g), output gate (o).

    Args:
        input: input sequence of shape (bs, T, i_size).
        initial_states: tuple (h_0, c_0); h_0 has shape (bs, h_size), or
            (bs, proj_size) when w_hr is given; c_0 has shape (bs, h_size).
        w_ih: stacked input-to-hidden weights, (4*h_size, i_size).
        w_hh: stacked hidden-to-hidden weights, (4*h_size, h_size), or
            (4*h_size, proj_size) when w_hr is given.
        b_ih: stacked input-to-hidden bias, (4*h_size,).
        b_hh: stacked hidden-to-hidden bias, (4*h_size,).
        w_hr: optional projection matrix of shape (proj_size, h_size); when
            given, every hidden state is projected down to proj_size.

    Returns:
        A tuple (output, (h_n, c_n)): output has shape (bs, T, output_size)
        where output_size is proj_size with projection and h_size otherwise;
        h_n is (bs, output_size) and c_n is (bs, h_size). Unlike nn.LSTM the
        final states carry no leading layer dimension.
    """
    prev_h,prev_c = initial_states
    bs,T,_ = input.shape
    h_size = w_ih.shape[0] // 4
    output_size = h_size if w_hr is None else w_hr.shape[0]

    # Allocate with the input's dtype and device. A plain torch.zeros(...) is
    # always default-dtype on CPU and would silently downcast float64 results.
    output = input.new_zeros(bs,T,output_size)

    for t in range(T):
        x_t = input[:,t,:]  # (bs, i_size)
        # Pre-activations of all four gates at once, (bs, 4*h_size). The
        # matmul broadcasts the shared weights over the batch, so no
        # per-batch tiling of the weight matrices is needed.
        gates = x_t @ w_ih.t() + prev_h @ w_hh.t() + b_ih + b_hh
        i_pre,f_pre,g_pre,o_pre = gates.chunk(4,dim=1)

        # Input gate (i), forget gate (f), cell candidate (g), output gate (o)
        i_t = torch.sigmoid(i_pre)
        f_t = torch.sigmoid(f_pre)
        g_t = torch.tanh(g_pre)
        o_t = torch.sigmoid(o_pre)

        # Update the cell state c_t and the hidden state h_t
        prev_c = f_t*prev_c + i_t*g_t
        prev_h = o_t*torch.tanh(prev_c)  # (bs, h_size)

        if w_hr is not None:
            # Projection: compress h_t from h_size down to proj_size
            prev_h = prev_h @ w_hr.t()  # (bs, proj_size)

        output[:,t,:] = prev_h

    return output,(prev_h,prev_c)
    

# Run the hand-written LSTM with the parameters of the nn.LSTM layer;
# h0/c0 are passed without the leading dimension that the API call adds via unsqueeze(0).
output_custom,(h_final_custom,c_final_custom) = lstm_forward(input,(h0,c0),\
    lstm_layer.weight_ih_l0,lstm_layer.weight_hh_l0,\
    lstm_layer.bias_ih_l0,lstm_layer.bias_hh_l0)


print('Custom lstm_forward:')
print(output_custom)

# LSTM projection
# API


Pytorch API:
tensor([[[-0.3097, -0.2295, -0.2703,  0.1717, -0.2675],
         [-0.1469, -0.0129, -0.0467, -0.0493, -0.0328],
         [-0.2623,  0.1193, -0.0625, -0.0692, -0.1644]],

        [[-0.1851,  0.2101, -0.4214,  0.0537, -0.2036],
         [-0.2892,  0.0554, -0.1137,  0.0034, -0.0883],
         [-0.1438, -0.0080,  0.0394, -0.1018, -0.0203]]],
       grad_fn=<TransposeBackward0>)
----------------------------------------------------------------------------------------------------
weight_ih_l0 torch.Size([20, 4])
weight_hh_l0 torch.Size([20, 5])
bias_ih_l0 torch.Size([20])
bias_hh_l0 torch.Size([20])
Custom lstm_forward:
tensor([[[-0.3097, -0.2295, -0.2703,  0.1717, -0.2675],
         [-0.1469, -0.0129, -0.0467, -0.0493, -0.0328],
         [-0.2623,  0.1193, -0.0625, -0.0692, -0.1644]],

        [[-0.1851,  0.2101, -0.4214,  0.0537, -0.2036],
         [-0.2892,  0.0554, -0.1137,  0.0034, -0.0883],
         [-0.1438, -0.0080,  0.0394, -0.1018, -0.0203]]], grad_fn=<CopySlices>)


In [4]:
# LSTM projection
# English summary of the note below: projection compresses h_t. Without it the
# size of h_t is tied to the size of c_t; with it the initial h has size
# proj_size, and at every recurrent step a (proj_size, h_size) matrix compresses
# h_t, which reduces the computation and parameter count of the network.
"""
作用：相当于给h_t做压缩，以前的h_t维度依赖于c_t的维度
引入projection，h_t的初值可以变成proj_size，且在最后递归
计算h_t的时候用一个新的（proj_size,h_size）大小的矩阵对h_t做压缩
以减小整个网络的计算量和参数量
"""
bs,T,i_size,h_size = 2,3,4,5
proj_size = 3
input = torch.randn(bs,T,i_size)
c0 = torch.randn(bs,h_size)# the cell state keeps size h_size
h0 = torch.randn(bs,proj_size)# the hidden state has size proj_size

lstm_layer = nn.LSTM(i_size,h_size,\
  batch_first=True,proj_size=proj_size)
output,(h_final,c_final) =lstm_layer(input,(h0.unsqueeze(0),\
        c0.unsqueeze(0)))
print('Pytorch API:')

print(output)# shape bs*seq_len*proj_size
for k,v in lstm_layer.named_parameters():
    print(k,v.shape)# weight_hr_l0 is the projection matrix that compresses the hidden state

# Run the hand-written LSTM with the same parameters, including the projection matrix
output_custom,(h_final_custom,c_final_custom) = lstm_forward(input,(h0,c0),\
lstm_layer.weight_ih_l0,lstm_layer.weight_hh_l0,\
lstm_layer.bias_ih_l0,
lstm_layer.bias_hh_l0,lstm_layer.weight_hr_l0)
print(output_custom)

Pytorch API:
tensor([[[-0.2144, -0.0574, -0.1876],
         [-0.0580, -0.0700, -0.1161],
         [-0.0740,  0.0044, -0.1373]],

        [[-0.1526, -0.0187, -0.0072],
         [-0.1558,  0.0345, -0.0434],
         [-0.0872,  0.0163, -0.0755]]], grad_fn=<TransposeBackward0>)
weight_ih_l0 torch.Size([20, 4])
weight_hh_l0 torch.Size([20, 3])
bias_ih_l0 torch.Size([20])
bias_hh_l0 torch.Size([20])
weight_hr_l0 torch.Size([3, 5])
tensor([[[-0.2144, -0.0574, -0.1876],
         [-0.0580, -0.0700, -0.1161],
         [-0.0740,  0.0044, -0.1373]],

        [[-0.1526, -0.0187, -0.0072],
         [-0.1558,  0.0345, -0.0434],
         [-0.0872,  0.0163, -0.0755]]], grad_fn=<CopySlices>)


# GRU

![%E6%88%AA%E5%B1%8F2022-04-28%20%E4%B8%8A%E5%8D%8811.44.41.png](attachment:%E6%88%AA%E5%B1%8F2022-04-28%20%E4%B8%8A%E5%8D%8811.44.41.png)

In [8]:
def gru_forward(input,initial_states,w_ih,w_hh,b_ih,b_hh):
    """Run a single-layer, unidirectional GRU over a batch-first sequence.

    The stacked weights/biases are split into three equal row blocks in the
    order reset gate (r), update gate (z), candidate state (n). Per step:
        r_t = sigmoid(W_ir x_t + b_ir + W_hr h_{t-1} + b_hr)
        z_t = sigmoid(W_iz x_t + b_iz + W_hz h_{t-1} + b_hz)
        n_t = tanh(W_in x_t + b_in + r_t * (W_hn h_{t-1} + b_hn))
        h_t = (1 - z_t) * n_t + z_t * h_{t-1}

    Args:
        input: input sequence of shape (bs, T, i_size).
        initial_states: initial hidden state of shape (bs, h_size).
        w_ih: stacked input-to-hidden weights, (3*h_size, i_size).
        w_hh: stacked hidden-to-hidden weights, (3*h_size, h_size).
        b_ih: stacked input-to-hidden bias, (3*h_size,).
        b_hh: stacked hidden-to-hidden bias, (3*h_size,).

    Returns:
        A tuple (output, h_n): output has shape (bs, T, h_size) and holds the
        hidden state of every step; h_n has shape (bs, h_size) and is the
        hidden state after the last step (no leading layer dimension).
    """
    prev_h = initial_states  # (bs, h_size)
    bs,T,_ = input.shape
    h_size = w_ih.shape[0] // 3  # three stacked blocks: r, z, n

    # Allocate with the input's dtype and device. A plain torch.zeros(...) is
    # always default-dtype on CPU and would silently downcast float64 results.
    output = input.new_zeros(bs,T,h_size)

    for t in range(T):
        x_t = input[:,t,:]  # (bs, i_size)
        # Input and hidden contributions are kept separate because the reset
        # gate scales only the hidden part of the candidate state. The matmul
        # broadcasts the shared weights over the batch (no tiling needed).
        from_x = x_t @ w_ih.t() + b_ih      # (bs, 3*h_size)
        from_h = prev_h @ w_hh.t() + b_hh   # (bs, 3*h_size)
        x_r,x_z,x_n = from_x.chunk(3,dim=1)
        h_r,h_z,h_n = from_h.chunk(3,dim=1)

        # Reset gate, update gate, candidate state, new hidden state
        r_t = torch.sigmoid(x_r + h_r)
        z_t = torch.sigmoid(x_z + h_z)
        n_t = torch.tanh(x_n + r_t*h_n)
        prev_h = (1-z_t)*n_t + z_t*prev_h

        output[:,t,:] = prev_h

    return output,prev_h


bs,T,i_size,h_size = 2,3,4,5# batch size, sequence length, input size, hidden size
input = torch.randn(bs,T,i_size)
h0 = torch.randn(bs,h_size)# initial hidden state
gru_layer = nn.GRU(input_size=i_size,hidden_size=h_size,batch_first=True)

output,h_final = gru_layer(input,h0.unsqueeze(0))

print('Pytorch API:')
print(output)# shape bs*seq_len*h_size
print('-'*100)
# for k,v in gru_layer.named_parameters():
#     print(k,v.shape)

# Run the hand-written GRU with the parameters of the nn.GRU layer
custom_output,custom_h_final = gru_forward(input,h0,\
                        gru_layer.weight_ih_l0,\
                    gru_layer.weight_hh_l0,gru_layer.bias_ih_l0,\
                        gru_layer.bias_hh_l0)

print('custom gru layer output:')
print(custom_output)

# torch.allclose checks whether two floating-point tensors are numerically close
print(torch.allclose(output,custom_output))

Pytorch API:
tensor([[[-0.8013, -0.1355,  1.1091, -0.8618,  1.0831],
         [ 0.0465, -0.0146,  0.4839, -0.4071,  0.5657],
         [-0.0359,  0.2280,  0.4099, -0.2462,  0.2263]],

        [[ 0.0554,  0.1258, -0.6549,  0.5053,  0.3982],
         [-0.0111,  0.0040, -0.5205,  0.1017,  0.3044],
         [ 0.0249,  0.3381, -0.3978, -0.5136,  0.3629]]],
       grad_fn=<TransposeBackward1>)
----------------------------------------------------------------------------------------------------
custom gru layer output:
tensor([[[-0.8013, -0.1355,  1.1091, -0.8618,  1.0831],
         [ 0.0465, -0.0146,  0.4839, -0.4071,  0.5657],
         [-0.0359,  0.2280,  0.4099, -0.2462,  0.2263]],

        [[ 0.0554,  0.1258, -0.6549,  0.5053,  0.3982],
         [-0.0111,  0.0040, -0.5205,  0.1017,  0.3044],
         [ 0.0249,  0.3381, -0.3978, -0.5136,  0.3629]]], grad_fn=<CopySlices>)
True
