In [2]:
import torch
import torch.nn as nn

# Fix the RNG so that both the random input and the randomly initialised RNN
# weights are identical on every "Restart Kernel -> Run All".
torch.manual_seed(0)

bs, T = 2, 3   # batch size, input sequence length
input_size, hidden_size = 2, 3   # input feature size, hidden state size
input = torch.randn(bs, T, input_size)   # random input feature sequence, shape (bs, T, input_size)
h_prev = torch.zeros(bs, hidden_size)    # initial hidden state, shape (bs, hidden_size)

# Step 1: call the PyTorch RNN API
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
# nn.RNN takes the initial state with a leading (num_layers * num_directions)
# axis, so the (bs, hidden_size) state is given an extra dimension in front.
rnn_output, state_final = rnn(input, h_prev.unsqueeze(0))
print(rnn_output)
print(state_final)

tensor([[[ 0.1773, -0.1150, -0.5396],
         [ 0.2456, -0.4637, -0.8172],
         [ 0.7823, -0.7720, -0.8598]],

        [[ 0.4552,  0.3404,  0.0755],
         [ 0.5677,  0.2616, -0.0390],
         [ 0.8455, -0.7918, -0.8138]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.7823, -0.7720, -0.8598],
         [ 0.8455, -0.7918, -0.8138]]], grad_fn=<StackBackward0>)


# 单向单层RNN

In [6]:
# step2 手写一个rnn_forward函数，实现RNN的计算原理（计算公式可以看pytorch官方文档）
# ht = tanh(Wix*Xt + bix + Whh*h(t-1) + bhh)
def rnn_forward(input, weight_ix, weight_h, bias_ix, bias_h, h_prev):
    """Hand-written forward pass of a single-layer, unidirectional tanh RNN.

    Applies, for every time step t,
        h_t = tanh(W_ix @ x_t + b_ix + W_hh @ h_(t-1) + b_hh)

    Args:
        input: input sequence, shape (bs, T, input_size) (batch-first).
        weight_ix: input-to-hidden weight, shape (h_dim, input_size).
        weight_h: hidden-to-hidden weight, shape (h_dim, h_dim).
        bias_ix: input-to-hidden bias, shape (h_dim,).
        bias_h: hidden-to-hidden bias, shape (h_dim,).
        h_prev: initial hidden state, shape (bs, h_dim).

    Returns:
        A tuple (h_out, h_n):
        h_out has shape (bs, T, h_dim) and holds the hidden state at every step;
        h_n has shape (1, bs, h_dim) and holds the last hidden state (equal to
        the initial state when T == 0).
    """
    bs, T, input_size = input.shape
    h_dim = weight_ix.shape[0]
    # Allocate the output with the dtype/device of the input instead of the
    # global defaults, so float64 results are not silently downcast and
    # non-CPU inputs do not hit a device mismatch on assignment.
    h_out = input.new_zeros(bs, T, h_dim)

    # Transposes are loop-invariant views; multiplying (bs, in) by (in, h_dim)
    # applies the same weight to every batch row without tiling copies of it.
    w_ix_t = weight_ix.T
    w_hh_t = weight_h.T

    for t in range(T):
        x_t = input[:, t, :]    # input features at the current step, (bs, input_size)
        h_prev = torch.tanh(x_t @ w_ix_t + bias_ix + h_prev @ w_hh_t + bias_h)
        h_out[:, t, :] = h_prev

    return h_out, h_prev.unsqueeze(0)

custom_rnn_output, custom_state_final = rnn_forward(
    input, rnn.weight_ih_l0, rnn.weight_hh_l0, rnn.bias_ih_l0, rnn.bias_hh_l0, h_prev
)
print(custom_rnn_output)
print(custom_state_final)
# Verify programmatically (not just by eye) that the hand-written RNN matches
# the official API. An explicit atol is used because float32 round-off on
# entries close to zero can exceed allclose's default absolute tolerance.
assert torch.allclose(custom_rnn_output, rnn_output, atol=1e-6)
assert torch.allclose(custom_state_final, state_final, atol=1e-6)

tensor([[[ 0.1773, -0.1150, -0.5396],
         [ 0.2456, -0.4637, -0.8172],
         [ 0.7823, -0.7720, -0.8598]],

        [[ 0.4552,  0.3404,  0.0755],
         [ 0.5677,  0.2616, -0.0390],
         [ 0.8455, -0.7918, -0.8138]]], grad_fn=<CopySlices>)
tensor([[[ 0.7823, -0.7720, -0.8598],
         [ 0.8455, -0.7918, -0.8138]]], grad_fn=<UnsqueezeBackward0>)


# 双向RNN

In [None]:
def birectional_rnn_forward(input,weight_ih,weight_hh,bias_ih,bias_hh,h_prev,
        weight_ih_reverse,weight_hh_reverse,bias_ih_reverse,bias_hh_reverse,h_prev_reverse):
    """Hand-written forward pass of a single-layer bidirectional tanh RNN.

    Runs `rnn_forward` once over the sequence as given and once over the
    time-reversed sequence (with the `*_reverse` parameters), then joins the
    two results.

    Args:
        input: input sequence, shape (bs, T, input_size) (batch-first).
        weight_ih, weight_hh, bias_ih, bias_hh: forward-direction parameters,
            passed unchanged to `rnn_forward`.
        h_prev: forward-direction initial hidden state, shape (bs, h_dim).
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse:
            backward-direction parameters, passed unchanged to `rnn_forward`.
        h_prev_reverse: backward-direction initial hidden state, shape (bs, h_dim).

    Returns:
        A tuple (h_out, h_n):
        h_out has shape (bs, T, 2 * h_dim); the first h_dim features are the
        forward states and the last h_dim features are the backward states,
        re-aligned to the original time order.
        h_n has shape (2, bs, h_dim); index 0 is the final forward state and
        index 1 is the final backward state.
    """
    forward_output, forward_state = rnn_forward(
        input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev
    )
    # The backward direction consumes the sequence from the last step to the first.
    backward_output, backward_state = rnn_forward(
        torch.flip(input, [1]),
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse,
        h_prev_reverse,
    )

    # Flip the backward states back so that position t of both halves refers
    # to the same input step. Concatenating (rather than writing into a fresh
    # torch.zeros buffer) keeps whatever dtype/device rnn_forward produced.
    h_out = torch.cat([forward_output, torch.flip(backward_output, [1])], dim=-1)

    # rnn_forward already returns each direction's final state with a leading
    # axis of size 1, so stacking them along dim 0 gives (2, bs, h_dim) and
    # avoids indexing [:, -1, :], which would fail for an empty sequence.
    h_n = torch.cat([forward_state, backward_state], dim=0)
    return h_out, h_n