# 单向RNN

In [None]:
import torch
import torch.nn as nn 
batch_size, seq_len = 2, 3
input_size, hidden_size = 2, 3  # input feature size, hidden-state feature size

# NOTE: `input` shadows the Python builtin; the name is kept because the
# later cells in this notebook refer to it.
input = torch.randn(batch_size, seq_len, input_size)  # (batch_size, seq_len, input_size)
h_prev = torch.zeros(batch_size, hidden_size)  # initial hidden state
# h_prev has shape (batch_size, hidden_size) = (2, 3), i.e.:
# [
#   [0, 0, 0],  # initial hidden state of the first sample
#   [0, 0, 0],  # initial hidden state of the second sample
# ]

# Step 1: call the PyTorch RNN API
single_rnn = nn.RNN(input_size, hidden_size, batch_first=True)  # instantiate
# h_prev is (batch_size, hidden_size); unsqueeze(0) adds the leading
# layer/direction axis so the initial state becomes (1, batch_size, hidden_size).
output, hn = single_rnn(input, h_prev.unsqueeze(0))
print(output)
print(hn)

# Step 2 手写一个rnn_forward函数，实现RNN的计算原理
def rnn_forward(input, weight_ih, weight_hh, bias_ih,bias_hh,h_prev):
    """Run a single-layer, unidirectional tanh RNN over a batch-first sequence.

    For every time step t the hidden state is updated as
    ``h_t = tanh(x_t @ W_ih^T + b_ih + h_{t-1} @ W_hh^T + b_hh)``.

    Args:
        input: Tensor of shape (batch_size, seq_len, input_size).
        weight_ih: Input-to-hidden weight, shape (hidden_size, input_size).
        weight_hh: Hidden-to-hidden weight, shape (hidden_size, hidden_size).
        bias_ih: Input-to-hidden bias, shape (hidden_size,).
        bias_hh: Hidden-to-hidden bias, shape (hidden_size,).
        h_prev: Initial hidden state, shape (batch_size, hidden_size).

    Returns:
        A tuple ``(h_out, h_last)`` where ``h_out`` has shape
        (batch_size, seq_len, hidden_size) and holds the hidden state of every
        step, and ``h_last`` is the final hidden state with a leading axis
        added, shape (1, batch_size, hidden_size).
    """
    batch_size, seq_len, _ = input.shape
    hidden_size = weight_ih.shape[0]

    # Allocate the output with the input's dtype and device; the default
    # (float32 on CPU) would silently downcast float64 results and fail for
    # inputs that live on another device.
    h_out = torch.zeros(batch_size, seq_len, hidden_size,
                        dtype=input.dtype, device=input.device)

    # The transposed weights do not change between steps, so build them once.
    # Multiplying (batch, in) by (in, hidden) gives the same result as tiling
    # the weight per sample and calling bmm, without the extra copies.
    weight_ih_t = weight_ih.transpose(0, 1)  # (input_size, hidden_size)
    weight_hh_t = weight_hh.transpose(0, 1)  # (hidden_size, hidden_size)

    # Recurrence over the time axis
    for t in range(seq_len):
        x = input[:, t, :]  # input of the current step: (batch_size, input_size)
        w_times_x = x @ weight_ih_t  # (batch_size, hidden_size)
        w_times_h = h_prev @ weight_hh_t  # (batch_size, hidden_size)

        h_prev = torch.tanh(w_times_x + bias_ih + w_times_h + bias_hh)
        h_out[:, t, :] = h_prev

    return h_out, h_prev.unsqueeze(0)

# Check rnn_forward against the PyTorch API: the tensor printed below should
# match `output` printed above.
# To look up the parameter names of the module, iterate over
# single_rnn.named_parameters(), which yields (name, parameter) pairs.

custom_rnn_out, custom_final_state = rnn_forward(
    input=input,
    weight_ih=single_rnn.weight_ih_l0,
    weight_hh=single_rnn.weight_hh_l0,
    bias_ih=single_rnn.bias_ih_l0,
    bias_hh=single_rnn.bias_hh_l0,
    h_prev=h_prev,
)

print(custom_rnn_out)


tensor([[[ 0.3019,  0.5374,  0.2303],
         [ 0.4626,  0.2900,  0.5116],
         [ 0.5831,  0.5606, -0.3392]],

        [[ 0.5113,  0.4710, -0.3809],
         [ 0.1795,  0.5527,  0.5527],
         [-0.1945,  0.8211,  0.8665]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.5831,  0.5606, -0.3392],
         [-0.1945,  0.8211,  0.8665]]], grad_fn=<StackBackward0>)
torch.Size([2, 3]) torch.Size([3]) torch.Size([2, 3]) torch.Size([3])
torch.Size([2, 3]) torch.Size([3]) torch.Size([2, 3]) torch.Size([3])
torch.Size([2, 3]) torch.Size([3]) torch.Size([2, 3]) torch.Size([3])
tensor([[[ 0.3019,  0.5374,  0.2303],
         [ 0.4626,  0.2900,  0.5116],
         [ 0.5831,  0.5606, -0.3392]],

        [[ 0.5113,  0.4710, -0.3809],
         [ 0.1795,  0.5527,  0.5527],
         [-0.1945,  0.8211,  0.8665]]], grad_fn=<CopySlices>)


# 双向RNN

In [None]:
# bidirectional=True is required: without it nn.RNN builds a one-directional
# layer and the *_reverse parameters used below do not exist.
bidirectional_rnn = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=True)  # instantiate

# To look up the parameter names (including the *_reverse ones), iterate over
# bidirectional_rnn.named_parameters(), which yields (name, parameter) pairs.


# Step 3: hand-written bidirectional_rnn_forward that reproduces the
# computation of a bidirectional RNN
def bidirectional_rnn_forward(input, weight_ih, weight_hh, bias_ih,bias_hh,h_prev,
                              weight_ih_reverse,weight_hh_reverse, bias_ih_reverse,bias_hh_reverse):
    """Run a single-layer bidirectional tanh RNN by calling rnn_forward twice.

    Every parameter exists twice: one set for the forward direction and one
    (``*_reverse``) for the backward direction.

    Args:
        input: Tensor of shape (batch_size, seq_len, input_size).
        weight_ih, weight_hh, bias_ih, bias_hh: Forward-direction parameters,
            passed unchanged to rnn_forward.
        h_prev: Initial hidden state. Either (batch_size, hidden_size), used
            for both directions, or (2, batch_size, hidden_size) with index 0
            for the forward and index 1 for the backward direction.
        weight_ih_reverse, weight_hh_reverse, bias_ih_reverse, bias_hh_reverse:
            Backward-direction parameters, passed unchanged to rnn_forward.

    Returns:
        A tuple ``(h_out, h_final)``. ``h_out`` has shape
        (batch_size, seq_len, 2 * hidden_size): the first hidden_size features
        are the forward states and the rest are the backward states, both
        indexed by the original time step. ``h_final`` has shape
        (2, batch_size, hidden_size): the final forward state followed by the
        final backward state.
    """
    # Accept one shared initial state or one initial state per direction.
    if h_prev.dim() == 3:
        h_prev_forward, h_prev_backward = h_prev[0], h_prev[1]
    else:
        h_prev_forward = h_prev_backward = h_prev

    # Forward layer
    forward_output, forward_final = rnn_forward(
        input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev_forward)

    # Backward layer: uses the *_reverse weights and consumes the sequence
    # from its last element to its first, so flip the input along the
    # seq_len axis (dim 1).
    input_reverse = torch.flip(input, [1])
    backward_output, backward_final = rnn_forward(
        input_reverse, weight_ih_reverse, weight_hh_reverse,
        bias_ih_reverse, bias_hh_reverse, h_prev_backward)
    # The backward states come out in reversed time order; flip them back so
    # that position t of both halves refers to the same input element.
    backward_output = torch.flip(backward_output, [1])

    # Both directions side by side along the feature axis.
    h_out = torch.cat([forward_output, backward_output], dim=-1)

    # The final states cannot be read from h_out[:, -1, :]: after the
    # re-alignment above the last backward state sits at time step 0. Use the
    # per-direction final states returned by rnn_forward instead.
    h_final = torch.cat([forward_final, backward_final], dim=0)
    return h_out, h_final


# Check bidirectional_rnn_forward against the PyTorch API.
bi_h0 = h_prev.unsqueeze(0).repeat(2, 1, 1)  # (2, batch_size, hidden_size): one initial state per direction
bi_output, bi_hn = bidirectional_rnn(input, bi_h0)

custom_bi_rnn_out, custom_bi_final_state = bidirectional_rnn_forward(
    input,
    bidirectional_rnn.weight_ih_l0, bidirectional_rnn.weight_hh_l0,
    bidirectional_rnn.bias_ih_l0, bidirectional_rnn.bias_hh_l0,
    bi_h0,
    bidirectional_rnn.weight_ih_l0_reverse, bidirectional_rnn.weight_hh_l0_reverse,
    bidirectional_rnn.bias_ih_l0_reverse, bidirectional_rnn.bias_hh_l0_reverse,
)

print(custom_bi_rnn_out)
# Both values should be True when the hand-written version matches nn.RNN.
print(torch.allclose(bi_output, custom_bi_rnn_out, atol=1e-6),
      torch.allclose(bi_hn, custom_bi_final_state, atol=1e-6))


TypeError: bidirectional_rnn_forward() missing 4 required positional arguments: 'weight_ih_reverse', 'weight_hh_reverse', 'bias_ih_reverse', and 'bias_hh_reverse'