In [7]:
import torch
import torch.nn as nn

# 1. Unidirectional, single-layer RNN: 4 input features -> 3 hidden units.
single_rnn = nn.RNN(4, 3, num_layers=1, batch_first=True)
input = torch.rand(1, 2, 4)  # layout is (batch, seq_len, input_size) because batch_first=True

# `output` holds the hidden state at every time step; `h_n` holds only the last one.
output, h_n = single_rnn(input)

print(output, h_n)
output.shape, h_n.shape

tensor([[[-0.4708,  0.8791,  0.5277],
         [-0.7765,  0.8896,  0.7426]]], grad_fn=<TransposeBackward1>) tensor([[[-0.7765,  0.8896,  0.7426]]], grad_fn=<StackBackward0>)


(torch.Size([1, 2, 3]), torch.Size([1, 1, 3]))

In [5]:
# 2. Bidirectional, single-layer RNN (same sizes as above, plus bidirectional=True).
bidirectional_rnn = nn.RNN(4, 3, num_layers=1, bidirectional=True, batch_first=True)

# Reuses the `input` tensor created in the previous cell.
bi_output, bi_h_n = bidirectional_rnn(input)

# The last dim of the output is 2 * hidden_size, and h_n has one entry per direction.
bi_output.shape, bi_h_n.shape

(torch.Size([1, 2, 6]), torch.Size([2, 1, 3]))

In [9]:
import torch
import torch.nn as nn

bs, T = 2, 3  # batch size, input sequence length
input_size, hidden_size = 2, 3  # input feature size, hidden-state feature size
input = torch.randn(bs, T, input_size)  # randomly initialised input feature sequence
h_prev = torch.zeros(bs, hidden_size)  # initial hidden state (all zeros)

# Step 1: call the PyTorch RNN API.
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
# Indexing with None inserts a leading axis of size 1 (same as unsqueeze(0)),
# turning the (bs, hidden_size) state into the (1, bs, hidden_size) tensor passed as h_0.
rnn_output, state_final = rnn(input, h_prev[None])

# One item per line: per-step outputs, their shape, final state, its shape.
print(rnn_output, rnn_output.shape, state_final, state_final.shape, sep="\n")
# Step 2: hand-write an rnn_forward function that implements the RNN computation
def rnn_forward(input, weight_ih, weight_hh, bias_ih, bias_hh, h_prev):
    """Hand-written forward pass of a single-layer, unidirectional tanh RNN.

    Applies the recurrence
    ``h_t = tanh(W_ih @ x_t + b_ih + W_hh @ h_{t-1} + b_hh)``
    to a batch-first input sequence, one time step at a time.

    Args:
        input: Input sequence of shape (bs, T, input_size).
        weight_ih: Input-to-hidden weights of shape (h_dim, input_size).
        weight_hh: Hidden-to-hidden weights of shape (h_dim, h_dim).
        bias_ih: Input-to-hidden bias, broadcastable to (bs, h_dim).
        bias_hh: Hidden-to-hidden bias, broadcastable to (bs, h_dim).
        h_prev: Initial hidden state of shape (bs, h_dim).

    Returns:
        A tuple ``(h_out, h_n)`` where ``h_out`` has shape (bs, T, h_dim) and
        holds the hidden state at every time step, and ``h_n`` has shape
        (1, bs, h_dim) and holds the final hidden state with a leading axis
        of size 1, matching the layout of ``nn.RNN``'s second return value.
    """
    bs, T, _ = input.shape
    h_dim = weight_ih.shape[0]
    # Allocate the output with the same dtype/device as the input.
    h_out = input.new_zeros(bs, T, h_dim)

    for t in range(T):
        x_t = input[:, t, :]  # features at the current time step, (bs, input_size)

        # (bs, in) @ (in, h_dim) -> (bs, h_dim); equivalent to a batched W @ x
        # without re-tiling the weights on every step.
        w_times_x = x_t @ weight_ih.T
        w_times_h = h_prev @ weight_hh.T  # (bs, h_dim)

        h_prev = torch.tanh(w_times_x + bias_ih + w_times_h + bias_hh)
        h_out[:, t, :] = h_prev  # record this step's hidden state

    return h_out, h_prev.unsqueeze(0)

tensor([[[-0.4413,  0.3953, -0.6507],
         [-0.2710, -0.0573,  0.0396],
         [ 0.8449, -0.8337,  0.1941]],

        [[ 0.4376, -0.5249, -0.1985],
         [ 0.4136, -0.7058,  0.4908],
         [ 0.2114,  0.0180, -0.7452]]], grad_fn=<TransposeBackward1>)
torch.Size([2, 3, 3])
tensor([[[ 0.8449, -0.8337,  0.1941],
         [ 0.2114,  0.0180, -0.7452]]], grad_fn=<StackBackward0>)
torch.Size([1, 2, 3])
