In [1]:
import torch
import torch.nn as nn

In [6]:
bs, T=2,3 #批大小，输入序列长度
input_size,hidden_size = 2,3 #输入特征大小，隐藏层特征大小
input = torch.randn(bs,T,input_size) #随机初始化一个输入特征序列
h_prev = torch.zeros(bs,hidden_size) #初始隐含状态

In [17]:
#step1 调用RNN 
rnn = nn.RNN(input_size,hidden_size,batch_first = True)
rnn_output ,state_final = rnn(input,h_prev.unsqueeze(0))
print("Pytorch output")
print(rnn_output)
print(state_final)

Pytorch output
tensor([[[-0.6366,  0.6040, -0.2286],
         [-0.3126,  0.8897,  0.0635],
         [-0.2692,  0.8053,  0.4785]],

        [[-0.6001,  0.6333, -0.1659],
         [-0.2251,  0.8493,  0.2865],
         [-0.3143,  0.7564,  0.5445]]], grad_fn=<TransposeBackward1>)
tensor([[[-0.2692,  0.8053,  0.4785],
         [-0.3143,  0.7564,  0.5445]]], grad_fn=<StackBackward0>)


In [18]:
#step2 手写rnn_forward函数
def rnn_foward(input ,weight_ih,weight_hh, bias_ih,bias_hh,h_prev ):
    bs, T, input_size = input.shape
    h_dim = weight_ih.shape[0]
    h_out = torch.zeros(bs,T,h_dim) #初始化一个输出矩阵

    for t in range(T):
        x = input[:,t,:].unsqueeze(2) #获取当前时刻输入,bs * input_size
        w_ih_batch = weight_ih.unsqueeze(0).tile(bs,1,1) #bs*h_dim*input_size
        w_hh_batch = weight_hh.unsqueeze(0).tile(bs,1,1) #bs*h_dim*h_dim

        w_times_x = torch.bmm(w_ih_batch,x).squeeze(-1) #bs*h_dim
        w_times_h = torch.bmm(w_hh_batch,h_prev.unsqueeze(2)).squeeze(-1) #bs*h_dim
        h_prev = torch.tanh(w_times_x+bias_ih+bias_hh)
        h_out[:,t,:] = h_prev
    return h_out,h_prev.unsqueeze(0)
    
    
# #验证正确性
# for k,v in rnn.named_parameters():
#     print(k,v)
custom_rnn_output,custom_state_final = rnn_foward(input ,rnn.weight_ih_l0,rnn.weight_hh_l0, rnn.bias_ih_l0,rnn.bias_hh_l0,h_prev)
print("rnn_forward output")
print(custom_rnn_output)
print(custom_state_final)

rnn_forward output
tensor([[[-0.6366,  0.6040, -0.2286],
         [-0.6375,  0.5691, -0.2138],
         [-0.5259,  0.4500,  0.0682]],

        [[-0.6001,  0.6333, -0.1659],
         [-0.5631,  0.4710, -0.0129],
         [-0.4894,  0.4850,  0.1203]]], grad_fn=<CopySlices>)
tensor([[[-0.5259,  0.4500,  0.0682],
         [-0.4894,  0.4850,  0.1203]]], grad_fn=<UnsqueezeBackward0>)


In [29]:
#实现LSTM网络
bs, T, i_size, h_size = 2,3,4,5
input = torch.randn(bs,T,i_size) #输入序列
c_0 = torch.randn(bs,h_size) #初始值，不需要训练
h_0 = torch.randn(bs,h_size)

lstm_layer = nn.LSTM(i_size,h_size,batch_first = True)
output, (h_final,c_final) = lstm_layer (input , (h_0.unsqueeze(0), c_0.unsqueeze(0)))
# print(output)
# for k,v in lstm_layer.named_parameters():
#     print(k,v.shape)
#自己写LSTM模型
print(output)
def lstm_forward(input,initial_state,w_ih,w_hh,b_ih,b_hh):
    h0,c0 = initial_state
    bs,T,i_size = input.shape
    h_size = w_ih.shape[0]//4
    prev_h = h0
    prev_c = c0
    batch_w_ih = w_ih.unsqueeze(0).tile(bs,1,1) #bs,4*h_size,i_size
    batch_w_hh = w_hh.unsqueeze(0).tile(bs,1,1) #bs,4*h_size,h_size
    output_size = h_size
    output = torch.zeros(bs,T,output_size) #输出序列

    for t in range(T):
        x = input[:,t,:] #当前时刻的输入向量 [bs,i_size]
        w_times_x = torch.bmm(batch_w_ih,x.unsqueeze(-1)) #bs,4*h_size,1
        w_times_x = w_times_x.squeeze(-1) #[bs,4*h_size]

        w_times_h_prev = torch.bmm(batch_w_hh,prev_h.unsqueeze(-1)) #bs,4*h_size,1
        w_times_h_prev = w_times_h_prev.squeeze(-1) #[bs,4*h_size]
        #分别计算输入门 i ，遗忘门 f，cell门 g，输出门 o
        i_t = torch.sigmoid(w_times_x[: ,:h_size]+w_times_h_prev[:,:h_size] +b_ih[:h_size]+b_hh[:h_size])
        f_t = torch.sigmoid(w_times_x[: ,h_size:2*h_size]+w_times_h_prev[:,h_size:2*h_size] +b_ih[h_size:2*h_size]+b_hh[h_size:2*h_size])
        g_t = torch.tanh(w_times_x[: ,2*h_size:3*h_size]+w_times_h_prev[:,2*h_size:3*h_size] +b_ih[2*h_size:3*h_size]+b_hh[2*h_size:3*h_size])
        o_t = torch.sigmoid(w_times_x[: ,3*h_size:4*h_size]+w_times_h_prev[:,3*h_size:4*h_size] +b_ih[3*h_size:4*h_size]+b_hh[3*h_size:4*h_size])
        prev_c = f_t*prev_c + i_t*g_t
        prev_h = o_t * torch.tanh(prev_c)

        output[:,t,:] = prev_h
    return output,(prev_h,prev_c)

output_custom, (h_final_custom,c_final_custom) =lstm_forward(input,(h_0,c_0),lstm_layer.weight_ih_l0,lstm_layer.weight_hh_l0,lstm_layer.bias_ih_l0,lstm_layer.bias_hh_l0)
print(output_custom)


tensor([[[ 0.1692, -0.0495,  0.0935, -0.1825,  0.2068],
         [ 0.1206, -0.0570,  0.2523, -0.1263,  0.1092],
         [ 0.1638,  0.0158,  0.1109, -0.1812,  0.0871]],

        [[ 0.6488,  0.0352,  0.1579,  0.0091,  0.2979],
         [ 0.3841,  0.0407,  0.1459, -0.0854,  0.3260],
         [ 0.2932,  0.0475,  0.1271, -0.1998,  0.1944]]],
       grad_fn=<TransposeBackward0>)
tensor([[[ 0.1692, -0.0495,  0.0935, -0.1825,  0.2068],
         [ 0.1206, -0.0570,  0.2523, -0.1263,  0.1092],
         [ 0.1638,  0.0158,  0.1109, -0.1812,  0.0871]],

        [[ 0.6488,  0.0352,  0.1579,  0.0091,  0.2979],
         [ 0.3841,  0.0407,  0.1459, -0.0854,  0.3260],
         [ 0.2932,  0.0475,  0.1271, -0.1998,  0.1944]]], grad_fn=<CopySlices>)
