# RNN cell in PyTorch

### torch.nn.RNNCell() & torch.nn.RNN()
### torch.nn.LSTMCell() & torch.nn.LSTM()

In [83]:
import torch
from torch.autograd import Variable
from torch import nn

In [2]:
# Define a single-step RNN cell: 100 input features, 200 hidden units.
rnn_single = nn.RNNCell(100, 200)

In [3]:
# Inspect one of the cell's parameters: the hidden-to-hidden weight matrix.
rnn_single.weight_hh

Parameter containing:
tensor([[ 0.0083, -0.0017, -0.0677,  ..., -0.0704, -0.0362,  0.0629],
        [ 0.0239, -0.0640, -0.0293,  ..., -0.0418,  0.0042, -0.0208],
        [ 0.0417,  0.0106, -0.0342,  ...,  0.0124, -0.0031, -0.0418],
        ...,
        [ 0.0638, -0.0256, -0.0199,  ..., -0.0173, -0.0097,  0.0149],
        [ 0.0480, -0.0320, -0.0168,  ..., -0.0290, -0.0562,  0.0531],
        [-0.0359,  0.0138, -0.0278,  ...,  0.0099,  0.0323, -0.0372]],
       requires_grad=True)

In [8]:
# Check the size of the hidden-to-hidden weight: (hidden_size, hidden_size).
rnn_single.weight_hh.size()

torch.Size([200, 200])

# Start (RNNCell)

<div align="center">
  <img src="css/rnn.jpeg" width="70%"/>
</div>


In [35]:
# Define a single-step RNN cell: 100 input features, 200 hidden units.
rnn_single = nn.RNNCell(input_size=100, hidden_size=200)

# Build a sequence of length 6 with batch size 5 and 100 features per step.
# Plain tensors are used directly; the Variable wrapper is deprecated.
x = torch.randn(6, 5, 100)

# Initial hidden state, one row per batch element: (batch, hidden_size).
# It is kept under its own name so it can still be reported after the loop.
h_0 = torch.zeros(5, 200)

# Apply the cell once per time step, feeding each hidden state into the next.
h_t = h_0
out = []
for x_t in x:  # iterating a tensor walks its first (sequence) dimension
    h_t = rnn_single(x_t, h_t)
    out.append(h_t)


print("x input = ", x.size())
# Report the true initial state, not h_t, which now holds the final state.
print("init state = ", h_0.size())
print(" ")
print("final state output = ", h_t.size())

print("get {} output in out".format(len(out)))

x input =  torch.Size([6, 5, 100])
init state =  torch.Size([5, 200])
 
final state output =  torch.Size([5, 200])
get 6 output in out


# Start (RNN)

- 一般情况下我们都是用 nn.RNN() 而不是 nn.RNNCell()，
- 因为 nn.RNN() 能够避免我们手动写循环，非常方便，
- 同时如果不特别说明，我们也会选择使用默认的全 0 初始化隐藏状态

In [84]:
# Multi-step RNN: 100 input features, 200 hidden units.
rnn_seq = nn.RNN(input_size=100, hidden_size=200)

In [85]:
# Inspect one of the parameters: the hidden-to-hidden weight of layer 0.
rnn_seq.weight_hh_l0

Parameter containing:
tensor([[ 0.0213,  0.0327, -0.0550,  ...,  0.0291, -0.0205, -0.0494],
        [-0.0030, -0.0407,  0.0207,  ..., -0.0018,  0.0182,  0.0278],
        [ 0.0237,  0.0191,  0.0400,  ..., -0.0562, -0.0289, -0.0374],
        ...,
        [-0.0459,  0.0371, -0.0044,  ...,  0.0271, -0.0448,  0.0354],
        [ 0.0267, -0.0429,  0.0241,  ..., -0.0197, -0.0009,  0.0148],
        [-0.0290,  0.0197,  0.0474,  ...,  0.0221,  0.0477,  0.0478]],
       requires_grad=True)

In [86]:
# Shape of the layer-0 hidden-to-hidden weight: (hidden_size, hidden_size).
rnn_seq.weight_hh_l0.shape

torch.Size([200, 200])

In [87]:
# Multi-step RNN: 100 input features, 200 hidden units.
rnn_seq = nn.RNN(input_size=100, hidden_size=200)

# No initial state is passed, so the default all-zero hidden state is used.
out, h_t = rnn_seq(x)

# h_t is the final hidden state of the network.
# out holds one output per time step, laid out as (seq, batch, feature).
print("final state output = ", h_t.shape)

print(" ")
print("get {} output in out".format(out.size(0)))
print("out = {}".format(out.size()))

final state output =  torch.Size([1, 5, 200])
 
get 6 output in out
out = torch.Size([6, 5, 200])


In [88]:
# Multi-step RNN: 100 input features, 200 hidden units.
rnn_seq = nn.RNN(100, 200)

# Supply a custom initial hidden state instead of the all-zero default.
# Its three dimensions are (num_layers * num_directions, batch, hidden_size).
# A plain tensor is used directly; the Variable wrapper is deprecated.
h_0 = torch.randn(1, 5, 200)

out, h_t = rnn_seq(x, h_0)


print("final state output = ", h_t.size())
print(" ")
print("get {} output in out".format(len(out)))
print("out = {}".format(out.shape))

final state output =  torch.Size([1, 5, 200])
 
get 6 output in out
out = torch.Size([6, 5, 200])


In [91]:
# Unpack the three output dimensions; out is laid out as (seq, batch, feature).
s, b, h = out.size()
print(out.size())
print(s, b, h)

torch.Size([6, 5, 200])
6 5 200


# LSTM

<div align="center">
  <img src="css/lstm.jpeg" width="70%"/>
</div>

- 注意这里 LSTM 输出的隐藏状态有两个，h 和 c，就是上图中的每个 cell 之间的两个箭头
- 这两个隐藏状态的大小相同，都是 (num_layers * num_directions, batch, hidden_size)

In [75]:
# Two-layer LSTM: input size 50, hidden size 100.
lstm_seq = nn.LSTM(input_size=50, hidden_size=100, num_layers=2)

In [76]:
lstm_seq.weight_hh_l0 # hidden-to-hidden weights of the first layer

Parameter containing:
tensor([[-0.0676,  0.0521, -0.0513,  ..., -0.0539, -0.0024,  0.0850],
        [ 0.0501,  0.0255, -0.0657,  ...,  0.0357,  0.0838, -0.0272],
        [-0.0853,  0.0025,  0.0305,  ...,  0.0430, -0.0423, -0.0073],
        ...,
        [ 0.0264, -0.0014,  0.0088,  ..., -0.0227,  0.0053,  0.0405],
        [-0.0515,  0.0842, -0.0931,  ...,  0.0984, -0.0607,  0.0590],
        [-0.0228, -0.0989, -0.0457,  ...,  0.0791, -0.0570, -0.0397]],
       requires_grad=True)

In [77]:
# The four LSTM gates are stacked along dim 0: (4 * hidden_size, hidden_size).
lstm_seq.weight_hh_l0.shape

torch.Size([400, 100])

In [58]:
# Two-layer LSTM: input size 50, hidden size 100.
lstm_seq = nn.LSTM(50, 100, num_layers=2)

# Sequence length 10, batch size 3, input size 50.
# A plain tensor is used directly; the Variable wrapper is deprecated.
lstm_input = torch.randn(10, 3, 50)

# No initial state is passed, so the default all-zero states are used.
# An LSTM returns two states alongside the output: hidden h and cell c.
out, (h, c) = lstm_seq(lstm_input)

In [59]:
h.shape # two layers, batch size 3, 100 hidden features

torch.Size([2, 3, 100])

In [60]:
# Shape of the final cell state.
c.shape

torch.Size([2, 3, 100])

In [61]:
# Shape of the LSTM output for the whole sequence.
out.shape

torch.Size([10, 3, 100])

In [62]:
# Pass explicit initial states instead of relying on the all-zero default.

# Two-layer LSTM: input size 50, hidden size 100.
lstm_seq = nn.LSTM(50, 100, num_layers=2)

# Sequence length 10, batch size 3, input size 50.
# Plain tensors are used directly; the Variable wrapper is deprecated.
lstm_input = torch.randn(10, 3, 50)
# Initial hidden and cell states, each (num_layers, batch, hidden_size).
h_init = torch.randn(2, 3, 100)
c_init = torch.randn(2, 3, 100)

# The initial states are passed together as an (h, c) tuple.
out, (h, c) = lstm_seq(lstm_input, (h_init, c_init))

In [63]:
# Shape of the final hidden state when custom initial states are supplied.
h.shape 

torch.Size([2, 3, 100])

In [64]:
# Shape of the final cell state when custom initial states are supplied.
c.shape

torch.Size([2, 3, 100])

In [65]:
# Shape of the LSTM output when custom initial states are supplied.
out.shape

torch.Size([10, 3, 100])

# GRU

<div align="center">
  <img src="css/gru.jpeg" width="70%"/>
</div>


In [78]:
# Single-layer GRU: input size 10, hidden size 20.
gru_seq = nn.GRU(10, 20)

# Sequence length 3, batch size 32, input size 10.
# A plain tensor is used directly; the Variable wrapper is deprecated.
gru_input = torch.randn(3, 32, 10)

# No initial state is passed, so the default all-zero hidden state is used.
# Unlike an LSTM, a GRU returns a single hidden state h.
out, h = gru_seq(gru_input)

In [79]:
# Inspect the hidden-to-hidden weights of the GRU's first layer.
gru_seq.weight_hh_l0

Parameter containing:
tensor([[-0.0282, -0.1897, -0.1052,  ..., -0.1062,  0.1588, -0.0910],
        [-0.0097,  0.1193,  0.0938,  ...,  0.1145, -0.1565,  0.1870],
        [-0.1643,  0.0829,  0.1664,  ...,  0.0507, -0.1210, -0.1360],
        ...,
        [-0.0405,  0.1974, -0.0814,  ..., -0.1745,  0.1608,  0.1269],
        [ 0.2109, -0.0397, -0.0079,  ...,  0.0957, -0.1421,  0.2173],
        [-0.0900, -0.1897, -0.1490,  ..., -0.1499, -0.1126, -0.1607]],
       requires_grad=True)

In [80]:
# The three GRU gates are stacked along dim 0: (3 * hidden_size, hidden_size).
gru_seq.weight_hh_l0.shape

torch.Size([60, 20])

In [81]:
# Shape of the GRU's final hidden state: one layer, batch size 32, 20 features.
h.shape

torch.Size([1, 32, 20])

In [82]:
# Shape of the GRU output: sequence length 3, batch size 32, 20 features.
out.shape

torch.Size([3, 32, 20])