In [1]:
import torch
import torch.nn as nn

LSTM的使用示例 (GRU的参数同LSTM)

In [2]:
# Toy dimensions shared by every cell in this notebook.

# Number of sentences in one batch.
batch_size = 64
# Number of tokens in each sentence.
seq_len = 20
# Number of distinct tokens in the vocabulary.
vocab_size = 100
# Each token is represented by a vector of this length.
embedding_dim = 30
# Number of hidden units in each LSTM layer.
hidden_size = 18
# Number of stacked LSTM layers.
num_layers = 2

构造一个batch的数据

In [3]:
# Build one batch of random token ids with shape (batch_size, seq_len).
# The exclusive upper bound is vocab_size instead of a literal 100, so the ids
# stay valid indices for an embedding table of vocab_size rows even if the
# constant is changed later.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the following cells refer to it.
input = torch.randint(low=0, high=vocab_size, size=[batch_size, seq_len])

In [4]:
# Display the batch dimensions: (batch_size, seq_len).
input.size()

torch.Size([64, 20])

数据经过embedding处理

In [5]:
# Lookup table with one row of length embedding_dim per vocabulary entry.
embedding = nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=embedding_dim,
)

In [6]:
# Replace every token id with its embedding vector:
# (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim).
input_embedded = embedding(input)

In [7]:
# Show the embedding layer, then the tensor shape before and after the lookup.
for shown in (embedding, input.shape, input_embedded.shape):
    print(shown)

Embedding(100, 30)
torch.Size([64, 20])
torch.Size([64, 20, 30])


把embedding之后的数据传给LSTM

In [8]:
# Stacked LSTM that consumes the embedded tokens.
# batch_first=True: input and output tensors are laid out as (batch, seq, feature).
lstm = nn.LSTM(
    input_size=embedding_dim,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_first=True,
    bidirectional=False,  # the default, spelled out because the shapes below depend on it
)

In [9]:
# Run the whole batch through the LSTM. The call returns the per-time-step
# outputs plus a (h_n, c_n) pair holding the final hidden and cell states.
output, final_state = lstm(input_embedded)
h_n, c_n = final_state

num_directions: if bidirectional = False -> 1, if bidirectional = True -> 2

In [10]:
# num_directions is 1 for a unidirectional LSTM and 2 for a bidirectional one.
num_directions = 2 if lstm.bidirectional else 1

print("LSTM layer spec: ", lstm)
print("shape for input_embedded: ", input_embedded.shape)
print("shape for output: ", output.shape)  # [batch_size=64, seq_len=20, num_directions * hidden_size]; 20 is the number of time steps
print("shape for h_n: ", h_n.shape)        # [num_layers * num_directions, batch_size=64, hidden_size=18]
print("shape for c_n: ", c_n.shape)        # [num_layers * num_directions, batch_size=64, hidden_size=18]

# Pin the shapes described above so a configuration change cannot silently
# invalidate the explanation.
assert output.shape == (batch_size, seq_len, num_directions * hidden_size)
assert h_n.shape == (num_layers * num_directions, batch_size, hidden_size)
assert c_n.shape == h_n.shape

LSTM layer spec:  LSTM(30, 18, num_layers=2, batch_first=True)
shape for input_embedded:  torch.Size([64, 20, 30])
shape for output:  torch.Size([64, 20, 18])
shape for h_n:  torch.Size([2, 64, 18])
shape for c_n:  torch.Size([2, 64, 18])


获取最后一个时间步(time step)上的输出

In [21]:
# Take the output of the last time step (index -1 on the seq_len axis) for every sentence.
# `output` is (batch, seq_len, hidden) because the LSTM uses batch_first=True,
# so the slice is (batch, hidden).
last_output = output[:, -1, :]
last_output

tensor([[ 0.1624, -0.1872,  0.0287,  ..., -0.0135,  0.0211,  0.0261],
        [ 0.1659, -0.1820, -0.0288,  ..., -0.0564,  0.0506,  0.0176],
        [ 0.2006, -0.1585,  0.0334,  ...,  0.0230,  0.0515, -0.0073],
        ...,
        [ 0.1631, -0.2159,  0.0200,  ..., -0.0376,  0.0608,  0.0452],
        [ 0.2187, -0.1842,  0.0560,  ...,  0.0190,  0.0658,  0.0201],
        [ 0.1926, -0.1968,  0.0103,  ..., -0.0367,  0.0525, -0.0333]],
       grad_fn=<SliceBackward0>)

获取最后一次的hidden_state

In [22]:
# h_n stacks the final hidden state of every layer along dim 0, so index -1
# selects the entry for the last layer.
last_hidden_state = h_n[-1, :, :]
# For a 2-layer bidirectional LSTM the entries along dim 0 would be ordered
# as follows (1 = forward direction, -1 = backward direction):
#  1  # layer 1, forward
# -1  # layer 1, backward
#  1  # layer 2, forward
# -1  # layer 2, backward
last_hidden_state

tensor([[ 0.1624, -0.1872,  0.0287,  ..., -0.0135,  0.0211,  0.0261],
        [ 0.1659, -0.1820, -0.0288,  ..., -0.0564,  0.0506,  0.0176],
        [ 0.2006, -0.1585,  0.0334,  ...,  0.0230,  0.0515, -0.0073],
        ...,
        [ 0.1631, -0.2159,  0.0200,  ..., -0.0376,  0.0608,  0.0452],
        [ 0.2187, -0.1842,  0.0560,  ...,  0.0190,  0.0658,  0.0201],
        [ 0.1926, -0.1968,  0.0103,  ..., -0.0367,  0.0525, -0.0333]],
       grad_fn=<SliceBackward0>)

In [23]:
# output: the results of every time step, concatenated along the seq_len dimension
# h_n: the hidden states of the different layers, concatenated along dimension 0

In [24]:
# Element-wise comparison of the last layer's final hidden state with the
# output at the last time step.
print(last_hidden_state == last_output)

# The printed tensor is abbreviated with "...", so it cannot show that every
# element matches; check the whole tensor explicitly as well.
assert torch.equal(last_hidden_state, last_output)

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])


In [12]:
# 2x3 integer tensor holding 0..5 in row-major order.
x = torch.arange(6).reshape(2, 3)
x

tensor([[0, 1, 2],
        [3, 4, 5]])

In [13]:
# 2x3 integer tensor holding 6..11 in row-major order.
y = torch.arange(6, 12).reshape(2, 3)
y

tensor([[ 6,  7,  8],
        [ 9, 10, 11]])

In [16]:
# Join x and y along the last dimension: two (2, 3) tensors become one (2, 6) tensor.
torch.concat((x, y), dim=-1)

tensor([[ 0,  1,  2,  6,  7,  8],
        [ 3,  4,  5,  9, 10, 11]])