# 1 一些琐碎代码

## 1.1 RNNCell

In [6]:
import torch

# Toy dimensions: one sequence of 3 time steps with 4 features per step,
# mapped to a 2-dimensional hidden state.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# An RNNCell processes exactly one time step; the same cell is reused for every step.
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
# Random sequence laid out as (seq_len, batch_size, input_size) -> (3, 1, 4).
dataset = torch.randn((seq_len, batch_size, input_size))
# The initial hidden state is all zeros, shape (batch_size, hidden_size) -> (1, 2).
hidden = torch.zeros((batch_size, hidden_size))

separator = '=' * 20  # divider: 20 '=' characters on each side of the step index
for step in range(seq_len):
    step_input = dataset[step]  # slice for this step: (batch_size, input_size)
    print(separator, step, separator)
    print('Input size:', step_input.shape)
    # Feed the steps in order (seq_len = 3, so three calls); the hidden state
    # returned here is one of the inputs of the next call.
    hidden = cell(step_input, hidden)

    print('output size:', hidden.shape)  # (batch_size, hidden_size)
    print(hidden)

==================== 0 ====================
Input size: torch.Size([1, 4])
output size: torch.Size([1, 2])
tensor([[-0.4140,  0.1517]], grad_fn=<TanhBackward0>)
==================== 1 ====================
Input size: torch.Size([1, 4])
output size: torch.Size([1, 2])
tensor([[-0.4725, -0.7875]], grad_fn=<TanhBackward0>)
==================== 2 ====================
Input size: torch.Size([1, 4])
output size: torch.Size([1, 2])
tensor([[-0.8257, -0.2262]], grad_fn=<TanhBackward0>)


## 1.2 RNN

In [9]:
import torch

# Toy dimensions used by this cell.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1  # number of stacked RNN layers

# torch.nn.RNN consumes a whole sequence in a single call.
rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
# Random input laid out as (seq_len, batch_size, input_size) -> (3, 1, 4).
inputs = torch.randn((seq_len, batch_size, input_size))
# Zero initial hidden state, (num_layers, batch_size, hidden_size) -> (1, 1, 2).
initial_hidden = torch.zeros((num_layers, batch_size, hidden_size))

# The loop over time steps lives inside RNN, so the full sequence is passed at once.
output, hidden = rnn(inputs, initial_hidden)

# output is (seq_len, batch_size, hidden_size);
# hidden is (num_layers, batch_size, hidden_size).
for caption, value in (
    ('Output size:', output.shape),
    ('Output:', output),
    ('Hidden size:', hidden.shape),
    ('Hidden:', hidden),
):
    print(caption, value)

Output size: torch.Size([3, 1, 2])
Output: tensor([[[-0.9880, -0.8818]],

        [[ 0.6066,  0.9090]],

        [[-0.3108,  0.7957]]], grad_fn=<StackBackward0>)
Hidden size: torch.Size([1, 1, 2])
Hidden: tensor([[[-0.3108,  0.7957]]], grad_fn=<StackBackward0>)


## 1.3 RNN参数：batch_first

In [10]:
import torch

# Toy dimensions used by this cell.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1  # number of stacked RNN layers

# Same RNN as before, but built with batch_first=True.
rnn = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_first=True,
)
# The only change on the input side: the batch axis now comes first,
# (batch_size, seq_len, input_size) -> (1, 3, 4).
inputs = torch.randn((batch_size, seq_len, input_size))
# The hidden state keeps its layout: (num_layers, batch_size, hidden_size) -> (1, 1, 2).
initial_hidden = torch.zeros((num_layers, batch_size, hidden_size))

# The loop over time steps lives inside RNN, so the full sequence is passed at once.
output, hidden = rnn(inputs, initial_hidden)

# Only the output layout changes: (batch_size, seq_len, hidden_size).
print('Output size:', output.shape)
print('Output:', output)
# hidden is still (num_layers, batch_size, hidden_size).
print('Hidden size:', hidden.shape)
print('Hidden:', hidden)

Output size: torch.Size([1, 3, 2])
Output: tensor([[[ 0.6276, -0.1454],
         [ 0.0294,  0.3148],
         [-0.3239,  0.4692]]], grad_fn=<TransposeBackward1>)
Hidden size: torch.Size([1, 1, 2])
Hidden: tensor([[[-0.3239,  0.4692]]], grad_fn=<StackBackward0>)


# 2 例子：序列变换把 "hello" --> "ohlol"

## 2.1 使用RNNCell

In [20]:
import torch

# Step 1: hyper-parameters
input_size = 4
hidden_size = 4
batch_size = 1

# Step 2: data preparation
index2char = ['e', 'h', 'l', 'o']  # vocabulary: list position == character index
x_data = [1, 0, 2, 2, 3]  # "hello" written as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # target "ohlol" written as vocabulary indices

# Lookup table used to one-hot encode x_data: row i is the one-hot vector of index i.
vocab_size = len(index2char)
one_hot_lookup = [[1 if col == row else 0 for col in range(vocab_size)]
                  for row in range(vocab_size)]
x_one_hot = [one_hot_lookup[char_index] for char_index in x_data]
# Float tensor laid out as (seq_len, batch_size, input_size).
inputs = torch.tensor(x_one_hot, dtype=torch.float32).reshape(-1, batch_size, input_size)
# Integer class indices, shape (seq_len, 1). The labels are not one-hot encoded:
# the cross-entropy loss works on class indices directly.
labels = torch.tensor(y_data, dtype=torch.long).reshape(-1, 1)


# Step 3: model definition
class Model(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``.

    Each call advances the recurrence by one time step; looping over the
    sequence is left to the caller.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        """Store the sizes and build the cell.

        Args:
            input_size: Number of features of each per-step input.
            hidden_size: Number of features of the hidden state.
            batch_size: Batch size used by ``init_hidden``.
        """
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Run the cell for one step and return the new hidden state."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero hidden state of shape (batch_size, hidden_size)."""
        return torch.zeros((self.batch_size, self.hidden_size))


net = Model(input_size, hidden_size, batch_size)

# Step 4: loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

# Step 5: training
for epoch in range(15):
    optimizer.zero_grad()  # clear the gradients left over from the previous epoch
    hidden = net.init_hidden()  # every epoch restarts from a fresh hidden state
    step_losses = []
    print('Predicted string:', end='')
    # Walk the sequence in order, one character per iteration.
    for step_input, step_label in zip(inputs, labels):
        hidden = net(step_input, hidden)
        # Keep each loss as a tensor (no .item()): it is back-propagated below.
        step_losses.append(criterion(hidden, step_label))
        predicted = hidden.argmax(dim=1)  # index of the largest score
        print(index2char[predicted.item()], end='')  # print the predicted character
    loss = sum(step_losses)  # total loss over all time steps
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))

Predicted string:hehee, Epoch [1/15] loss: 8.2711
Predicted string:olhll, Epoch [2/15] loss: 6.2931
Predicted string:ollll, Epoch [3/15] loss: 5.3395
Predicted string:ollll, Epoch [4/15] loss: 4.7223
Predicted string:ohlll, Epoch [5/15] loss: 4.2614
Predicted string:ohlll, Epoch [6/15] loss: 3.9137
Predicted string:ohlol, Epoch [7/15] loss: 3.6579
Predicted string:ohlol, Epoch [8/15] loss: 3.4601
Predicted string:ohlol, Epoch [9/15] loss: 3.2896
Predicted string:ohlol, Epoch [10/15] loss: 3.1306
Predicted string:ohlol, Epoch [11/15] loss: 2.9806
Predicted string:ohlol, Epoch [12/15] loss: 2.8476
Predicted string:ohlol, Epoch [13/15] loss: 2.7450
Predicted string:ohlol, Epoch [14/15] loss: 2.6792
Predicted string:ohlol, Epoch [15/15] loss: 2.6347


## 2.2 使用RNN

In [29]:
import torch

# Step 1: hyper-parameters
seq_len = 5
input_size = 4
hidden_size = 4
batch_size = 1
# Number of stacked RNN layers. It is passed to Model(...) further down, so it is
# defined here instead of relying on a value left over from an earlier section.
num_layers = 1

# Step 2: data preparation
index2char = ['e', 'h', 'l', 'o']  # vocabulary: list position == character index
x_data = [1, 0, 2, 2, 3]  # "hello" written as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # target "ohlol" written as vocabulary indices

# Lookup table used to one-hot encode x_data: row i is the one-hot vector of index i.
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # one-hot encoded "hello"
# Float tensor laid out as (seq_len, batch_size, input_size).
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
# Integer class indices, shape (seq_len,); the labels are not one-hot encoded.
labels = torch.LongTensor(y_data)


# Step 3: model definition
class Model(torch.nn.Module):
    """Wrapper around ``torch.nn.RNN`` that returns one row of scores per time step.

    The RNN is built without ``batch_first``, so inputs are expected as
    (seq_len, batch, input_size).
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        """Store the sizes and build the RNN.

        Args:
            input_size: Number of features of each per-step input.
            hidden_size: Number of features of the hidden state.
            batch_size: Default batch size used by ``init_hidden``.
            num_layers: Number of stacked RNN layers.
        """
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=num_layers)

    def forward(self, input):
        """Run the whole sequence through the RNN.

        Args:
            input: Tensor of shape (seq_len, batch, input_size).

        Returns:
            Tensor of shape (seq_len * batch, hidden_size).
        """
        # Size the initial state from the actual batch dimension and match the
        # input's dtype/device instead of assuming the constructor's batch_size.
        hidden = self.init_hidden(input.size(1)).to(input)
        out, _ = self.rnn(input, hidden)  # out: (seq_len, batch, hidden_size)
        # Flatten the 3-D output to 2-D: (seq_len * batch, hidden_size).
        return out.view(-1, self.hidden_size)

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial hidden state for ``self.rnn``.

        Args:
            batch_size: Batch size of the state; defaults to ``self.batch_size``.

        Returns:
            Tensor of shape (num_layers, batch_size, hidden_size).
        """
        if batch_size is None:
            batch_size = self.batch_size
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)


net = Model(input_size, hidden_size, batch_size, num_layers)

# Step 4: loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Step 5: training
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)  # one row of scores per time step
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # The highest-scoring column of each row is the predicted character index.
    predicted = outputs.argmax(dim=1).tolist()
    decoded = ''.join(index2char[char_index] for char_index in predicted)
    print('Predicted string: ', decoded, end='')
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))

Predicted string:  hhhhh, Epoch [1/15] loss: 1.4325
Predicted string:  hhhhh, Epoch [2/15] loss: 1.2532
Predicted string:  ohhoh, Epoch [3/15] loss: 1.1057
Predicted string:  ohlol, Epoch [4/15] loss: 0.9970
Predicted string:  ohlol, Epoch [5/15] loss: 0.9208
Predicted string:  oolol, Epoch [6/15] loss: 0.8669
Predicted string:  oolol, Epoch [7/15] loss: 0.8250
Predicted string:  oolol, Epoch [8/15] loss: 0.7863
Predicted string:  oolol, Epoch [9/15] loss: 0.7453
Predicted string:  oolol, Epoch [10/15] loss: 0.7024
Predicted string:  oolol, Epoch [11/15] loss: 0.6625
Predicted string:  oolol, Epoch [12/15] loss: 0.6291
Predicted string:  ohlol, Epoch [13/15] loss: 0.6026
Predicted string:  ohlol, Epoch [14/15] loss: 0.5812
Predicted string:  ohlol, Epoch [15/15] loss: 0.5630


## 2.3 使用embedding and linear layer

In [31]:
import torch

# Step 1: hyper-parameters
num_class = 4
input_size = 4  # NOTE(review): not referenced again in this section
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5

# Step 2: data preparation
index2char = ['e', 'h', 'l', 'o']  # vocabulary: list position == character index
x_data = [[1, 0, 2, 2, 3]]  # (batch_size, seq_len): "hello" as vocabulary indices
y_data = [3, 1, 2, 3, 2]  # (batch_size * seq_len): target "ohlol" as vocabulary indices

# Both tensors hold integer indices; no one-hot encoding is done here.
inputs = torch.tensor(x_data, dtype=torch.long)  # (batch_size, seq_len)
labels = torch.tensor(y_data, dtype=torch.long)  # (batch_size * seq_len)


# Step 3: model definition
class Model(torch.nn.Module):
    """Embedding -> multi-layer RNN -> linear classifier.

    All sizes come from the module-level settings ``num_class``,
    ``embedding_size``, ``hidden_size`` and ``num_layers``.
    """

    def __init__(self):
        """Build the embedding, the batch-first RNN and the output layer."""
        super().__init__()
        self.emb = torch.nn.Embedding(num_class, embedding_size)
        self.rnn = torch.nn.RNN(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Map index sequences to per-step class scores.

        Args:
            x: Integer index tensor of shape (batch_size, seq_len).

        Returns:
            Tensor of shape (batch_size * seq_len, num_class).
        """
        n_batch = x.shape[0]
        # Zero initial state: (num_layers, batch_size, hidden_size).
        h0 = torch.zeros((num_layers, n_batch, hidden_size))
        embedded = self.emb(x)  # (batch_size, seq_len, embedding_size)
        rnn_out, _ = self.rnn(embedded, h0)  # (batch_size, seq_len, hidden_size)
        scores = self.fc(rnn_out)  # (batch_size, seq_len, num_class)
        # Flatten batch and time into one axis: (batch_size * seq_len, num_class).
        return scores.view(-1, num_class)


net = Model()

# Step 4: loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Step 5: training
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)  # (batch_size * seq_len, num_class)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Decode each row's highest-scoring class back into its character.
    best_classes = outputs.max(dim=1).indices.tolist()
    predicted_chars = [index2char[class_index] for class_index in best_classes]
    print('Predicted string: ', ''.join(predicted_chars), end='')
    print(', Epoch [%d/15] loss: %.4f' % (epoch + 1, loss.item()))

Predicted string:  eeeee, Epoch [1/15] loss: 1.5407
Predicted string:  oolol, Epoch [2/15] loss: 1.1158
Predicted string:  oolol, Epoch [3/15] loss: 0.9047
Predicted string:  ohlol, Epoch [4/15] loss: 0.7391
Predicted string:  lhlol, Epoch [5/15] loss: 0.6006
Predicted string:  ohlol, Epoch [6/15] loss: 0.4833
Predicted string:  ohlol, Epoch [7/15] loss: 0.3581
Predicted string:  ohlol, Epoch [8/15] loss: 0.2540
Predicted string:  ohlol, Epoch [9/15] loss: 0.1921
Predicted string:  ohlol, Epoch [10/15] loss: 0.1351
Predicted string:  ohlol, Epoch [11/15] loss: 0.0972
Predicted string:  ohlol, Epoch [12/15] loss: 0.0752
Predicted string:  ohlol, Epoch [13/15] loss: 0.0594
Predicted string:  ohlol, Epoch [14/15] loss: 0.0465
Predicted string:  ohlol, Epoch [15/15] loss: 0.0363
