# RNN 循环神经网络——字符预测
## 学习用RNN来将一个序列转换为另一个序列
![qfUyds](https://gitee.com/pxqp9W/testmarkdown/raw/master/imgs/2020/07/qfUyds.png)
![k8SRpV](https://gitee.com/pxqp9W/testmarkdown/raw/master/imgs/2020/07/k8SRpV.png)
![3bn5e2](https://gitee.com/pxqp9W/testmarkdown/raw/master/imgs/2020/07/3bn5e2.png)
- 最终可以看成是多分类问题，因此可以使用交叉熵损失

## 1 使用RNN Cell来训练

In [1]:
import torch

In [2]:
# Parameters
batch_size = 1
seq_len = 5
input_size = 4  # input_size 输入特征维数
hidden_size = 4 # hidden_size 隐层状态的维数
num_layers = 1 # num_layers RNN层的个数，在图中竖向的是层数，横向的是seq_len

In [3]:
# Dataset
idx2char = ['e', 'h', 'l', 'o'] # 字典
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol
one_hot_lookup = [[1, 0, 0, 0], 
                  [0, 1, 0, 0], 
                  [0, 0, 1, 0], 
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data] # 将x_data转为独热向量
# Reshape the inputs to （𝒔𝒆𝒒𝑳𝒆𝒏, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒊𝒏𝒑𝒖𝒕𝑺𝒊𝒛𝒆）
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size) 
labels = torch.LongTensor(y_data).view(-1, 1) # Reshape the labels to（𝒔𝒆𝒒𝑳𝒆𝒏, 𝟏）
print(x_one_hot)
print(inputs)
print(inputs.size())
print(labels)
print(labels.size())

[[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
tensor([[[0., 1., 0., 0.]],

        [[1., 0., 0., 0.]],

        [[0., 0., 1., 0.]],

        [[0., 0., 1., 0.]],

        [[0., 0., 0., 1.]]])
torch.Size([5, 1, 4])
tensor([[3],
        [1],
        [2],
        [3],
        [2]])
torch.Size([5, 1])


In [4]:
# Design Model
class RNN_Cell_Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(RNN_Cell_Model, self).__init__() 
        # Initial the parameters
        self.batch_size = batch_size 
        self.input_size = input_size 
        self.hidden_size = hidden_size 
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)
        
    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden) # h_t = cell(x_t , h_t-1)
        return hidden

    def init_hidden(self): # 生成默认的初始h0
        return torch.zeros(self.batch_size, self.hidden_size)

net1 = RNN_Cell_Model(input_size, hidden_size, batch_size)

In [5]:
# Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(net1.parameters(), lr=0.1)

In [6]:
# Training Cycle

In [7]:
## 使用RNN Cell训练
for epoch in range(15):
    loss = 0 
    optimizer.zero_grad() 
    hidden = net1.init_hidden() # 初始化h0
    print('Predicted string: ', end='') 
    # Shape of inputs :(𝒔𝒆𝒒𝑳𝒆𝒏, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒊𝒏𝒑𝒖𝒕𝑺𝒊𝒛𝒆)
    # Shape of input:(𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆)
    # Shape of labels :(𝒔𝒆𝒒𝑺𝒊𝒛𝒆, 𝟏)
    # Shape of label:(𝟏)
    for input, label in zip(inputs, labels):
        # Training steps
        hidden = net1(input, hidden)
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1) # hidden.max表示多分类的最大项值（即预测结果）
        print(idx2char[idx.item()], end='') 
    loss.backward() 
    optimizer.step() 
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

Predicted string: eeeee, Epoch [1/15] loss=8.7338
Predicted string: helll, Epoch [2/15] loss=6.8109
Predicted string: hllll, Epoch [3/15] loss=5.7981
Predicted string: hhlll, Epoch [4/15] loss=5.2475
Predicted string: hhlll, Epoch [5/15] loss=4.9066
Predicted string: hhlll, Epoch [6/15] loss=4.6345
Predicted string: hhlll, Epoch [7/15] loss=4.3896
Predicted string: hhlll, Epoch [8/15] loss=4.1650
Predicted string: hhlll, Epoch [9/15] loss=3.9674
Predicted string: hhlll, Epoch [10/15] loss=3.7972
Predicted string: hhlll, Epoch [11/15] loss=3.6606
Predicted string: ohlll, Epoch [12/15] loss=3.5416
Predicted string: ohlll, Epoch [13/15] loss=3.4050
Predicted string: ohlll, Epoch [14/15] loss=3.2450
Predicted string: ohlol, Epoch [15/15] loss=3.0918


## 2 使用RNN Module训练

In [8]:
# Dataset
idx2char = ['e', 'h', 'l', 'o'] # 字典
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol

one_hot_lookup = [[1, 0, 0, 0], 
                  [0, 1, 0, 0], 
                  [0, 0, 1, 0], 
                  [0, 0, 0, 1]]

x_one_hot = [one_hot_lookup[x] for x in x_data] # 将x_data转为独热向量

# Reshape the inputs to （𝒔𝒆𝒒𝑳𝒆𝒏, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, hiddenSize)
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size) 
# Shape of labels :(𝒔𝒆𝒒𝑳𝒆𝒏 × 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝟏)
labels = torch.LongTensor(y_data)

In [9]:
class RNN_Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(RNN_Model, self).__init__() 
        self.num_layers = num_layers 
        self.batch_size = batch_size 
        self.input_size = input_size 
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=num_layers)
    def forward(self, input):
        # Shape of hidden :(𝒏𝒖𝒎𝑳𝒂𝒚𝒆𝒓𝒔, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆)
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size) 
        out, _ = self.rnn(input, hidden) 
        # Reshape out to:(𝒔𝒆𝒒𝑳𝒆𝒏 × 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆)
        return out.view(-1, self.hidden_size)
net2 = RNN_Model(input_size, hidden_size, batch_size, num_layers)

In [10]:
criterion2 = torch.nn.CrossEntropyLoss()
optimizer2 = torch.optim.Adam(net2.parameters(), lr=0.05)

In [11]:
for epoch in range(15):
    optimizer.zero_grad() 
    outputs = net2(inputs)
    loss = criterion2(outputs, labels)
    loss.backward() 
    optimizer2.step() 
    
    _, idx = outputs.max(dim=1) 
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

Predicted:  ololl, Epoch [1/15] loss=1.1037
Predicted:  ololl, Epoch [2/15] loss=1.0424
Predicted:  ooool, Epoch [3/15] loss=0.9988
Predicted:  oooll, Epoch [4/15] loss=0.9634
Predicted:  oooll, Epoch [5/15] loss=0.9365
Predicted:  oholl, Epoch [6/15] loss=0.9179
Predicted:  ohlll, Epoch [7/15] loss=0.9063
Predicted:  ohlll, Epoch [8/15] loss=0.8977
Predicted:  ohlll, Epoch [9/15] loss=0.8866
Predicted:  ohlll, Epoch [10/15] loss=0.8693
Predicted:  ohlll, Epoch [11/15] loss=0.8443
Predicted:  ohlll, Epoch [12/15] loss=0.8124
Predicted:  ohlll, Epoch [13/15] loss=0.7766
Predicted:  ohlll, Epoch [14/15] loss=0.7414
Predicted:  ohlll, Epoch [15/15] loss=0.7126
