In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import datasets

# 循环神经网络(基础)

## 使用场景

DNN 的参数量很容易过多, 训练费时耗能大. 

RNN 通过在各个时间步共享同一组参数来处理序列数据, 从而减少参数量. 序列中的元素必须按时间顺序依次处理 (当前时刻的隐藏状态依赖上一时刻的隐藏状态), 因此不能在时间维度上并行. 主要用于有顺序关系的输入. 

## What is RNN cell?

- RNN cell 本质是一个线性层, 其输出经过 tanh 激活, 并且在所有时间步共享同一组权重. 
- RNN cell 输入一个向量, 输出一个变换维度的向量. 

## RNN cell Structure

$$
\begin{align*}
h_{t-1} \in \mathbb R^{hidden\_size} \rightarrow W_{hh} h_{t-1} + b_{hh} \rightarrow & + \rightarrow \tanh \rightarrow h_t \in \mathbb R^{hidden\_size} \\
& \uparrow \\
W_{ih} x_t & + b_{ih} \\
& \uparrow \\
x_t \in & \mathbb R^{input\_size}
\end{align*}
$$

## RNN cell in Pytorch

```python
cell = torch.nn.RNNCell(input_size=input_dim, hidden_size=hidden_dim)
hidden = cell(input, hidden)
```

Input Parameters:
- input (Tensor): A mini-batch of input features for a single time step. shape(batch, input_size).
- hidden (Tensor): A mini-batch of hidden states from the previous time step (the initial state at the first step). shape(batch, hidden_size).

Return:
- hidden (Tensor): A mini-batch of hidden states. shape(batch, hidden_size).

## How to use RNN cell?

In [3]:
# Demo: drive a single RNNCell by hand over a short random sequence.
batch_size = 1
seq_len = 3
input_dim = 4
hidden_dim = 2

cell = torch.nn.RNNCell(input_size=input_dim, hidden_size=hidden_dim)

# Sequence-first layout: iterating over dim 0 yields one
# (batch_size, input_dim) slice per time step.
dataset = torch.randn(seq_len, batch_size, input_dim)
# Initial hidden state h_0 is all zeros.
hidden = torch.zeros(batch_size, hidden_dim)

# The loop variable is called `x_t` rather than `input` so that the builtin
# `input()` is not shadowed in the notebook's global namespace.
for idx, x_t in enumerate(dataset):
    print(f"{'='*20} {idx+1} {'='*20}")
    print("Input size:", x_t.shape)

    # The same cell (same weights) is applied at every step; the hidden
    # state returned by one step is fed into the next.
    hidden = cell(x_t, hidden)

    print("Hidden size:", hidden.shape)
    print(hidden)

Input size: torch.Size([1, 4])
Hidden size: torch.Size([1, 2])
tensor([[ 0.8535, -0.2568]], grad_fn=<TanhBackward0>)
Input size: torch.Size([1, 4])
Hidden size: torch.Size([1, 2])
tensor([[ 0.7672, -0.9486]], grad_fn=<TanhBackward0>)
Input size: torch.Size([1, 4])
Hidden size: torch.Size([1, 2])
tensor([[0.9501, 0.0591]], grad_fn=<TanhBackward0>)


## How to use RNN in PyTorch?

```python
cell = torch.nn.RNN(input_size, hidden_size, num_layers, ...)
output, hidden = cell(input, hidden)
```



In [4]:
# Demo: torch.nn.RNN consumes the whole sequence in a single call.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

cell = torch.nn.RNN(input_size, hidden_size, num_layers)

# Default (batch_first=False) layout is (seq_len, batch_size, input_size);
# the initial hidden state is (num_layers, batch_size, hidden_size).
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# `output` stacks the top-layer hidden state of every time step,
# `hidden` holds the final hidden state of every layer.
output, hidden = cell(inputs, hidden)

print(f"Output size: {output.shape}")
print(output)
print(f"Hidden size: {hidden.shape}")
print(hidden)

Output size: torch.Size([3, 1, 2])
tensor([[[0.1076, 0.8134]],

        [[0.0182, 0.8129]],

        [[0.4016, 0.5810]]], grad_fn=<StackBackward0>)
Hidden size: torch.Size([1, 1, 2])
tensor([[[0.4016, 0.5810]]], grad_fn=<StackBackward0>)


## Example-1: Transform `hello` to `ohlol` using a RNN

In [10]:
# Step.1 Prepare Data
# Vocabulary lookup: index -> character (0:'e', 1:'h', 2:'l', 3:'o').
idx2char = list('ehlo')
x_data = [1, 0, 2, 2, 3]  # input indices, spells "hello"
y_data = [3, 1, 2, 3, 2]  # target indices, spells "ohlol"

batch_size = 1
input_size = hidden_size = 4

# Row j of the lookup table is the one-hot vector for character index j.
one_hot_lookup = [[int(col == row) for col in range(4)] for row in range(4)]
x_one_hot = [one_hot_lookup[token] for token in x_data]

# inputs: (seq_len, batch_size, input_size) float tensor, one slice per step.
inputs = torch.tensor(x_one_hot, dtype=torch.float32).reshape(-1, batch_size, input_size)
# labels: (seq_len, 1) so that iterating yields one shape-(1,) target per step.
labels = torch.tensor(y_data, dtype=torch.long).unsqueeze(1)
# Step.2 Design Model
class Model(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``.

    The caller advances the sequence one time step at a time, passing the
    hidden state returned by one call into the next.
    """

    def __init__(self, input_size, hidden_size, batch_size) -> None:
        """Store the sizes and build the cell.

        Args:
            input_size: Number of features in each time-step input.
            hidden_size: Number of features in the hidden state.
            batch_size: Batch size used by ``init_hidden``.
        """
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.rnncell = torch.nn.RNNCell(input_size, hidden_size)

    def forward(self, input, hidden):
        """Run one time step and return the new hidden state."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero hidden state of shape (batch_size, hidden_size)."""
        zero_state_shape = (self.batch_size, self.hidden_size)
        return torch.zeros(zero_state_shape)

net = Model(input_size=input_size, hidden_size=hidden_size, batch_size=batch_size)

# Step.3 Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

# Step.4 Train the model
for epoch in range(15):
    # The loss is summed over all time steps of the sequence and
    # back-propagated once per epoch.
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print("Predicted String: ", end='')
    # The loop variable is called `x_t` rather than `input` so that the
    # builtin `input()` is not shadowed in the notebook's global namespace.
    for x_t, label in zip(inputs, labels):
        hidden = net(x_t, hidden)
        # The hidden state itself is scored against the target class.
        loss += criterion(hidden, label)
        # Predicted character = index of the largest hidden component.
        _, idx = hidden.max(dim=1)
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(f'\nEpoch: {epoch+1}, Loss: {loss.item()}')

Predicted String: hohhh
Epoch: 1, Loss: 7.254288673400879
Predicted String: ooohh
Epoch: 2, Loss: 5.930866241455078
Predicted String: oolol
Epoch: 3, Loss: 5.167537689208984
Predicted String: oolll
Epoch: 4, Loss: 4.718964576721191
Predicted String: oolll
Epoch: 5, Loss: 4.37188720703125
Predicted String: oolll
Epoch: 6, Loss: 4.059685707092285
Predicted String: ohlll
Epoch: 7, Loss: 3.7832038402557373
Predicted String: ohlll
Epoch: 8, Loss: 3.5647475719451904
Predicted String: ohlll
Epoch: 9, Loss: 3.3996331691741943
Predicted String: ohlll
Epoch: 10, Loss: 3.2502315044403076
Predicted String: ohlll
Epoch: 11, Loss: 3.0879557132720947
Predicted String: ohlol
Epoch: 12, Loss: 2.9208261966705322
Predicted String: ohlol
Epoch: 13, Loss: 2.7625157833099365
Predicted String: ohlol
Epoch: 14, Loss: 2.6187357902526855
Predicted String: ohlol
Epoch: 15, Loss: 2.496582508087158


## Example-2: Using RNN Module

In [12]:
# Step.1 Prepare Data
# Vocabulary lookup: index -> character (0:'e', 1:'h', 2:'l', 3:'o').
idx2char = list('ehlo')
x_data = [1, 0, 2, 2, 3]  # input indices, spells "hello"
y_data = [3, 1, 2, 3, 2]  # target indices, spells "ohlol"

batch_size, seq_len = 1, 5
input_size = hidden_size = 4
num_layers = 1

# Row k of the lookup table has a single 1 at position k.
one_hot_lookup = [[0] * k + [1] + [0] * (3 - k) for k in range(4)]
x_one_hot = [one_hot_lookup[token] for token in x_data]

# inputs: (seq_len, batch_size, input_size) float tensor for torch.nn.RNN.
inputs = torch.tensor(x_one_hot, dtype=torch.float32).reshape(seq_len, batch_size, input_size)
# labels: flat (seq_len,) tensor of target class indices.
labels = torch.tensor(y_data, dtype=torch.long)

# Step.2 Design Model
class Model(torch.nn.Module):
    """Wrapper around ``torch.nn.RNN`` that processes a whole sequence per call.

    The RNN uses the default sequence-first layout, so inputs are expected as
    ``(seq_len, batch, input_size)``.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1) -> None:
        """Store the sizes and build the RNN.

        Args:
            input_size: Number of features in each time-step input.
            hidden_size: Number of features in the hidden state.
            batch_size: Nominal batch size. Kept as an attribute for
                compatibility; ``forward`` derives the actual batch size from
                its input.
            num_layers: Number of stacked recurrent layers.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers

        self.rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, input):
        """Run the RNN over ``input`` starting from a zero hidden state.

        Args:
            input: Tensor of shape ``(seq_len, batch, input_size)``.

        Returns:
            Tensor of shape ``(seq_len * batch, hidden_size)`` holding the
            per-step outputs, flattened for use with ``CrossEntropyLoss``.
        """
        # Size h_0 from the actual input instead of the constructor's fixed
        # batch_size, so a batch of any size works; new_zeros also inherits
        # the input's dtype and device.
        hidden = input.new_zeros(self.num_layers, input.size(1), self.hidden_size)
        out, _ = self.rnn(input, hidden)
        return out.reshape(-1, self.hidden_size)

net = Model(input_size=input_size, hidden_size=hidden_size, batch_size=batch_size, num_layers=num_layers)

# Step.3 Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Step.4 Train the model
for epoch in range(15):
    optimizer.zero_grad()
    # One forward pass covers the whole sequence; outputs has one row per step.
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Decode the per-step prediction (computed before this epoch's update).
    # argmax returns plain integer indices, so neither the legacy `.data`
    # attribute nor a NumPy round-trip is needed.
    idx = outputs.argmax(dim=1).tolist()
    print(f'Predicted: {"".join([idx2char[i] for i in idx])}')
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Predicted: eeeee
Epoch: 1, Loss: 1.6607351303100586
Predicted: eheee
Epoch: 2, Loss: 1.51809823513031
Predicted: ehlel
Epoch: 3, Loss: 1.375065565109253
Predicted: ehlll
Epoch: 4, Loss: 1.2383852005004883
Predicted: ehlll
Epoch: 5, Loss: 1.1171627044677734
Predicted: ohlll
Epoch: 6, Loss: 1.0149692296981812
Predicted: ohlll
Epoch: 7, Loss: 0.9291753768920898
Predicted: ohlll
Epoch: 8, Loss: 0.8549618721008301
Predicted: ohlll
Epoch: 9, Loss: 0.789040744304657
Predicted: ohlll
Epoch: 10, Loss: 0.7315219044685364
Predicted: ohlll
Epoch: 11, Loss: 0.6842751502990723
Predicted: ohlol
Epoch: 12, Loss: 0.6474617719650269
Predicted: ohlol
Epoch: 13, Loss: 0.6184414625167847
Predicted: ohlol
Epoch: 14, Loss: 0.5939062833786011
Predicted: ohlol
Epoch: 15, Loss: 0.5719873905181885


## Example-3: Using Embedding and Linear Layer

In [14]:
# Step.1 Prepare Data
# Vocabulary lookup: index -> character (0:'e', 1:'h', 2:'l', 3:'o').
idx2char = list('ehlo')
x_data = [[1, 0, 2, 2, 3]]  # one row of input indices, spells "hello"
y_data = [3, 1, 2, 3, 2]    # target indices, spells "ohlol"

num_class, input_size = 4, 4  # input_size doubles as the embedding vocabulary size
batch_size, seq_len = 1, 5
hidden_size = 8
# NOTE: the spelling `embadding_size` (sic) is kept because the Model class
# below reads this exact global name.
embadding_size = 10
num_layers = 2

# inputs: (batch, seq_len) integer indices, fed to the embedding layer.
inputs = torch.tensor(x_data, dtype=torch.long)
# labels: flat (seq_len,) tensor of target class indices.
labels = torch.tensor(y_data, dtype=torch.long)

# Step.2 Design Model
class Model(torch.nn.Module):
    """Embedding -> batch-first RNN -> Linear classifier.

    All sizes (``input_size``, ``embadding_size``, ``hidden_size``,
    ``num_layers``, ``num_class``) are read from module-level globals rather
    than passed to the constructor.
    """

    def __init__(self) -> None:
        """Build the embedding, recurrent and output layers."""
        super().__init__()
        self.emb = torch.nn.Embedding(num_embeddings=input_size, embedding_dim=embadding_size)

        self.rnn = torch.nn.RNN(
            input_size=embadding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=num_class)

    def forward(self, x):
        """Map a (batch, seq_len) index tensor to (batch * seq_len, num_class) scores."""
        # Zero initial state, sized from the actual batch dimension of x.
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)
        embedded = self.emb(x)
        rnn_out, _ = self.rnn(embedded, h0)
        logits = self.fc(rnn_out)
        # Flatten batch and time so each row lines up with one target label.
        return logits.view(-1, num_class)

net = Model()

# Step.3 Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Step.4 Train the model
for epoch in range(15):
    optimizer.zero_grad()
    # outputs has one row of class scores per time step.
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Decode the per-step prediction (computed before this epoch's update).
    # argmax returns plain integer indices, so neither the legacy `.data`
    # attribute nor a NumPy round-trip is needed.
    idx = outputs.argmax(dim=1).tolist()
    print(f'Predicted: {"".join([idx2char[i] for i in idx])}')
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Predicted: hhhhh
Epoch: 1, Loss: 1.4088255167007446
Predicted: hhhhl
Epoch: 2, Loss: 1.1075539588928223
Predicted: oolol
Epoch: 3, Loss: 0.8866984248161316
Predicted: oolol
Epoch: 4, Loss: 0.7328249216079712
Predicted: oolol
Epoch: 5, Loss: 0.6074686646461487
Predicted: oolol
Epoch: 6, Loss: 0.49092087149620056
Predicted: oolol
Epoch: 7, Loss: 0.37829408049583435
Predicted: ohlol
Epoch: 8, Loss: 0.2831459045410156
Predicted: ohlol
Epoch: 9, Loss: 0.20173685252666473
Predicted: ohlol
Epoch: 10, Loss: 0.13246771693229675
Predicted: ohlol
Epoch: 11, Loss: 0.08586840331554413
Predicted: ohlol
Epoch: 12, Loss: 0.05977001041173935
Predicted: ohlol
Epoch: 13, Loss: 0.04366439953446388
Predicted: ohlol
Epoch: 14, Loss: 0.0322958268225193
Predicted: ohlol
Epoch: 15, Loss: 0.024418506771326065


## 作业

- 学习使用 LSTM
- 学习使用 GRU 