<a href="https://colab.research.google.com/github/BarryLiu-97/Pytorch-Tutorial/blob/master/10_Basic_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import torch

In [4]:
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# 示例

In [10]:
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
#(seq, batch, features)
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)

In [9]:
for idx, input in enumerate(dataset):
  print('=' * 20, idx, '=' * 20)
  print('Input size:', input.shape)

  hidden = cell(input, hidden)

  print('outputs size:', hidden.shape)
  print(hidden)

Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[0.4127, 0.9934]], grad_fn=<TanhBackward>)
Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[-0.8046, -0.6573]], grad_fn=<TanhBackward>)
Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[ 0.8152, -0.0012]], grad_fn=<TanhBackward>)


# 使用RNN  
cell中  
input表示整个输入序列  
hidden表示h<sub>0</sub>  
输出中   
out表示h<sub>1</sub>～h<sub>N</sub>   
hidden表示h<sub>N</sub>    
input of shape(SeqSize, batch, input_size)  
hidden of shape(numLayers, batch, hidden_size)  
output of shape(SeqLen, batch, hidden_size)  
hidden of shape(numLayers, batch, hidden_size) 

In [16]:
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1

In [17]:
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size)

#(seq, batch, features)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

out, hidden = cell(inputs, hidden) 

In [18]:
print('Output size:', out.shape)
print('Output:', out)
print('HIdden size:', hidden.shape)
print('Hidden:', hidden.shape)

Output size: torch.Size([3, 1, 2])
Output: tensor([[[ 0.8381, -0.7957]],

        [[ 0.9516, -0.9500]],

        [[ 0.9926, -0.9898]]], grad_fn=<StackBackward>)
HIdden size: torch.Size([1, 1, 2])
Hidden: torch.Size([1, 1, 2])


# 一个小例子

In [1]:
import torch
input_size = 4
hidden_size = 4
batch_size = 1

In [2]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1,0,2,2,3]
y_data = [3,1,2,3,2]

one_hot_lookup = [[1,0,0,0],
          [0,1,0,0],
          [0,0,1,0],
          [0,0,0,1],                
          ]
x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1,1)

## 若使用RNNCell

In [3]:
class Model(torch.nn.Module):
  def __init__(self, input_size, hidden_size, batch_size):
    super(Model, self).__init__()
    self.batch_size = batch_size
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                     hidden_size=self.hidden_size)

  def forward(self, input, hidden):
    hidden = self.rnncell(input, hidden)
    return hidden

  def init_hidden(self):
    return torch.zeros(self.batch_size, self.hidden_size)

net = Model(input_size, hidden_size, batch_size)

In [4]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

In [5]:
for epoch in range(15):
  loss = 0
  optimizer.zero_grad()
  hidden = net.init_hidden()
  print('Predicted string:', end='')
  for input, label in zip(inputs, labels):
    hidden = net(input, hidden)      
    # Shape of Inputs:(seqLen, batchSize, inputSize) 
    # Shape of Input:(batchSize, inputSize)
    # Shape of labels:(seqSize, 1) 
    # Shape of label:(1)
    loss += criterion(hidden, label)   # loss因为是多个h产生的loss之和，在计算图中，故不用item
    _, idx = hidden.max(dim=1)  # 获取hidden最大值的下标
    print(idx2char[idx.item()], end='')
  loss.backward()
  optimizer.step()
  print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

Predicted string:ooeoo, Epoch [1/15] loss=7.1416
Predicted string:ooloo, Epoch [2/15] loss=5.8359
Predicted string:ohllh, Epoch [3/15] loss=5.0091
Predicted string:ohlll, Epoch [4/15] loss=4.6407
Predicted string:ohlll, Epoch [5/15] loss=4.3732
Predicted string:ohlll, Epoch [6/15] loss=3.9806
Predicted string:ohlll, Epoch [7/15] loss=3.5712
Predicted string:ohlol, Epoch [8/15] loss=3.2520
Predicted string:ohlol, Epoch [9/15] loss=3.0108
Predicted string:ohlol, Epoch [10/15] loss=2.7864
Predicted string:ohlol, Epoch [11/15] loss=2.5669
Predicted string:ohlol, Epoch [12/15] loss=2.3907
Predicted string:ohlol, Epoch [13/15] loss=2.2748
Predicted string:ohlol, Epoch [14/15] loss=2.2070
Predicted string:ohlol, Epoch [15/15] loss=2.1636


## 若使用RNN

In [2]:
import torch
input_size = 4
hidden_size = 4
batch_size = 1
num_layers = 1
seq_len = 5

In [3]:
class Model(torch.nn.Module):
  def __init__(self, innput_size, hidden_size, batch_size, num_layers=1):
    super(Model, self).__init__()
    self.num_layers = num_layers
    self.batch_size = batch_size
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=num_layers)

  def forward(self, input):
    hidden = torch.zeros(self.num_layers,
              self.batch_size,
              self.hidden_size)
    out, _ =self.rnn(input, hidden)
    return out.view(-1, self.hidden_size)# reshape out to: (seqLen × batchSize, hiddenSize)

net = Model(input_size, hidden_size, batch_size, num_layers)

In [5]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1,0,2,2,3]
y_data = [3,1,2,3,2]

one_hot_lookup = [[1,0,0,0],
          [0,1,0,0],
          [0,0,1,0],
          [0,0,0,1],                
          ]
x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size) # Shape of inputs:(seqLen, batchSize, hiddenSize)
labels = torch.LongTensor(y_data)

In [7]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

In [9]:
for epoch in range(15):
  optimizer.zero_grad()
  outputs = net(inputs)
  loss = criterion(outputs, labels)
  loss.backward() 
  optimizer.step()

  _, idx = outputs.max(dim=1)  # 获取hidden最大值的下标
  idx = idx.data.numpy()
  print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
  print(', Epoch [%d/15] loss=%.3f' % (epoch+1, loss.item()))

Predicted:  ohooo, Epoch [1/15] loss=1.126
Predicted:  ohlll, Epoch [2/15] loss=1.010
Predicted:  ohlll, Epoch [3/15] loss=0.931
Predicted:  ohlll, Epoch [4/15] loss=0.866
Predicted:  ohlol, Epoch [5/15] loss=0.807
Predicted:  ohlol, Epoch [6/15] loss=0.745
Predicted:  ohlol, Epoch [7/15] loss=0.683
Predicted:  ohlol, Epoch [8/15] loss=0.632
Predicted:  ohlol, Epoch [9/15] loss=0.591
Predicted:  ohlol, Epoch [10/15] loss=0.554
Predicted:  ohlol, Epoch [11/15] loss=0.515
Predicted:  ohlol, Epoch [12/15] loss=0.474
Predicted:  ohlol, Epoch [13/15] loss=0.442
Predicted:  ohlol, Epoch [14/15] loss=0.421
Predicted:  ohlol, Epoch [15/15] loss=0.410


## 独热向量特点  
- 维度太高
- 稀疏
- 硬编码
## 嵌入层(Embedding)--好  
- 低维
- 稠密
- 从数据中学习

In [None]:
class Model(torch.nn.Module):
  def __init__(self):
    super(Model, self).__init__()
    self.emb = torcch.nn.Embedding(input_size, embedding_size)
    self.rnn = torch.nn.RNN(input_size=embedding_size,  
                hidden_size = hidden_size,
                num_layers=num+layers,
                batch_size=True)
    self.fc = torch.nn.Linear(hidden_size, num_class)

  def forward(self, x):
    hidden = torch.zeros(num_layers, x.size(0), hidden_size)
    x = self.emb(x) # (batch, aeqLen, embeddingSize)
    x, _ = self.rnn(x, hidden)
    x = self.fc(x)
    return x.view(-1, num_class)

In [None]:
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5

In [None]:
idx2char = ['e', 'h', 'l', 'o']
x_data = [1,0,2,2,3]
y_data = [3,1,2,3,2]
# Input是LongTensor, (batchSize, seqLen)
inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)

In [None]:
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)