In [1]:
import torch

## Use RNNCell

In [8]:
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size = input_size, hidden_size = hidden_size)

dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)

for idx, input in enumerate(dataset):
    print('='*20, idx, '='*20)
    print('Input size: ', input.shape)
    hidden = cell(input, hidden)
    print('Output size: ', hidden.shape)
    print(hidden)

Input size:  torch.Size([1, 4])
Output size:  torch.Size([1, 2])
tensor([[0.3474, 0.1900]], grad_fn=<TanhBackward0>)
Input size:  torch.Size([1, 4])
Output size:  torch.Size([1, 2])
tensor([[ 0.9199, -0.9888]], grad_fn=<TanhBackward0>)
Input size:  torch.Size([1, 4])
Output size:  torch.Size([1, 2])
tensor([[-0.4827, -0.6530]], grad_fn=<TanhBackward0>)


## Use RNN

In [9]:
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1

cell = torch.nn.RNN(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers)

inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

out, hidden = cell(inputs, hidden)

print('Output size:', out.shape) #  𝑠𝑒𝑞𝐿𝑒𝑛, 𝑏𝑎𝑡𝑐ℎ𝑆𝑖𝑧𝑒, ℎ𝑖𝑑𝑑𝑒𝑛𝑆𝑖𝑧𝑒
print('Output:', out)
print('Hidden size: ', hidden.shape) # 𝒏𝒖𝒎𝑳𝒂𝒚𝒆𝒓𝒔, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛e
print('Hidden: ', hidden)

Output size: torch.Size([3, 1, 2])
Output: tensor([[[ 0.4038,  0.1125]],

        [[-0.8523, -0.6146]],

        [[-0.8416,  0.5001]]], grad_fn=<StackBackward0>)
Hidden size:  torch.Size([1, 1, 2])
Hidden:  tensor([[[-0.8416,  0.5001]]], grad_fn=<StackBackward0>)


## Example 1: 'hello'->'ohlol'

Use RNNCell

In [19]:
import torch 

# ------------Parameters------------#
input_size = 4
hidden_size = 4
batch_size = 1

# ------------Prepare Data------------#
idx2char = ['e', 'h','l','o'] # dictionary
x_data = [1,0,0,2,3] # hello
y_data = [3,1,2,3,2] # ohlol

one_hot_lookup=[[1,0,0,0],
                [0,1,0,0],
                [0,0,1,0],
                [0,0,0,1]]
x_one_hot = [one_hot_lookup[x] for x in x_data] # seqLen, inputSize

inputs = torch.Tensor(x_one_hot).view(-1,batch_size, input_size) # seqLen, batchSize, inputSize
labels = torch.LongTensor(y_data).view(-1,1) # seqLen, 1

# ------------Design Model------------#
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model,self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size = self.input_size, hidden_size = self.hidden_size)
    
    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden) # input(batch_size, input_size); hidden(batch_size, hidden_size)
        return hidden
    
    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)
    
net = Model(input_size, hidden_size, batch_size)

# ------------Loss and Optimizer------------#
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

# ------------Training Cycle------------#
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string: ', end='')
    for input,label in zip(inputs, labels):
        hidden = net(input, hidden) # RNN Cell
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1) # Output prediction
        print(idx2char[idx.item()],end='')
    loss.backward()
    optimizer.step()
    print(' , Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

# Shape of inputs :𝒔𝒆𝒒𝑳𝒆𝒏, 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆,𝒊𝒏𝒑𝒖𝒕𝑺𝒊𝒛𝒆
# Shape of input: 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆
# Shape of labels :𝒔𝒆𝒒𝑺𝒊𝒛𝒆, 𝟏
# Shape of label: 𝟏

Predicted string: oehoo , Epoch [1/15] loss=6.7808
Predicted string: ollll , Epoch [2/15] loss=5.5469
Predicted string: ollll , Epoch [3/15] loss=4.8716
Predicted string: ollll , Epoch [4/15] loss=4.4298
Predicted string: ollll , Epoch [5/15] loss=4.1009
Predicted string: ohlll , Epoch [6/15] loss=3.8183
Predicted string: ohlll , Epoch [7/15] loss=3.5518
Predicted string: ohlll , Epoch [8/15] loss=3.3007
Predicted string: ohlll , Epoch [9/15] loss=3.0770
Predicted string: ohlol , Epoch [10/15] loss=2.8850
Predicted string: ohlol , Epoch [11/15] loss=2.7138
Predicted string: ohlol , Epoch [12/15] loss=2.5553
Predicted string: ohlol , Epoch [13/15] loss=2.4400
Predicted string: ohlol , Epoch [14/15] loss=2.3627
Predicted string: ohlol , Epoch [15/15] loss=2.2901


Use RNN

In [24]:
import torch 

# ------------Parameters------------#
input_size = 4
hidden_size = 4
num_layers = 1
batch_size = 1
seq_len = 5

# ------------Prepare Data------------#
idx2char = ['e', 'h','l','o'] # dictionary
x_data = [1,0,0,2,3] # hello
y_data = [3,1,2,3,2] # ohlol

one_hot_lookup=[[1,0,0,0],
                [0,1,0,0],
                [0,0,1,0],
                [0,0,0,1]]
x_one_hot = [one_hot_lookup[x] for x in x_data] # seqLen, inputSize

inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size) # seqLen, batchSize, inputSize
labels = torch.LongTensor(y_data) # 𝒔𝒆𝒒𝑳𝒆𝒏 × 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆

# ------------Design Model------------#
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(Model,self).__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size = self.input_size,
                                hidden_size = self.hidden_size,
                                num_layers = self.num_layers)
    
    def forward(self, input):
        hidden = torch.zeros(self.num_layers,
                             self.batch_size,
                             self.hidden_size)
        out, _ = self.rnn(input, hidden)
        return out.view(-1, self.hidden_size) # (𝒔𝒆𝒒𝑳𝒆𝒏 × 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛e)
    
net = Model(input_size, hidden_size, batch_size, num_layers)

# ------------Loss and Optimizer------------#
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# ------------Training Cycle------------#
for epoch in range(25):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))

Predicted:  lhhhh, Epoch [1/15] loss = 1.488
Predicted:  lhhll, Epoch [2/15] loss = 1.305
Predicted:  lhlll, Epoch [3/15] loss = 1.148
Predicted:  lhlol, Epoch [4/15] loss = 1.036
Predicted:  lolol, Epoch [5/15] loss = 0.959
Predicted:  lolol, Epoch [6/15] loss = 0.898
Predicted:  oolol, Epoch [7/15] loss = 0.846
Predicted:  oolol, Epoch [8/15] loss = 0.801
Predicted:  oolol, Epoch [9/15] loss = 0.761
Predicted:  oolol, Epoch [10/15] loss = 0.727
Predicted:  ohlol, Epoch [11/15] loss = 0.695
Predicted:  ohlol, Epoch [12/15] loss = 0.667
Predicted:  ohlol, Epoch [13/15] loss = 0.641
Predicted:  ohlol, Epoch [14/15] loss = 0.617
Predicted:  ohlol, Epoch [15/15] loss = 0.596
Predicted:  ohlol, Epoch [16/15] loss = 0.578
Predicted:  ohlol, Epoch [17/15] loss = 0.562
Predicted:  ohlol, Epoch [18/15] loss = 0.548
Predicted:  ohlol, Epoch [19/15] loss = 0.536
Predicted:  ohlol, Epoch [20/15] loss = 0.526
Predicted:  ohlol, Epoch [21/15] loss = 0.517
Predicted:  ohlol, Epoch [22/15] loss = 0.5

将one-hot编码替换成embedding

In [26]:
import torch 

# ------------Parameters------------#
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5

# ------------Prepare Data------------#
idx2char = ['e', 'h','l','o'] # dictionary
x_data = [[1,0,0,2,3]] # hello  # (batch, seq_len)
y_data = [3,1,2,3,2] # ohlol  (batch * seq_len)

inputs = torch.LongTensor(x_data) # Input should be LongTensor: 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏
labels = torch.LongTensor(y_data) # Target should be LongTensor: 𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆 × 𝒔𝒆𝒒𝑳𝒆𝒏

# ------------Design Model------------#
class Model(torch.nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size) # Lookup matrix of Embedding:𝒊𝒏𝒑𝒖𝒕𝑺𝒊𝒛𝒆, 𝒆𝒎𝒃𝒆𝒅𝒅𝒊𝒏𝒈𝑺𝒊𝒛𝒆
        self.rnn = torch.nn.RNN(input_size = embedding_size,
                                hidden_size = hidden_size,
                                num_layers = num_layers,
                                batch_first = True)
        self.fc = torch.nn.Linear(hidden_size, num_class)
    
    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        
        x = self.emb(x) 
#         Input should be LongTensor:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏
#         Output with shape:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏, 𝒆𝒎𝒃𝒆𝒅𝒅𝒊𝒏𝒈𝑺𝒊𝒛𝒆
#         Notice: batch FIRST

        x, _ = self.rnn(x, hidden)
#         Input of RNN:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏, 𝒆𝒎𝒃𝒆𝒅𝒅𝒊𝒏𝒈𝑺𝒊𝒛𝒆
#         Output of RNN:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆
        
        x = self.fc(x)
#         Input of FC Layer:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏, 𝒉𝒊𝒅𝒅𝒆𝒏𝑺𝒊𝒛𝒆
#         Output of FC Layer:𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆, 𝒔𝒆𝒒𝑳𝒆𝒏,𝒏𝒖𝒎𝑪𝒍𝒂𝒔s

        return x.view(-1, num_class) # (𝒃𝒂𝒕𝒄𝒉𝑺𝒊𝒛𝒆 × 𝒔𝒆𝒒𝑳𝒆𝒏, 𝒏𝒖𝒎𝑪𝒍𝒂𝒔𝒔)
    
net = Model()

# ------------Loss and Optimizer------------#
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# ------------Training Cycle------------#
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))

Predicted:  heeee, Epoch [1/15] loss = 1.437
Predicted:  ollol, Epoch [2/15] loss = 1.127
Predicted:  ollol, Epoch [3/15] loss = 0.923
Predicted:  ollol, Epoch [4/15] loss = 0.736
Predicted:  ollol, Epoch [5/15] loss = 0.567
Predicted:  ohlol, Epoch [6/15] loss = 0.410
Predicted:  ohlol, Epoch [7/15] loss = 0.285
Predicted:  ohlol, Epoch [8/15] loss = 0.185
Predicted:  ohlol, Epoch [9/15] loss = 0.118
Predicted:  ohlol, Epoch [10/15] loss = 0.078
Predicted:  ohlol, Epoch [11/15] loss = 0.053
Predicted:  ohlol, Epoch [12/15] loss = 0.037
Predicted:  ohlol, Epoch [13/15] loss = 0.026
Predicted:  ohlol, Epoch [14/15] loss = 0.019
Predicted:  ohlol, Epoch [15/15] loss = 0.014
