In [1]:
import torch

In [2]:
input_size = 4
batch_size = 1
hidden_size = 4
seq_len = 5
num_layers = 1
num_class = 4

In [3]:
# Data preparation
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]

x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1,1) # size (seqLen, 1)

In [4]:
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden
    
    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)
    
net = Model(input_size, hidden_size, batch_size)

In [5]:
criterion = torch.nn.CrossEntropyLoss() # 结合了softmax 激活函数 和 负对数似然损失（Negative Log Likelihood, NLL），计算预测值和目标值之间的差异
optimizer = torch.optim.Adam(net.parameters(), lr=0.001) # lr学习率，优化器每次更新参数的步长大小

In [6]:
# RNN cell example
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string:', end='')
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)
        print()
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

Predicted string:
o
o
o
o
o, Epoch [1/15] loss=8.2609
Predicted string:
o
o
o
o
o, Epoch [2/15] loss=8.2429
Predicted string:
o
o
o
o
o, Epoch [3/15] loss=8.2249
Predicted string:
o
o
o
o
o, Epoch [4/15] loss=8.2069
Predicted string:
o
o
o
o
o, Epoch [5/15] loss=8.1888
Predicted string:
o
o
o
o
o, Epoch [6/15] loss=8.1707
Predicted string:
o
o
o
o
o, Epoch [7/15] loss=8.1526
Predicted string:
o
o
o
o
o, Epoch [8/15] loss=8.1345
Predicted string:
o
o
o
o
o, Epoch [9/15] loss=8.1163
Predicted string:
o
o
o
o
o, Epoch [10/15] loss=8.0981
Predicted string:
o
o
o
o
o, Epoch [11/15] loss=8.0799
Predicted string:
o
o
o
o
o, Epoch [12/15] loss=8.0616
Predicted string:
o
o
o
o
o, Epoch [13/15] loss=8.0434
Predicted string:
o
o
o
o
o, Epoch [14/15] loss=8.0251
Predicted string:
o
o
o
o
o, Epoch [15/15] loss=8.0068


In [7]:
# RNN example
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers)

    def forward(self, input):
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, hidden)
        return out.view(-1, self.hidden_size)

net = Model(input_size, hidden_size, batch_size, num_layers)

# 损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

for epoch in range(100):
    optimizer.zero_grad()
    outputs = net(inputs)
    labels = torch.LongTensor(y_data)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, index = outputs.max(dim=1)  # max函数得到预测的概率最大的值和最大值的索引，取dim=1说明得到的是索引值，dim=0是最大值
    index = index.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in index]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))

Predicted:  llllh, Epoch [1/15] loss = 1.362
Predicted:  lhllh, Epoch [2/15] loss = 1.066
Predicted:  ohloo, Epoch [3/15] loss = 0.941
Predicted:  ohloo, Epoch [4/15] loss = 0.849
Predicted:  ohlol, Epoch [5/15] loss = 0.757
Predicted:  ohlol, Epoch [6/15] loss = 0.667
Predicted:  ohlol, Epoch [7/15] loss = 0.589
Predicted:  ohlol, Epoch [8/15] loss = 0.532
Predicted:  ohlol, Epoch [9/15] loss = 0.494
Predicted:  ohlol, Epoch [10/15] loss = 0.466
Predicted:  ohlol, Epoch [11/15] loss = 0.442
Predicted:  ohlol, Epoch [12/15] loss = 0.425
Predicted:  ohlol, Epoch [13/15] loss = 0.412
Predicted:  ohlol, Epoch [14/15] loss = 0.402
Predicted:  ohlol, Epoch [15/15] loss = 0.395
Predicted:  ohlol, Epoch [16/15] loss = 0.390
Predicted:  ohlol, Epoch [17/15] loss = 0.386
Predicted:  ohlol, Epoch [18/15] loss = 0.382
Predicted:  ohlol, Epoch [19/15] loss = 0.378
Predicted:  ohlol, Epoch [20/15] loss = 0.374
Predicted:  ohlol, Epoch [21/15] loss = 0.371
Predicted:  ohlol, Epoch [22/15] loss = 0.3

In [8]:
embedding_size = 10

idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # (batch, seq_len)
y_data = [3, 1, 2, 3, 2] # (batch * seq_len)

inputs = torch.LongTensor(x_data) # LongTensor: (batchSize, seqLen)
labels = torch.LongTensor(y_data) # LongTensor: (batchSize, seqLen)

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size,num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x) # (batch, seqLen, embeddingSize)
        x, _ = self.rnn(x, hidden) # (batch, seqLen, hidden_size)
        x = self.fc(x) # (batch, seqLen, num_class)
        return x.view(-1, num_class) # reshape to use cross entropy (batchsize, seqLen, num_class)
    
net = Model()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# training model
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch+1, loss.item()))

Predicted:  eoeeh, Epoch [1/15] loss = 1.649
Predicted:  ooool, Epoch [2/15] loss = 1.307
Predicted:  olool, Epoch [3/15] loss = 1.140
Predicted:  olool, Epoch [4/15] loss = 1.049
Predicted:  olool, Epoch [5/15] loss = 0.978
Predicted:  olool, Epoch [6/15] loss = 0.908
Predicted:  olool, Epoch [7/15] loss = 0.834
Predicted:  ohool, Epoch [8/15] loss = 0.759
Predicted:  ohool, Epoch [9/15] loss = 0.691
Predicted:  ohlol, Epoch [10/15] loss = 0.631
Predicted:  ohlol, Epoch [11/15] loss = 0.573
Predicted:  ohlol, Epoch [12/15] loss = 0.516
Predicted:  ohlol, Epoch [13/15] loss = 0.465
Predicted:  ohlol, Epoch [14/15] loss = 0.422
Predicted:  ohlol, Epoch [15/15] loss = 0.382
