In [None]:
# rnn(recurrent neural network)

# 核心公式
# h_t = tanh(W_x * x_t + W_h * h_t-1 + b)
# y_t = softmax(W_y * h_t + b)
# 输入 x_t 是一个向量，长度为 input_size
# 隐藏状态 h_t-1 是一个向量，长度为 hidden_size
# 输出 y_t 是一个向量，长度为 output_size

sentences = ["我 喜欢 玩具", "我 爱 爸爸", "我 讨厌 挨打", "我 不喜欢 吵架"]
word_list = list(set(" ".join(sentences).split()))

word_to_idx = {word: idx for idx, word in enumerate(word_list)}
idx_to_word = {idx: word for idx, word in enumerate(word_list)}
voc_size = len(word_list)
print("词汇表", word_list)
print("词汇到索引", word_to_idx)
print("索引到词汇", idx_to_word)
print("词汇表大小", len(word_list))

词汇表 ['喜欢', '不喜欢', '吵架', '爸爸', '玩具', '讨厌', '挨打', '我', '爱']
词汇到索引 {'喜欢': 0, '不喜欢': 1, '吵架': 2, '爸爸': 3, '玩具': 4, '讨厌': 5, '挨打': 6, '我': 7, '爱': 8}
索引到词汇 {0: '喜欢', 1: '不喜欢', 2: '吵架', 3: '爸爸', 4: '玩具', 5: '讨厌', 6: '挨打', 7: '我', 8: '爱'}
词汇表大小 9


In [5]:
import torch 
import random

batch_size = 2
def make_batch(sentences, batch_size):
    input_batch = []
    target_batch = []
    selected_sentences = random.sample(sentences, batch_size) # 随机选择batch_size个句子
    for sentence in selected_sentences:
        words = sentence.split()
        inp = [word_to_idx[word] for word in words[:-1]] # 输入是除了最后一个词的所有词
        tgt = word_to_idx[words[-1]] # 目标是最后一个词
        input_batch.append(inp)
        target_batch.append(tgt)
    input_batch = torch.LongTensor(input_batch)
    # print("input.shape", input_batch.shape)
    target_batch = torch.LongTensor(target_batch)
    # print("target.shape", target_batch.shape)
    return input_batch, target_batch

input_batch, target_batch = make_batch(sentences, batch_size)
print("输入批处理数据", input_batch)
input_words = []
for input_idxs in input_batch:
    input_words.append([idx_to_word[idx.item()] for idx in input_idxs])
print("输入批处理数据对应原始词", input_words)

print("目标批处理数据", target_batch)
target_words = [idx_to_word[idx.item()] for idx in target_batch]
print("目标批处理数据对应原始词", target_words)

输入批处理数据 tensor([[7, 1],
        [7, 5]])
输入批处理数据对应原始词 [['我', '不喜欢'], ['我', '讨厌']]
目标批处理数据 tensor([2, 6])
目标批处理数据对应原始词 ['吵架', '挨打']


In [None]:
import torch.nn as nn

class NPLM(nn.Module):
    def __init__(self, voc_size, embedding_size, n_hidden):
        super(NPLM, self).__init__()
        # 从词汇表大小到嵌入层大小的线性层
        self.C = nn.Embedding(voc_size, embedding_size)
        # 如果batch_first=True, 输入的形状为[batch_size, seq_len, input_size]
        # 如果batch_first=False, 输入的形状为[seq_len, batch_size, input_size]
        self.lstm = nn.LSTM(embedding_size, n_hidden, batch_first=True)
        self.linear = nn.Linear(n_hidden, voc_size)
    
    def forward(self, X): # X: [batch_size. nstep]
        X = self.C(X) # [batch_size, nstep, embedding_size]
        lstm_out, _ = self.lstm(X) # [batch_size, nstep, n_hidden]
        # 只选最后一个时间步的输出
        last_lstm_out = lstm_out[:,-1,:] # [batch_size, 1, n_hidden]
        output = self.linear(last_lstm_out) # 
        return output

In [9]:

n_hidden = 2
embedding_size = 2
model = NPLM(voc_size, embedding_size, n_hidden)
print("NPLM 模型结构", model)

NPLM 模型结构 NPLM(
  (C): Embedding(9, 2)
  (lstm): LSTM(2, 2, batch_first=True)
  (linear): Linear(in_features=2, out_features=9, bias=True)
)


In [11]:
import torch.optim as optim

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

epoches = 5000
for epoch in range(epoches):
    optimizer.zero_grad()
    input_batch, target_batch = make_batch(sentences, batch_size)
    pred = model(input_batch)
    loss = loss_fn(pred, target_batch)
    if (epoch + 1) % 1000 == 0:
        print(f"epoch {epoch + 1} loss: {loss.item():.4f}")
    loss.backward()
    optimizer.step()

epoch 1000 loss: 0.0233
epoch 2000 loss: 0.0014
epoch 3000 loss: 0.0003
epoch 4000 loss: 0.0002
epoch 5000 loss: 0.0001


In [16]:
input_strs = [["我", "爱"], ["我", "不喜欢"]]
input_idxs = [[word_to_idx[word] for word in input_str] for input_str in input_strs]

input_batch = torch.LongTensor(input_idxs)
# 预测最大的idx
print(model(input_batch).data.shape)
# print(model(input_batch).data.max(dim=1))
predict = model(input_batch).data.max(dim=1).indices # [batch_size, 1]
print("predict.shape", predict.shape)
# print("predict.sequeeze.shape", predict.squeeze().shape)
predict_strs = [idx_to_word[idx.item()] for idx in predict]

for input_seq, pred in zip(input_strs, predict_strs):
    print(input_seq, "->", pred)

torch.Size([2, 9])
predict.shape torch.Size([2])
['我', '爱'] -> 爸爸
['我', '不喜欢'] -> 吵架
