# NNLM
> https://blog.csdn.net/rongsenmeng2835/article/details/108571335

In [6]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [16]:
"""
1.Basic Embedding Model
    1-1. NNLM(Neural Network Language Model)
"""

dtype = torch.FloatTensor
sentences = ["i really like dog", "i doubtfully love coffee", "i sincerely hate milk"]

word_list = " ".join(sentences).split()  # 制作词汇表
print(word_list)
word_list = list(set(word_list))  # 去除词汇表中的重复元素
print("去重后的word_list:", word_list)
word_dict = {w: i for i, w in enumerate(word_list)}  # 将每个单词对应于相应的索引
number_dict = {i: w for i, w in enumerate(word_list)}  # 将每个索引对应于相应的单词
n_class = len(word_dict)  # 单词的总数

# NNLM parameters
n_step = 3   # 根据前两个单词预测第3个单词
n_hidden = 2  # 隐藏层神经元的个数
m = 3  # 词向量的维度

['i', 'really', 'like', 'dog', 'i', 'doubtfully', 'love', 'coffee', 'i', 'sincerely', 'hate', 'milk']
去重后的word_list: ['i', 'like', 'love', 'coffee', 'hate', 'really', 'milk', 'doubtfully', 'sincerely', 'dog']


In [17]:
# 由于pytorch中输入的数据是以batch小批量进行输入的，下面的函数就是将原始数据以一个batch为基本单位喂给模型
def make_batch(sentences):
    input_batch = []
    target_batch = []
    for sentence in sentences:
        word = sentence.split()
        input = [word_dict[w] for w in word[:-1]]
        target = word_dict[word[-1]]
        input_batch.append(input)
        target_batch.append(target)
    return input_batch, target_batch

In [52]:
# Model


class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, embedding_dim=m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, x):
        x = self.C(x)
        x = x.view(-1, n_step * m)
        # x: [batch_size, n_step*n_class]
        tanh = torch.tanh(self.d + torch.mm(x, self.H))
        # tanh: [batch_size, n_hidden]
        output = self.b + torch.mm(x, self.W) + torch.mm(tanh, self.U)
        # output: [batch_size, n_class]
        return output

    def embed(self,x):
        return self.C(x)
        # print(self.C.weight)

model = NNLM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 制作输入
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

input_batch

tensor([[0, 5, 1],
        [0, 7, 2],
        [0, 8, 4]])

In [54]:
# 开始训练
for epoch in range(1000):
    optimizer.zero_grad()
    output = model(input_batch)
# output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print("Epoch:{}".format(epoch + 1), "Loss:{:.3f}".format(loss))
    loss.backward()
    optimizer.step()

# 预测
predict = model(input_batch).data.max(
    1, keepdim=True)[1]  # [batch_size, n_class]
print("predict: \n", predict)
# 测试
print([sentence.split()[:3] for sentence in sentences], "---->",
      [number_dict[n.item()] for n in predict.squeeze()])
model.embed(torch.tensor([0,1,2,3,4,5,6,7,8,9]))
for word in word_list:
    word_id = word_dict[word]
    print(f"{word} -> {word_id}")
    print(f"vector of '{word}': {model.embed(torch.tensor([word_id]))}")

Epoch:1000 Loss:0.015
predict: 
 tensor([[9],
        [3],
        [6]])
[['i', 'really', 'like'], ['i', 'doubtfully', 'love'], ['i', 'sincerely', 'hate']] ----> ['dog', 'coffee', 'milk']
i -> 0
vector of 'i': tensor([[-0.6504, -1.5327, -2.0340]], grad_fn=<EmbeddingBackward0>)
like -> 1
vector of 'like': tensor([[ 0.4249, -0.7295,  0.4160]], grad_fn=<EmbeddingBackward0>)
love -> 2
vector of 'love': tensor([[ 3.0290, -0.6820,  0.7641]], grad_fn=<EmbeddingBackward0>)
coffee -> 3
vector of 'coffee': tensor([[-0.7789, -0.7452,  0.6817]], grad_fn=<EmbeddingBackward0>)
hate -> 4
vector of 'hate': tensor([[-0.9589,  0.5961, -1.3371]], grad_fn=<EmbeddingBackward0>)
really -> 5
vector of 'really': tensor([[-0.6070, -0.8506, -1.2067]], grad_fn=<EmbeddingBackward0>)
milk -> 6
vector of 'milk': tensor([[0.7398, 0.6839, 0.6022]], grad_fn=<EmbeddingBackward0>)
doubtfully -> 7
vector of 'doubtfully': tensor([[-1.6310, -1.0945, -0.2196]], grad_fn=<EmbeddingBackward0>)
sincerely -> 8
vector of 'sincere