In [30]:
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable  #自动求导

dtype = torch.FloatTensor

sentences = [ "i like cat", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
print(word_list)
word_dict = {w: i for i, w in enumerate(word_list)}
print("word_dict:",word_dict)
number_dict = {i: w for i, w in enumerate(word_list)}
print("number_dict:",number_dict)

n_class = len(word_dict) # number of Vocabulary

# NNLM Parameter
n_step = 2 # n-1 in paper
n_hidden = 2 # h in paper
m = 2 # m in paper

#通过这个make_batch函数，获得输入的number和目标词的number
def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        print(input)
        target = word_dict[word[-1]]
        print(target)

        input_batch.append(input)  #输入
        target_batch.append(target)
    print("input_batch:",input_batch)

    return input_batch, target_batch

# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__() 
        self.C = nn.Embedding(n_class, m)   #7个词  2维的embedding
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]  view的作用是reshape张量形状  这里-1代表不确定行数
        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
        return output

model = NNLM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

# Training
for epoch in range(1000):

    optimizer.zero_grad()
    output = model(input_batch)

    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)   #计算output和真正目标之间的差 以调整网络参数
    if (epoch + 1)%100 == 0:
        print('Epoch:', '%03d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

# Predict  关键就是这个predict
print(model(input_batch).data) 
predict = model(input_batch).data.max(1, keepdim=True)[1]   #.max(1)是输出前面结果每一行的最大值和所在索引  是不是说相当于第一句i like来说，索引为3的“cat”预测所得概率最大 其余类似
print(predict.shape)
print(predict.reshape(1,3))
print(predict.reshape(3,))

# Test
print([' '.join(sen.split()[:2]) for sen in sentences], '->', [number_dict[n.item()] for n in predict.reshape(3,)])


['i', 'cat', 'love', 'hate', 'coffee', 'milk', 'like']
word_dict: {'i': 0, 'cat': 1, 'love': 2, 'hate': 3, 'coffee': 4, 'milk': 5, 'like': 6}
number_dict: {0: 'i', 1: 'cat', 2: 'love', 3: 'hate', 4: 'coffee', 5: 'milk', 6: 'like'}
[0, 6]
1
[0, 2]
4
[0, 3]
5
input_batch: [[0, 6], [0, 2], [0, 3]]
Epoch: 100 cost = 1.078278
Epoch: 200 cost = 0.627802
Epoch: 300 cost = 0.414748
Epoch: 400 cost = 0.289781
Epoch: 500 cost = 0.211312
Epoch: 600 cost = 0.160302
Epoch: 700 cost = 0.125704
Epoch: 800 cost = 0.101124
Epoch: 900 cost = 0.082913
Epoch: 1000 cost = 0.068961
tensor([[-0.7512,  7.0253,  1.6689, -4.3078, -1.9751,  3.7274, -9.6952],
        [-1.6311, -5.2685, -2.7448, -5.0243,  4.6394,  1.6395, -3.3665],
        [-2.8028,  0.8431, -0.6398, -5.4192,  1.2543,  3.9763, -6.7107]])
torch.Size([3, 1])
tensor([[1, 4, 5]])
tensor([1, 4, 5])
['i like', 'i love', 'i hate'] -> ['cat', 'coffee', 'milk']
