In [1]:
import torch
import torch.nn as nn
import torch.optim as optimizer
import torch.utils.data as Data

dtype = torch.FloatTensor

In [2]:
sentences = ['i like cat', 'i love coffee', 'i hate milk']
sentences_list = " ".join(sentences).split()
vocab = list(set(sentences_list))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
V = len(vocab)

In [3]:
def make_data(sentences):
    input_data = []
    target_data = []
    for sen in sentences:
        sen = sen.split()
        input_tmp = [word2idx[w] for w in sen[:-1]]
        target_tmp = word2idx[sen[-1]]

        input_data.append(input_tmp)
        target_data.append(target_tmp)
    return input_data, target_data

In [4]:
input_data, target_data = make_data(sentences)
input_data, target_data = torch.LongTensor(input_data), torch.LongTensor(target_data)
dataset = Data.TensorDataset(input_data, target_data)
loader = Data.DataLoader(dataset=dataset, batch_size=16, shuffle=True)

In [5]:
input_data, target_data

(tensor([[4, 2],
         [4, 1],
         [4, 0]]),
 tensor([3, 6, 5]))

In [6]:
word2idx

{'hate': 0, 'love': 1, 'like': 2, 'cat': 3, 'i': 4, 'milk': 5, 'coffee': 6}

In [7]:
m = 2  #  一个词用多少维向量表示
n_step = 2
n_hidden = 10

In [8]:
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(V, m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.b = nn.Parameter(torch.randn(V).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, V).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, V).type(dtype))

    def forward(self, X):
        '''
        x : [batch_size, n_step]
        '''
        X = self.C(X)  # X : [batch_size, n_step, m]
        X = X.view(-1, n_step * m) #  [batch_size, n_step * m]
        hidden_out = torch.tanh(self.d + torch.mm(X, self.H))
        output =  self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U)
        return output

model = NNLM()
optim = optimizer.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [9]:
for epoch in range(5000):
    for batch_x, batch_y in loader:
        pred = model(batch_x)
        loss = criterion(pred, batch_y)
        if (epoch + 1) % 1000 == 0:
            print(epoch + 1, loss.item())
        optim.zero_grad()
        loss.backward()
        optim.step()

1000 0.018417200073599815
2000 0.003142619738355279
3000 0.0012410400668159127
4000 0.0006218639318831265
5000 0.0003407046606298536


In [10]:
#  Pred
pred = model(input_data).max(1, keepdim=True)[1]
print([idx2word[idx.item()] for idx in pred.squeeze()])


['cat', 'coffee', 'milk']
