In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [2]:
dtype=torch.FloatTensor

In [3]:
sentences = [ "i like dog", "i love coffee", "i hate milk"]

In [4]:
word_list=" ".join(sentences).split()

In [5]:
word_list

['i', 'like', 'dog', 'i', 'love', 'coffee', 'i', 'hate', 'milk']

In [6]:
idx_to_word=list(set(word_list))

In [7]:
idx_to_word

['love', 'coffee', 'milk', 'hate', 'dog', 'like', 'i']

In [8]:
word_to_idx={w:i for i,w in enumerate(idx_to_word)}

In [9]:
word_to_idx

{'love': 0, 'coffee': 1, 'milk': 2, 'hate': 3, 'dog': 4, 'like': 5, 'i': 6}

In [10]:
n_class=len(word_list)

In [11]:
n_class

9

In [12]:
n_step = 2 # n-1 in paper
n_hidden = 2 # h in paper
m = 2 # m in paper

In [13]:
def make_batch(sentences):
    input_batch=[]
    target_batch=[]
    for sen in sentences:
        word=sen.split()
        input=[word_to_idx.get(n) for n in word[:-1]]
        target=word_to_idx.get(word[-1])
        input_batch.append(input)
        target_batch.append(target)
    return input_batch,target_batch

In [14]:
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]
        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
        return output

In [15]:
model=NNLM()

In [16]:
criterion=nn.CrossEntropyLoss()

In [17]:
optimizer=optim.Adam(model.parameters(),lr=1e-3)

In [18]:
input_batch, target_batch = make_batch(sentences)

In [19]:
input_batch

[[6, 5], [6, 0], [6, 3]]

In [20]:
target_batch

[4, 1, 2]

In [21]:
input_batch=torch.LongTensor(input_batch)
target_batch=torch.LongTensor(target_batch)

In [22]:
for epoch in range(5000):
    optimizer.zero_grad()
    output=model(input_batch)
    loss=criterion(output,target_batch)
    if (epoch+1)%100==0:
        print('epoch:','%04d'%(epoch+1),'cost:','{:.6f}'.format(loss))
    loss.backward()
    optimizer.step()

epoch: 0100 cost: 1.172833
epoch: 0200 cost: 0.663027
epoch: 0300 cost: 0.421013
epoch: 0400 cost: 0.277211
epoch: 0500 cost: 0.188496
epoch: 0600 cost: 0.134336
epoch: 0700 cost: 0.100517
epoch: 0800 cost: 0.078400
epoch: 0900 cost: 0.063194
epoch: 1000 cost: 0.052262
epoch: 1100 cost: 0.044099
epoch: 1200 cost: 0.037811
epoch: 1300 cost: 0.032842
epoch: 1400 cost: 0.028828
epoch: 1500 cost: 0.025528
epoch: 1600 cost: 0.022774
epoch: 1700 cost: 0.020445
epoch: 1800 cost: 0.018454
epoch: 1900 cost: 0.016735
epoch: 2000 cost: 0.015238
epoch: 2100 cost: 0.013925
epoch: 2200 cost: 0.012766
epoch: 2300 cost: 0.011737
epoch: 2400 cost: 0.010818
epoch: 2500 cost: 0.009993
epoch: 2600 cost: 0.009251
epoch: 2700 cost: 0.008579
epoch: 2800 cost: 0.007970
epoch: 2900 cost: 0.007415
epoch: 3000 cost: 0.006908
epoch: 3100 cost: 0.006445
epoch: 3200 cost: 0.006019
epoch: 3300 cost: 0.005627
epoch: 3400 cost: 0.005266
epoch: 3500 cost: 0.004933
epoch: 3600 cost: 0.004624
epoch: 3700 cost: 0.004338
e

In [23]:
# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

# Test
print([sen.split()[:2] for sen in sentences], '->', [idx_to_word[n.item()] for n in predict.squeeze()])

[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
