<a href="https://colab.research.google.com/github/LeeGitaek/NNLM_Paper_Implementation/blob/master/NNLM_paper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import numpy as np 
from torch.autograd import Variable


In [32]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [33]:
sentences = ['i like dog','i love coffee','i hate milk']

word_list = ' '.join(sentences).split()
word_list = list(set(word_list))
print(word_list)

word_dict = {w: i for i,w in enumerate(word_list)}
print('word dict')
print(word_dict)
number_dict = {i: w for i, w in enumerate(word_list)}
print(number_dict)
n_class = len(word_dict) # number of vocabulary

print(n_class)

['dog', 'love', 'like', 'milk', 'hate', 'i', 'coffee']
word dict
{'dog': 0, 'love': 1, 'like': 2, 'milk': 3, 'hate': 4, 'i': 5, 'coffee': 6}
{0: 'dog', 1: 'love', 2: 'like', 3: 'milk', 4: 'hate', 5: 'i', 6: 'coffee'}
7


In [34]:
#NNLM Parameter
n_step = 2 # n-1 in paper
n_hidden = 2 # h in paper 
m = 2       # m in paper 
epochs = 5000
learning_rate = 0.001

In [35]:
def make_batch(sentences):
    input_batch = []
    target_batch = []
    
    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(input)
        target_batch.append(target)
    
    return input_batch,target_batch


#model 

class NNLM(nn.Module):
    def __init__(self):
        super(NNLM,self).__init__()

        self.C = nn.Embedding(n_class,m)
        self.H = nn.Parameter(torch.randn(n_step * m,n_hidden).type(torch.Tensor))
        self.W = nn.Parameter(torch.randn(n_step * m,n_class).type(torch.Tensor))
        self.d = nn.Parameter(torch.randn(n_hidden).type(torch.Tensor))
        self.U = nn.Parameter(torch.randn(n_hidden,n_class).type(torch.Tensor))
        self.b = nn.Parameter(torch.randn(n_class).type(torch.Tensor))
    
    def forward(self,x):
        x = self.C(x)
        x = x.view(-1,n_step*m) # batch_size,n_step * n_class
        tanh = torch.tanh(self.d + torch.mm(x,self.H))
        output = self.b + torch.mm(x,self.W)+torch.mm(tanh,self.U)
        return output
model = NNLM()


In [36]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

input_batch , target_batch = make_batch(sentences)
print(input_batch)
print('target_batch')
print(target_batch)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))


[[5, 2], [5, 1], [5, 4]]
target_batch
[0, 6, 3]


In [38]:
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(input_batch)

    loss = criterion(output,target_batch)
    if (epoch+1)%100 == 0:
        print('epoch : {:.4f} , cost = {:.6f}'.format(epoch+1,loss))

    loss.backward()
    optimizer.step()


predict = model(input_batch).data.max(1,keepdim=True)[1]

print([sen.split()[:2] for sen in sentences],'->',[number_dict[n.item()] for n in predict.squeeze()])


epoch : 100.0000 , cost = 0.002870
epoch : 200.0000 , cost = 0.002688
epoch : 300.0000 , cost = 0.002518
epoch : 400.0000 , cost = 0.002360
epoch : 500.0000 , cost = 0.002213
epoch : 600.0000 , cost = 0.002076
epoch : 700.0000 , cost = 0.001949
epoch : 800.0000 , cost = 0.001829
epoch : 900.0000 , cost = 0.001718
epoch : 1000.0000 , cost = 0.001614
epoch : 1100.0000 , cost = 0.001516
epoch : 1200.0000 , cost = 0.001425
epoch : 1300.0000 , cost = 0.001340
epoch : 1400.0000 , cost = 0.001260
epoch : 1500.0000 , cost = 0.001185
epoch : 1600.0000 , cost = 0.001115
epoch : 1700.0000 , cost = 0.001050
epoch : 1800.0000 , cost = 0.000988
epoch : 1900.0000 , cost = 0.000930
epoch : 2000.0000 , cost = 0.000876
epoch : 2100.0000 , cost = 0.000825
epoch : 2200.0000 , cost = 0.000777
epoch : 2300.0000 , cost = 0.000732
epoch : 2400.0000 , cost = 0.000690
epoch : 2500.0000 , cost = 0.000650
epoch : 2600.0000 , cost = 0.000613
epoch : 2700.0000 , cost = 0.000578
epoch : 2800.0000 , cost = 0.000545
e