# nnlm_model.py

# -*- coding: utf-8 -*-
"""NNLM_paper.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1q6tWzcpFLzU_qvzvkdYiDaxSp--y6nFR
"""

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.autograd import Variable

# Seed the CPU generator first, then pick the compute device; when a GPU is
# present, seed every CUDA generator as well so runs are repeatable.
torch.manual_seed(777)
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'

# Toy corpus: the first words of each sentence are the context and the
# last word is the prediction target.
sentences = ['i like dog', 'i love coffee', 'i hate milk']

# Sort the unique words: iterating a bare set of strings yields an order that
# changes between interpreter runs (string hash randomization), which would
# make the word <-> index mapping, and therefore training, non-reproducible
# despite the fixed torch seed.
word_list = sorted(set(' '.join(sentences).split()))
print(word_list)

# word -> index
word_dict = {w: i for i, w in enumerate(word_list)}
print('word dict')
print(word_dict)

# index -> word (inverse of word_dict)
number_dict = dict(enumerate(word_list))
print(number_dict)

n_class = len(word_dict)  # vocabulary size
print(n_class)

# NNLM hyperparameters (symbol names follow the paper)
m = 2                 # m in paper: embedding dimension per word
n_hidden = 2          # h in paper: number of hidden units
n_step = 2            # n-1 in paper: number of context words fed to the model
learning_rate = 1e-3  # optimizer step size
epochs = 5000         # number of full-batch training iterations

def make_batch(sentences, vocab=None):
    """Convert sentences into (context indices, target index) training pairs.

    Each sentence is split on whitespace; every word except the last becomes
    the context and the last word becomes the target.

    Args:
        sentences: iterable of whitespace-separated sentence strings.
        vocab: mapping from word to integer index. Defaults to the
            module-level ``word_dict`` so existing ``make_batch(sentences)``
            calls keep working.

    Returns:
        A tuple ``(input_batch, target_batch)`` where ``input_batch`` is a
        list of lists of context word indices and ``target_batch`` is a list
        of target word indices, both in sentence order.

    Raises:
        KeyError: if a word is missing from ``vocab``.
        IndexError: if a sentence contains no words.
    """
    if vocab is None:
        vocab = word_dict

    input_batch = []
    target_batch = []

    for sentence in sentences:
        words = sentence.split()
        # Named context_ids rather than `input` to avoid shadowing the builtin.
        context_ids = [vocab[w] for w in words[:-1]]
        target_id = vocab[words[-1]]

        input_batch.append(context_ids)
        target_batch.append(target_id)

    return input_batch, target_batch


#model

class NNLM(nn.Module):
    """Feed-forward neural network language model.

    For a context of word indices the model looks up one embedding per word,
    concatenates them into a vector ``x`` and computes the unnormalized
    scores ``y = b + x W + tanh(d + x H) U`` over the vocabulary.

    Args:
        vocab_size: number of words in the vocabulary. Defaults to the
            module-level ``n_class``.
        embed_dim: embedding size per word. Defaults to the module-level ``m``.
        context_size: number of context words per example. Defaults to the
            module-level ``n_step``.
        hidden_size: number of hidden units. Defaults to the module-level
            ``n_hidden``.
    """

    def __init__(self, vocab_size=None, embed_dim=None, context_size=None,
                 hidden_size=None):
        super().__init__()

        # Fall back to the script's global hyperparameters so that a bare
        # ``NNLM()`` behaves exactly as before.
        vocab_size = n_class if vocab_size is None else vocab_size
        embed_dim = m if embed_dim is None else embed_dim
        context_size = n_step if context_size is None else context_size
        hidden_size = n_hidden if hidden_size is None else hidden_size

        # Kept for reshaping the embedded context in forward().
        self.context_size = context_size
        self.embed_dim = embed_dim
        concat_dim = context_size * embed_dim

        # Parameters are created in the original order (C, H, W, d, U, b) so
        # a fixed random seed yields the same initial values.
        self.C = nn.Embedding(vocab_size, embed_dim)                      # word embeddings
        self.H = nn.Parameter(torch.randn(concat_dim, hidden_size))      # input -> hidden
        self.W = nn.Parameter(torch.randn(concat_dim, vocab_size))       # direct input -> output
        self.d = nn.Parameter(torch.randn(hidden_size))                  # hidden bias
        self.U = nn.Parameter(torch.randn(hidden_size, vocab_size))      # hidden -> output
        self.b = nn.Parameter(torch.randn(vocab_size))                   # output bias

    def forward(self, x):
        """Return vocabulary scores for a batch of contexts.

        Args:
            x: integer tensor of word indices with ``context_size`` indices
                per example.

        Returns:
            Tensor of shape ``(batch_size, vocab_size)`` holding unnormalized
            scores (logits) for the next word.
        """
        x = self.C(x)
        # Concatenate the context embeddings: (batch_size, context_size * embed_dim)
        x = x.view(-1, self.context_size * self.embed_dim)
        hidden = torch.tanh(self.d + torch.mm(x, self.H))
        output = self.b + torch.mm(x, self.W) + torch.mm(hidden, self.U)
        return output
# Build the model and move it to the selected device; previously `device`
# was computed but never used, so everything ran on the CPU.
model = NNLM().to(device)

# CrossEntropyLoss applies log-softmax itself, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

input_batch, target_batch = make_batch(sentences)
print(input_batch)
print('target_batch')
print(target_batch)
# Plain tensors suffice: torch.autograd.Variable is a deprecated no-op wrapper.
input_batch = torch.tensor(input_batch, dtype=torch.long, device=device)
target_batch = torch.tensor(target_batch, dtype=torch.long, device=device)

# Full-batch training: the whole corpus is one batch per epoch.
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(input_batch)

    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        # The epoch counter is an integer, so format it as one (the old
        # '{:.4f}' printed e.g. "100.0000"); .item() gives a Python float.
        print('epoch : {:04d} , cost = {:.6f}'.format(epoch + 1, loss.item()))

    loss.backward()
    optimizer.step()


# Inference needs no gradients; argmax over the vocabulary axis gives the
# predicted word index, kept as shape (batch_size, 1).
with torch.no_grad():
    predict = model(input_batch).argmax(dim=1, keepdim=True)

# squeeze(1) drops only the kept axis, so a single-sentence corpus still
# yields an iterable 1-D tensor. The context is every word but the last,
# matching make_batch.
print([sen.split()[:-1] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze(1)])