In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Tensor type that TextLSTM casts its output-projection weight and bias to
# (via `.type(dtype)`) when it creates them.
dtype = torch.FloatTensor

In [2]:
# Vocabulary: the 26 lowercase ASCII letters, in alphabetical order.
char_arr = list('abcdefghijklmnopqrstuvwxyz')

# Forward (letter -> class index) and reverse (class index -> letter) lookups.
word_dict = {letter: index for index, letter in enumerate(char_arr)}
number_dict = dict(enumerate(char_arr))

# Number of classes equals the vocabulary size.
n_class = len(word_dict)

In [3]:
# Training corpus: for each word, every character but the last is the model
# input and the last character is the prediction target (see make_batch).
seq_data = [
    'make', 'need', 'coal', 'word', 'love',
    'hate', 'live', 'home', 'hash', 'star',
]

# LSTM parameters
n_hidden = 128  # width of the LSTM hidden state
n_steps = 3     # NOTE(review): not referenced by the visible cells; presumably the input length per word

In [5]:
def make_batch(seq_data, vocab=None, num_classes=None):
    """Turn a list of words into one-hot inputs and class-index targets.

    For every word, all characters except the last are one-hot encoded as the
    input sequence and the last character's class index is the target.

    Args:
        seq_data: Iterable of equal-length strings whose characters are all
            keys of ``vocab``.
        vocab: Mapping from character to class index. Defaults to the
            notebook-level ``word_dict``.
        num_classes: Width of the one-hot encoding. Defaults to the
            notebook-level ``n_class``.

    Returns:
        A tuple ``(inputs, targets)``:
        ``inputs`` is a float32 tensor of shape
        ``[len(seq_data), len(word) - 1, num_classes]`` and ``targets`` is an
        int64 tensor of shape ``[len(seq_data)]``.

    Raises:
        KeyError: If a character is missing from ``vocab``.
    """
    # Fall back to the notebook globals so existing `make_batch(seq_data)` calls keep working.
    vocab = word_dict if vocab is None else vocab
    num_classes = n_class if num_classes is None else num_classes

    input_ids = [[vocab[ch] for ch in seq[:-1]] for seq in seq_data]
    target_ids = [vocab[seq[-1]] for seq in seq_data]

    # Build the identity matrix once and gather every row in a single indexing
    # operation. Converting one contiguous ndarray avoids the slow
    # list-of-ndarrays path of torch.Tensor(...).
    index_array = np.asarray(input_ids, dtype=np.int64)
    one_hot = np.eye(num_classes, dtype=np.float32)[index_array]

    # Plain tensors are returned; wrapping them in Variable is a no-op in current PyTorch.
    return torch.from_numpy(one_hot), torch.tensor(target_ids, dtype=torch.long)

In [10]:
class TextLSTM(nn.Module):
    """Single-layer LSTM followed by a hand-written linear projection.

    Maps a batch of one-hot encoded sequences to one vector of class logits
    per sequence, using the LSTM output at the final time step.

    Args:
        num_classes: Size of each one-hot input vector and number of output
            classes. Defaults to the notebook-level ``n_class``.
        hidden_size: Width of the LSTM hidden state. Defaults to the
            notebook-level ``n_hidden``.
        tensor_type: Tensor type the projection weight and bias are cast to
            before being registered. Defaults to the notebook-level ``dtype``.
            NOTE(review): the LSTM itself is created with its default type and
            is not cast here, so a non-float type would also require casting
            the LSTM and the inputs before calling ``forward``.
    """

    def __init__(self, num_classes=None, hidden_size=None, tensor_type=None):
        super(TextLSTM, self).__init__()
        # Fall back to the notebook globals so the existing `TextLSTM()` call keeps working.
        num_classes = n_class if num_classes is None else num_classes
        hidden_size = n_hidden if hidden_size is None else hidden_size
        tensor_type = dtype if tensor_type is None else tensor_type

        self.lstm = nn.LSTM(input_size=num_classes, hidden_size=hidden_size)
        self.W = nn.Parameter(torch.randn([hidden_size, num_classes]).type(tensor_type))
        # Cast BEFORE wrapping in nn.Parameter. Casting the Parameter itself
        # returns a plain tensor whenever a real conversion happens, which
        # would leave the bias unregistered and invisible to the optimizer.
        self.b = nn.Parameter(torch.randn([num_classes]).type(tensor_type))

    def forward(self, X):
        """Compute class logits for a batch of one-hot sequences.

        Args:
            X: Tensor of shape ``[batch_size, n_step, n_class]`` (batch first,
                as produced by ``make_batch``).

        Returns:
            Tensor of shape ``[batch_size, n_class]`` holding unnormalised
            class scores.
        """
        # nn.LSTM is built with the default batch_first=False, so it expects
        # [n_step, batch_size, n_class].
        steps_first = X.transpose(0, 1)

        # No (h_0, c_0) is passed: nn.LSTM then starts from all-zero states,
        # which matches the explicit zero tensors used previously while
        # following the input's device and dtype automatically.
        outputs, _ = self.lstm(steps_first)

        # Output at the last time step: [batch_size, n_hidden]
        last_output = outputs[-1]
        # Logits: [batch_size, n_class]
        return torch.mm(last_output, self.W) + self.b

In [11]:
# Seed PyTorch's RNG before the model is created so that parameter
# initialisation, and therefore the whole training run, is repeatable.
SEED = 0
torch.manual_seed(SEED)

# The corpus is tiny, so the whole data set is encoded once as a single batch.
input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()
criterion = nn.CrossEntropyLoss()  # expects raw logits and integer class targets
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
# training
# Full-batch optimisation: every epoch is one forward/backward pass over the
# single batch built from seq_data.
for epoch in range(1000):
    optimizer.zero_grad()  # drop gradients accumulated by the previous step
    output = model(input_batch)
    loss = criterion(output, target_batch)

    loss.backward()
    optimizer.step()

    # Report every 100th epoch. `loss` was computed before this epoch's
    # parameter update and is not modified by backward()/step().
    if not (epoch + 1) % 100:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

Epoch: 0100 cost = 0.013372
Epoch: 0200 cost = 0.003112
Epoch: 0300 cost = 0.001417
Epoch: 0400 cost = 0.000816
Epoch: 0500 cost = 0.000533
Epoch: 0600 cost = 0.000376
Epoch: 0700 cost = 0.000280
Epoch: 0800 cost = 0.000217
Epoch: 0900 cost = 0.000173
Epoch: 1000 cost = 0.000141


In [13]:
# test
# Show exactly the prefixes the model was fed: make_batch encodes seq[:-1],
# so mirror that here instead of hard-coding a prefix length.
inputs = [sen[:-1] for sen in seq_data]

# Inference only: disable gradient tracking rather than reaching into `.data`.
with torch.no_grad():
    # max over dim 1 (classes) returns (values, indices); keepdim=True keeps
    # the [batch_size, 1] shape that the reporting cells iterate over.
    predicts = model(input_batch).max(1, keepdim=True)[1]

In [15]:
# Flatten to a plain list of class indices. Unlike a bare .squeeze(),
# reshape(-1) stays one-dimensional for a single-word batch, so the loop
# never has to iterate over a 0-d tensor.
print(inputs, '->', [number_dict[idx] for idx in predicts.reshape(-1).tolist()])

['mak', 'nee', 'coa', 'wor', 'lov', 'hat', 'liv', 'hom', 'has', 'sta'] -> ['e', 'd', 'l', 'd', 'e', 'e', 'e', 'e', 'h', 'r']


In [21]:
# Decode without squeezing: `predicts` was computed with keepdim=True, so each
# row is a one-element tensor and .item() yields its class index directly.
predicted_letters = [number_dict[row.item()] for row in predicts]
print(inputs, '->', predicted_letters)

['mak', 'nee', 'coa', 'wor', 'lov', 'hat', 'liv', 'hom', 'has', 'sta'] -> ['e', 'd', 'l', 'd', 'e', 'e', 'e', 'e', 'h', 'r']
