In [1]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import time
import math
import numpy as np

import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V
import torch.utils.data as Data
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from pyfile.text_loader import TextDataset
import pyfile.seq2seq_models as sm
from pyfile.seq2seq_models import cuda_variable, str2tensor, EOS_token, SOS_token

In [2]:
# Hyper-parameters shared by the cells below.
# Third argument to both sm.EncoderRNN and sm.AttnDecoderRNN in the main cell.
N_LAYERS = 1
# DataLoader batch size. The training loop only ever reads element [0] of
# each batch, so values other than 1 would silently drop samples.
BATCH_SIZE = 1
# Number of full passes over train_loader.
N_EPOCH = 2
# Passed as the first argument to EncoderRNN and the second to AttnDecoderRNN;
# translate() maps sampled indices back to characters with chr().
N_CHARS = 128      # ASCII
# Hidden size is tied to N_CHARS (128) rather than chosen independently.
HIDDEN_SIZE = N_CHARS

In [3]:
# Train for a given src and target
def train(src, target):
    """Run one optimisation step on a single (src, target) pair.

    The source is encoded once, then the decoder is unrolled over the target
    with teacher forcing: step 0 receives SOS, every later step receives the
    previous ground-truth target token. The per-step criterion values are
    summed, back-propagated, and the shared optimizer is stepped.

    Relies on the notebook-level globals ``encoder``, ``decoder``,
    ``criterion`` and ``optimizer`` created in the main cell.

    Args:
        src: source sequence handed to ``str2tensor`` (the training loop
            passes a string taken from the data loader).
        target: target sequence handed to ``str2tensor`` with ``eos=True``.

    Returns:
        The summed loss as a Python number divided by ``len(target_var)``,
        i.e. the mean loss per target token (EOS included).
    """
    src_var = str2tensor(src)
    target_var = str2tensor(target, eos=True)   # Add the EOS token

    encoder_hidden = encoder.init_hidden()
    encoder_outputs, encoder_hidden = encoder(src_var, encoder_hidden)

    # The decoder starts from the encoder's final hidden state.
    hidden = encoder_hidden
    loss = 0

    for c in range(len(target_var)):
        # First, we feed SOS. Others, we use teacher forcing.
        token = target_var[c - 1] if c else str2tensor(SOS_token)
        output, hidden, attention = decoder(token, hidden, encoder_outputs)

        loss += criterion(output, target_var[c])

    # Clear stale gradients on both networks before back-propagating.
    encoder.zero_grad()
    decoder.zero_grad()
    loss.backward()
    optimizer.step()

    # `loss.data[0]` fails on 0-dim tensors in newer PyTorch releases; prefer
    # `.item()` and keep the legacy access only for versions that lack it.
    loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
    return loss_value / len(target_var)

In [4]:
# Simple test to show how train works
def test():
    """Smoke-test a tiny encoder/attention-decoder pair and print tensor sizes.

    Builds throwaway models with sizes (10, 10, 2), encodes the index
    sequence [1, 2, 3], then steps the decoder once per target index while
    printing the size of every returned tensor. The collected attention
    buffer is filled in but not returned; the function returns None.
    """
    enc = sm.EncoderRNN(10, 10, 2)
    dec = sm.AttnDecoderRNN(10, 10, 2)

    if t.cuda.is_available():
        for module in (enc, dec):
            module.cuda()

    # Encode the whole input sequence in one call.
    source_ids = cuda_variable(t.LongTensor([1, 2, 3]))
    enc_outputs, enc_state = enc(source_ids, enc.init_hidden())
    print("encoder_outputs size: ", enc_outputs.size())

    target_ids = cuda_variable(t.LongTensor([1, 2, 3]))
    attn_buffer = t.zeros(1, 3, 3)
    dec_state = enc_state

    step = 0
    n_steps = len(target_ids)
    while step < n_steps:
        result = dec(target_ids[step], dec_state, enc_outputs)
        dec_out, dec_state, dec_attn = result
        print("decoder output size: ", dec_out.size(),
              "\ndecoder hidden size: ", dec_state.size(),
              "\ndecoder attn size: ", dec_attn.size())
        # Keep a CPU copy of this step's attention in row `step`.
        attn_buffer[0, step] = dec_attn.squeeze(0).cpu().data
        step += 1


In [5]:
# Translate the given input
def translate(enc_input="thisissungkim.iloveyou", predict_len=100, temperate=0.9):
    """Decode ``enc_input`` by sampling one character at a time.

    Relies on the notebook-level globals ``encoder`` and ``decoder``.

    Args:
        enc_input: source sequence handed to ``str2tensor``.
        predict_len: maximum number of decoder steps to run.
        temperate: divisor applied to the decoder output before
            exponentiation (a sampling temperature; smaller values make the
            sampling distribution sharper).

    Returns:
        ``(predicted, attentions)`` where ``predicted`` is the generated
        string (without the EOS symbol) and ``attentions`` is a list holding
        one flattened attention-weight list per decoder step, including the
        step that produced EOS.
    """
    input_var = str2tensor(enc_input)

    encoder_hidden = encoder.init_hidden()
    encoder_outputs, encoder_hidden = encoder(input_var, encoder_hidden)

    # The decoder starts from the encoder's final hidden state.
    hidden = encoder_hidden

    predicted = ''

    dec_input = str2tensor(SOS_token)
    attentions = []

    for c in range(predict_len):
        output, hidden, attention = decoder(dec_input, hidden, encoder_outputs)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperate).exp()
        # int() normalises the sample to a plain Python int whether indexing
        # yields an int (old PyTorch) or a 0-dim tensor (newer PyTorch).
        top_i = int(t.multinomial(output_dist, 1)[0])
        attentions.append(attention.view(-1).data.cpu().numpy().tolist())

        # Compare by value: `is` tests object identity, which never matches
        # a tensor and only matches ints through interpreter caching.
        # Assumes EOS_token is an integer index - TODO confirm in pyfile.
        if top_i == EOS_token:
            break

        predicted_char = chr(top_i)
        predicted += predicted_char

        # Feed the sampled character back in as the next decoder input.
        dec_input = str2tensor(predicted_char)
    return predicted, attentions

In [None]:
# main
# N_CHARS: 128, HIDDEN_SIZE: 128, N_LAYERS: 1
encoder = sm.EncoderRNN(N_CHARS, HIDDEN_SIZE, N_LAYERS)
decoder = sm.AttnDecoderRNN(HIDDEN_SIZE, N_CHARS, N_LAYERS)

# Move both networks to the GPU when one is present.
if t.cuda.is_available():
    encoder.cuda()
    decoder.cuda()
print("encoder: ", encoder,
      "\ndecoder: ", decoder)


# Optimizer and Loss
# A single Adam instance updates encoder and decoder parameters together.
params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(params, lr=0.001)
criterion = nn.CrossEntropyLoss()

train_loader = Data.DataLoader(dataset=TextDataset(),
                               batch_size=BATCH_SIZE,
                               shuffle=True,
                               num_workers=2)

print("Training for %d epochs..." % N_EPOCH)

for epoch in range(1, N_EPOCH + 1):
    # Get srcs and targets from data loader
    for i, (srcs, targets) in enumerate(train_loader):
        # Only the first element of each batch is used (BATCH_SIZE is 1).
        train_loss = train(srcs[0], targets[0])

        # Report every 100 steps. Use `==` here: `is` compares object
        # identity and emits a SyntaxWarning with a literal on Python 3.8+.
        if i % 100 == 0:
            print("Epoch: (%d/%d) Step: (%d/%d) Loss: %.4f" %
                  (epoch, N_EPOCH, i, len(train_loader), train_loss))

            # Sample a translation of the sequence that was just trained on.
            output, _ = translate(srcs[0])
            print(srcs[0], output, '\n')

            # Sample a translation of translate()'s default input.
            # NOTE(review): the label printed below ends with '.', while the
            # default input of translate() does not.
            output, attentions = translate()
            print('thisissungkim.iloveyou.', output, '\n')
        
        

In [None]:
# Show the first ten (src, target) samples. Index one integer at a time:
# integer indexing is what the dataset is known to support, whereas a slice
# such as dataset[:10] is not guaranteed to yield ten individual samples.
for sample_idx in range(min(10, len(train_loader.dataset))):
    eachdata = train_loader.dataset[sample_idx]
    print(eachdata)

In [9]:
# Show the sample stored at index 1 of the underlying dataset.
print(train_loader.dataset[1])

('beforeweproceedanyfurther,hearmespeak.', 'Before we proceed any further, hear me speak.')


In [13]:
# Walk the loader once and print the first source/target of every batch.
for i, batch in enumerate(train_loader):
    srcs, targets = batch
    print("srcs: ", srcs[0])
    print("targets: ", targets[0])

srcs size:  theplebeianshavegotyourfellow-tribune
targets size:  The plebeians have got your fellow-tribune


ValueError: 