In [1]:
import json
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import math
import matplotlib.ticker as ticker
import numpy as np
import time
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data

In [2]:
# Number of encoder positions the decoder's attention layer spans (default for
# AttnDecoderRNN.max_length); train() truncates longer source sentences to it.
MAX_LENGTH = 50

class EncoderRNN(nn.Module):
    """GRU encoder that is fed one token index per call.

    Args:
        input_size: Number of rows in the embedding table.
        hidden_size: Width of the embeddings and of the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        # Embedding width equals the GRU width, so no projection is needed.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Run one GRU step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous GRU state, as produced by ``initHidden`` or by an
                earlier call.

        Returns:
            The ``(output, hidden)`` pair returned by the GRU.
        """
        # Reshape the looked-up vector to (seq_len=1, batch=1, hidden_size).
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero initial state of shape (1, 1, hidden_size)."""
        return torch.zeros((1, 1, self.hidden_size))

    
class AttnDecoderRNN(nn.Module):
    """GRU decoder with attention over a fixed number of encoder positions.

    Args:
        hidden_size: Width of the embeddings and of the GRU state.
        output_size: Number of rows in the embedding table and width of the
            output distribution.
        dropout_p: Dropout probability applied to the embedded input token.
        max_length: Number of encoder positions the attention layer scores.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        # Scores every encoder position from [embedded token ; previous state].
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        # Mixes [embedded token ; attention context] back down to hidden_size.
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Decode one step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous decoder state; its first slice is concatenated
                with the embedded token to compute the attention scores.
            encoder_outputs: Encoder states; the matrix product below requires
                its first dimension to equal ``max_length``.

        Returns:
            Tuple ``(log_probs, hidden, attn_weights)``: log-softmax scores over
            the output vocabulary, the new GRU state, and the attention weights
            over the encoder positions.
        """
        token_vec = self.dropout(self.embedding(input).view(1, 1, -1))

        # Softmax over the max_length encoder positions.
        attn_scores = self.attn(torch.cat((token_vec[0], hidden[0]), 1))
        attn_weights = F.softmax(attn_scores, dim=1)

        # Weighted sum of the encoder states.
        context = torch.bmm(attn_weights.unsqueeze(0),
                            encoder_outputs.unsqueeze(0))

        gru_input = self.attn_combine(
            torch.cat((token_vec[0], context[0]), 1)).unsqueeze(0)
        gru_output, hidden = self.gru(F.relu(gru_input), hidden)

        log_probs = F.log_softmax(self.out(gru_output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero initial state of shape (1, 1, hidden_size)."""
        return torch.zeros((1, 1, self.hidden_size))

In [3]:
def read_data(file_name):
    """
    Reads a text file and returns one list of whitespace-separated tokens
    per line (an empty list for a blank line).
    """

    # The context manager closes the handle even if reading fails; the
    # original left it open.
    with open(file_name, "r") as f:
        return [line.strip().split() for line in f]


def word_to_index(file_name):
    """
    Loads a JSON vocabulary (word -> index) from file_name and returns it
    as a dictionary with an extra "sos" entry whose index is the number of
    entries that were in the dictionary before it was added.
    """

    # The context manager closes the handle once the JSON is parsed; the
    # original left it open.
    with open(file_name) as file:
        w2i = json.load(file)
    w2i["sos"] = len(w2i)
    return w2i


def index_to_word(dictionary):
    """
    Reverses a word -> index dictionary into index -> word and appends one
    extra "sos" entry at the next free index.

    The extra entry is added unconditionally, so the result always has one
    more key than there are distinct indices in `dictionary`, even when
    `dictionary` already maps "sos" itself.
    """

    reversed_dict = {index: word for word, index in dictionary.items()}

    # Use the largest index rather than the last one iterated: the original
    # relied on the leaked loop variable, which crashed on an empty dictionary
    # and could overwrite a real word when entries were not ordered by index.
    next_index = max(reversed_dict) + 1 if reversed_dict else 0
    reversed_dict[next_index] = "sos"
    return reversed_dict


def sentence_to_indices(w2i, sentence):
    """
    Maps every word of a sentence to its index in w2i and returns the
    indices as a list, in sentence order. Raises KeyError for a word that
    is missing from w2i.
    """

    indices = []
    for word in sentence:
        indices.append(w2i[word])
    return indices


def sentence_to_tensor(w2i, sentence):
    """
    Converts a sentence into a column tensor of word indices, one row per
    word, followed by one final row holding the module-level EOS_token.
    """

    # EOS_token is a global; it is appended regardless of which vocabulary
    # w2i is.
    token_ids = sentence_to_indices(w2i, sentence) + [EOS_token]
    return torch.tensor(token_ids, dtype=torch.long).view(-1, 1)

# Load both sides of the training corpus: one list of tokens per line.
# Presumably the two files are line-aligned (train_iterations pairs them by
# position) -- TODO confirm.
train_english = read_data("data/train_preprocessed.en")
train_french = read_data("data/train_preprocessed.fr")

# Word -> index vocabularies; word_to_index also adds an "sos" entry to each.
w2i_french = word_to_index("data/train_preprocessed.fr.json")
w2i_english = word_to_index("data/train_preprocessed.en.json")

# Index -> word lookups for both languages.
i2w_french = index_to_word(w2i_french)
i2w_english = index_to_word(w2i_english)

# NOTE(review): both special-token indices are read from the English
# vocabulary, yet sentence_to_tensor appends EOS_token to French targets and
# train() feeds SOS_token to the decoder. This assumes the same indices are
# valid for the French vocabulary -- TODO confirm.
EOS_token = w2i_english["eos"]
SOS_token = w2i_english["sos"]
# Probability with which train() feeds the reference token, rather than the
# decoder's own prediction, as the next decoder input.
teacher_forcing_ratio = 0.5
# Vocabulary sizes come from the index -> word tables; 256 is the hidden size.
encoder = EncoderRNN(len(i2w_english), 256)
decoder = AttnDecoderRNN(256, len(i2w_french))

In [4]:
def showPlot(points):
    """
    Plots a sequence of values as a line chart with y-axis ticks every 0.2.

    Exactly one figure is created per call; the original called plt.figure()
    before plt.subplots(), which left an extra empty figure behind each time.
    """

    _fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    # Draw on the axes created above instead of relying on pyplot's
    # "current axes" state.
    ax.plot(points)


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    # %d truncates a fractional seconds value towards zero.
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """
    Returns '<elapsed> (- <remaining>)', both formatted by asMinutes.

    `since` is a time.time() timestamp and `percent` the fraction of the work
    already done; the remaining time is extrapolated linearly from the
    elapsed time. `percent` must be non-zero.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))
    
# def train(w2i_english, w2i_french, dataloader_english, dataloader_french, 
#           encoder, decoder, encoder_optimizer, decoder_optimizer, 
#           criterion, minibatch_size, max_length=MAX_LENGTH):
#     """
#     Does one iteration of training.
#     """
    
#     loss = 0
    
#     for i in range(0, len(train_english), minibatch_size):
        
#         for j in range(minibatch_size):           
#             print("Sentence",  str(i+j), "out of", str(len(train_english)))
#             input_tensor = sentence_to_tensor(w2i_english, train_english[i+j])
#             target_tensor = sentence_to_tensor(w2i_french, train_french[i+j])

#             encoder_hidden = encoder.initHidden()

#             encoder_optimizer.zero_grad()
#             decoder_optimizer.zero_grad()

#             input_length = input_tensor.size(0)
#             target_length = target_tensor.size(0)

#             encoder_outputs = torch.zeros(max_length, encoder.hidden_size)

#             for ei in range(input_length):
#                 encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
#                 encoder_outputs[ei] = encoder_output[0, 0]

#             decoder_input = torch.tensor([[SOS_token]])
#             decoder_hidden = encoder_hidden
#             use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

#             if use_teacher_forcing:
#                 # Teacher forcing: Feed the target as the next input
#                 for di in range(target_length):
#                     decoder_output, decoder_hidden, decoder_attention = decoder(
#                         decoder_input, decoder_hidden, encoder_outputs)
#                     loss += criterion(decoder_output, target_tensor[di])
#                     decoder_input = target_tensor[di]  # Teacher forcing
#             else:
#                 # Without teacher forcing: use its own predictions as the next input
#                 for di in range(target_length):
#                     decoder_output, decoder_hidden, decoder_attention = decoder(
#                         decoder_input, decoder_hidden, encoder_outputs)
#                     topv, topi = decoder_output.topk(1)
#                     decoder_input = topi.squeeze().detach()  # detach from history as input

#                     loss += criterion(decoder_output, target_tensor[di])
#                     if decoder_input.item() == EOS_token:
#                         break

# #             loss.backward(retain_graph=True)            
#             loss.backward()            

#             encoder_optimizer.step()
#             decoder_optimizer.step()
#         encoder_hidden = encoder_hidden.detach()
#         decoder_hidden = decoder_hidden.detach()
#     return loss.item()/target_length


def train(input_sentence, target_sentence, w2i_english, 
          w2i_french, encoder, decoder, encoder_optimizer, 
          decoder_optimizer, criterion, minibatch_size, 
          max_length=MAX_LENGTH):
    """
    Does one iteration of training on a single sentence pair.

    Args:
        input_sentence: Source sentence as a list of words found in w2i_english.
        target_sentence: Target sentence as a list of words found in w2i_french.
        w2i_english, w2i_french: Word -> index vocabularies.
        encoder, decoder: The models to update.
        encoder_optimizer, decoder_optimizer: Their optimizers; gradients are
            zeroed at the start and one step is taken at the end.
        criterion: Loss applied to each decoder output and target index.
        minibatch_size: Accepted for interface compatibility; not used here,
            one sentence pair is processed per call.
        max_length: Number of encoder positions kept. Longer inputs are
            truncated. The decoder's attention requires this to equal
            decoder.max_length.

    Returns:
        The summed loss divided by the target length (including the appended
        end-of-sentence token).

    Reads the module-level SOS_token, EOS_token and teacher_forcing_ratio.
    """

    loss = 0
    input_tensor = sentence_to_tensor(w2i_english, input_sentence)
    target_tensor = sentence_to_tensor(w2i_french, target_sentence)

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Truncate with the same max_length that sizes encoder_outputs. The
    # original compared against the global MAX_LENGTH, which overflowed the
    # buffer whenever a smaller max_length was passed.
    input_length = min(input_tensor.size(0), max_length)
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]])
    decoder_hidden = encoder_hidden
    # Decide once per sentence whether the decoder sees the reference tokens.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input
            decoder_input = target_tensor[di]
        else:
            # Without teacher forcing: use its own prediction as the next input
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length


def train_iterations(w2i_english, w2i_french, train_english, train_french,
                     encoder, decoder, minibatch_size, n_iters, print_every=100, 
                     plot_every=100, learning_rate=0.01):
    """
    Trains the Encoder-Decoder model for a certain amount of iterations.

    Each iteration trains on one sentence pair, taken in corpus order. When
    n_iters exceeds the number of available pairs the corpus is cycled through
    again (the original raised IndexError at that point).

    Args:
        w2i_english, w2i_french: Word -> index vocabularies.
        train_english, train_french: Sentences (lists of words) paired by
            position; only the first min(len, len) pairs are used.
        encoder, decoder: The models to train.
        minibatch_size: Forwarded to train().
        n_iters: Number of training iterations.
        print_every: Print the running average loss every this many iterations.
        plot_every: Record one averaged loss point every this many iterations.
        learning_rate: Learning rate of both SGD optimizers.

    Raises:
        IndexError: If n_iters is positive but there are no sentence pairs.
    """

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    n_pairs = min(len(train_english), len(train_french))
    if n_iters > 0 and n_pairs == 0:
        raise IndexError("train_iterations needs at least one sentence pair")

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    # `step` rather than `iter`, to avoid shadowing the builtin.
    for step in range(1, n_iters + 1):
        # Wrap around so more iterations than sentence pairs is allowed.
        pair_index = (step - 1) % n_pairs
        input_sentence = train_english[pair_index]
        target_sentence = train_french[pair_index]
        loss = train(input_sentence, target_sentence, w2i_english, w2i_french,
                     encoder, decoder, encoder_optimizer, decoder_optimizer,
                     criterion, minibatch_size)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step/n_iters),
                                         step, float(step)/n_iters*100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    showPlot(plot_losses)
    
    
# NOTE(review): these two DataLoaders are built but not passed to anything in
# this cell -- the train_iterations() call below receives the raw
# train_english / train_french lists instead.
trainloader_english = torch.utils.data.DataLoader(train_english, 
                                                  batch_size=32, shuffle=False, 
                                                  num_workers=8)
trainloader_french = torch.utils.data.DataLoader(train_french,
                                                batch_size=32, shuffle=False,
                                                num_workers=8)
# testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=8)
    
# Run 29000 training iterations (n_iters); the positional 10 is minibatch_size.
train_iterations(w2i_english, w2i_french, train_english, train_french,
                 encoder, decoder, 10, 29000)

0m 8s (- 42m 4s) (100 0%) 4.4809


KeyboardInterrupt: 