In [1]:
# Reference: PyTorch seq2seq translation tutorial
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

In [2]:
%load_ext autoreload
%autoreload 2

import inspect
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

from utils import *

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def prepareData(lang1, lang2, reverse=False, max_pairs=100):
    """Load a sentence-pair corpus, trim it, and build both vocabularies.

    Args:
        lang1: First language name, forwarded to ``readLangs``.
        lang2: Second language name, forwarded to ``readLangs``.
        reverse: Forwarded to ``readLangs``.
        max_pairs: Keep only the first ``max_pairs`` filtered pairs so that
            training stays fast. ``None`` keeps every filtered pair. The
            default of 100 is the value that used to be hard-coded here.

    Returns:
        dict: ``{"input_lang": ..., "output_lang": ..., "pairs": ...}`` where
        both language objects have been fed every kept sentence.

    Raises:
        ValueError: If ``max_pairs`` is negative.
    """
    if max_pairs is not None and max_pairs < 0:
        raise ValueError("max_pairs must be None or a non-negative integer")

    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    # Slicing with None keeps the whole list, so one expression covers both cases.
    pairs = pairs[:max_pairs]
    print("Sampled %s sentence pairs (for faster training)" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)

    return {
        "input_lang": input_lang,
        "output_lang": output_lang,
        "pairs": pairs,
    }

# With reverse=True the printed counts show 'fra' as the input language and
# 'eng' as the output language.
dataset = prepareData('eng', 'fra', reverse=True)
print(random.choice(dataset["pairs"]))

Reading lines...
Read 135842 sentence pairs
Trimmed to 10599 sentence pairs
Sampled 100 sentence pairs (for faster training)
Counting words...
Counted words:
fra 92
eng 62
['je suis touche !', 'i m hit !']


In [4]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call.

    Args:
        input_size: Number of rows in the embedding table (source vocabulary size).
        hidden_size: Width of both the token embeddings and the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Encode a single token.

        Args:
            input: Tensor holding one token index.
            hidden: Previous hidden state of shape ``(1, 1, hidden_size)``.

        Returns:
            tuple: ``(output, hidden)`` as returned by the GRU for this step.
        """
        # The GRU expects (seq_len, batch, features); one token is (1, 1, hidden_size).
        embedded = self.embedding(input).view(1, 1, -1)
        return self.gru(embedded, hidden)

    def initHidden(self):
        """Return an all-zero initial hidden state of shape ``(1, 1, hidden_size)``.

        The tensor is created on the device that holds this module's
        parameters, so it stays valid after ``.to(...)`` and does not rely on
        a notebook-level ``device`` variable.
        """
        return torch.zeros(1, 1, self.hidden_size, device=self.embedding.weight.device)

class DecoderRNN(nn.Module):
    """Vanilla GRU decoder (no attention) that scores one output token per call.

    Args:
        hidden_size: Width of both the token embeddings and the GRU hidden state.
        output_size: Number of rows in the embedding table and number of
            classes scored by the output layer (target vocabulary size).
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Decode a single step.

        Args:
            input: Tensor holding one token index (the previous output token).
            hidden: Previous hidden state of shape ``(1, 1, hidden_size)``.

        Returns:
            tuple: ``(log_probs, hidden)`` where ``log_probs`` has shape
            ``(1, output_size)`` and holds log-probabilities over the
            vocabulary, and ``hidden`` is the updated GRU state.
        """
        # The GRU expects (seq_len, batch, features); one token is (1, 1, hidden_size).
        embedded = self.embedding(input).view(1, 1, -1)
        activated = F.relu(embedded)
        gru_output, hidden = self.gru(activated, hidden)
        # gru_output[0] drops the length-1 sequence dimension -> (1, hidden_size).
        log_probs = self.softmax(self.out(gru_output[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape ``(1, 1, hidden_size)``.

        The tensor is created on the device that holds this module's
        parameters, so it stays valid after ``.to(...)`` and does not rely on
        a notebook-level ``device`` variable.
        """
        return torch.zeros(1, 1, self.hidden_size, device=self.embedding.weight.device)

In [5]:
# Probability that a training example feeds the ground-truth token (instead of
# the decoder's own prediction) as the next decoder input.
teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) sentence pair.

    Both decoder flavours are supported:

    * attention decoders whose ``forward`` takes
      ``(input, hidden, encoder_outputs)`` and returns
      ``(output, hidden, attention)``;
    * vanilla decoders such as ``DecoderRNN`` whose ``forward`` takes
      ``(input, hidden)`` and returns ``(output, hidden)``.

    Args:
        input_tensor: Source token indices, iterated along dimension 0.
        target_tensor: Target token indices, iterated along dimension 0.
        encoder: Encoder module exposing ``initHidden()`` and ``hidden_size``.
        decoder: Decoder module (attention or vanilla, see above).
        encoder_optimizer: Optimizer stepping the encoder parameters.
        decoder_optimizer: Optimizer stepping the decoder parameters.
        criterion: Loss applied to each decoder output and target token.
        max_length: Number of rows allocated for the encoder outputs.

    Returns:
        float: The summed per-step loss divided by the target length.

    Raises:
        ValueError: If ``target_tensor`` is empty (there is nothing to learn from).
    """
    target_length = target_tensor.size(0)
    if target_length == 0:
        raise ValueError("target_tensor must contain at least one token")
    input_length = input_tensor.size(0)

    # Decide once per call whether the decoder's forward() accepts a third
    # positional argument (the encoder outputs used by attention decoders).
    forward_params = list(inspect.signature(decoder.forward).parameters.values())
    positional_kinds = (inspect.Parameter.POSITIONAL_ONLY,
                        inspect.Parameter.POSITIONAL_OR_KEYWORD)
    n_positional = sum(1 for p in forward_params if p.kind in positional_kinds)
    has_var_positional = any(p.kind is inspect.Parameter.VAR_POSITIONAL
                             for p in forward_params)
    decoder_takes_encoder_outputs = n_positional >= 3 or has_var_positional

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    # The decoder starts from the encoder's final hidden state.
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        if decoder_takes_encoder_outputs:
            step_result = decoder(decoder_input, decoder_hidden, encoder_outputs)
        else:
            step_result = decoder(decoder_input, decoder_hidden)
        # Any extra outputs (e.g. attention weights) are not needed for training.
        decoder_output, decoder_hidden = step_result[0], step_result[1]

        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = target_tensor[di]
        else:
            # Otherwise feed the decoder's own best guess, detached from history.
            decoder_input = decoder_output.topk(1)[1].squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    # Normalised by the full target length even when decoding stopped early at EOS.
    return loss.item() / target_length

def trainIters(encoder, decoder, dataset, n_iters, print_every=100, learning_rate=0.001):
    """Train the encoder/decoder pair on randomly drawn sentence pairs.

    Args:
        encoder: Encoder module to optimise.
        decoder: Decoder module to optimise.
        dataset: Dict with ``"input_lang"``, ``"output_lang"`` and ``"pairs"``.
        n_iters: Number of training steps; each step uses one random pair.
        print_every: Report the average loss every this many steps.
        learning_rate: Learning rate for both Adam optimizers. Defaults to
            0.001, the value that used to be hard-coded.
    """
    print_loss_total = 0  # Reset every print_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # All pairs are drawn up front, before any training step consumes random numbers.
    training_pairs = [tensorsFromPair(dataset["input_lang"], dataset["output_lang"], random.choice(dataset["pairs"]))
                      for _ in range(n_iters)]

    criterion = nn.NLLLoss()

    for step, training_pair in enumerate(training_pairs, start=1):
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print(f"Step: {step} ({step / n_iters * 100}%) Loss: {print_loss_avg}")

# The same hidden_size is handed to the encoder and to the attention decoder.
hidden_size = 256
encoder1 = EncoderRNN(
    input_size=dataset["input_lang"].n_words,
    hidden_size=hidden_size,
).to(device)
attn_decoder1 = DecoderAttentionRNN(
    hidden_size,
    dataset["output_lang"].n_words,
    dropout_p=0.1,
).to(device)

trainIters(encoder1, attn_decoder1, dataset, n_iters=1000)

Step: 100 (10.0%) Loss: 1.3914207198619841
Step: 200 (20.0%) Loss: 1.0527398685614269
Step: 300 (30.0%) Loss: 0.7719026787678398
Step: 400 (40.0%) Loss: 0.6057501345872879
Step: 500 (50.0%) Loss: 0.45779694914817787
Step: 600 (60.0%) Loss: 0.26584285498658816
Step: 700 (70.0%) Loss: 0.19282838666935756
Step: 800 (80.0%) Loss: 0.17215220003326734
Step: 900 (90.0%) Loss: 0.10387502460926772
Step: 1000 (100.0%) Loss: 0.1058629355082909


In [6]:
def evaluate(encoder, decoder, dataset, sentence, max_length=MAX_LENGTH):
    """Greedily decode ``sentence`` with the given encoder/decoder pair.

    Both modules are switched to evaluation mode for the duration of the call
    so that dropout is disabled and the result is deterministic; their
    previous training/eval mode is restored before returning.

    Attention decoders (``forward(input, hidden, encoder_outputs)`` returning
    ``(output, hidden, attention)``) and vanilla decoders
    (``forward(input, hidden)`` returning ``(output, hidden)``) are both
    accepted.

    Args:
        encoder: Encoder module exposing ``initHidden()`` and ``hidden_size``.
        decoder: Decoder module (attention or vanilla, see above).
        dataset: Dict with ``"input_lang"`` and ``"output_lang"``.
        sentence: Source sentence passed to ``tensorFromSentence``.
        max_length: Maximum number of decoding steps and number of rows
            allocated for the encoder outputs.

    Returns:
        tuple: ``(decoded_words, attentions)``. ``decoded_words`` is the list
        of predicted words, ending with ``'<EOS>'`` when the decoder emitted
        the end token. ``attentions`` has one row per decoding step taken;
        rows stay zero for decoders that return no attention weights.
    """
    # Decide once whether the decoder's forward() accepts the encoder outputs
    # as a third positional argument.
    forward_params = list(inspect.signature(decoder.forward).parameters.values())
    positional_kinds = (inspect.Parameter.POSITIONAL_ONLY,
                        inspect.Parameter.POSITIONAL_OR_KEYWORD)
    n_positional = sum(1 for p in forward_params if p.kind in positional_kinds)
    has_var_positional = any(p.kind is inspect.Parameter.VAR_POSITIONAL
                             for p in forward_params)
    decoder_takes_encoder_outputs = n_positional >= 3 or has_var_positional

    # Remember each module's mode so the caller's state is restored afterwards.
    previous_modes = [(module, module.training) for module in (encoder, decoder)]
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(dataset["input_lang"], sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            # The decoder starts from the encoder's final hidden state.
            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)
            # Tracked explicitly so max_length == 0 returns an empty result
            # instead of failing on an unbound loop variable.
            steps_taken = 0

            for di in range(max_length):
                if decoder_takes_encoder_outputs:
                    step_result = decoder(decoder_input, decoder_hidden, encoder_outputs)
                else:
                    step_result = decoder(decoder_input, decoder_hidden)
                decoder_output, decoder_hidden = step_result[0], step_result[1]
                if len(step_result) > 2:
                    decoder_attentions[di] = step_result[2]
                steps_taken = di + 1

                topi = decoder_output.topk(1)[1]
                token_index = topi.item()
                if token_index == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                decoded_words.append(dataset["output_lang"].index2word[token_index])

                decoder_input = topi.squeeze().detach()

            return decoded_words, decoder_attentions[:steps_taken]
    finally:
        for module, was_training in previous_modes:
            module.train(was_training)

def evaluateRandomly(encoder, decoder, dataset, n=3):
    """Print input, expected output and prediction for ``n`` random pairs.

    Each pair is drawn with ``random.choice`` from ``dataset["pairs"]`` and
    translated with ``evaluate``; the attention weights it returns are ignored.
    """
    for _ in range(n):
        chosen = random.choice(dataset["pairs"])
        source_sentence, reference = chosen[0], chosen[1]
        print("Input: ", source_sentence)
        print("Expected:", reference)
        predicted_words, _attentions = evaluate(encoder, decoder, dataset, source_sentence)
        print("Pred: ", ' '.join(predicted_words))
        print('')

# Spot-check the model on a few randomly chosen pairs from the dataset.
evaluateRandomly(encoder1, attn_decoder1, dataset)

Input:  je suis armee .
Expected: i m armed .
Pred:  i m armed . <EOS>

Input:  je suis arme .
Expected: i m armed .
Pred:  i m armed . <EOS>

Input:  je suis mouille .
Expected: i m wet .
Pred:  i m wet . <EOS>

