In [0]:
# Translation with Sequence to sequence with attention in Pytorch
# From examples, modified for new dataset by Alex Shah
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html#sphx-glr-intermediate-seq2seq-translation-tutorial-py
# https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/seq2seq_translation_tutorial.ipynb

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

from io import open
import unicodedata
import string
import time
import random
import math

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 30

#lang1, lang2 = "eng", "spa"
lang1, lang2 = "eng", "fra"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1


In [125]:
test = "Run!	¡Corran!	CC-BY 2.0 (France) Attribution: tatoeba.org #906328 (papabear) & #5213896 (cueyayotl)"
splittest = test.split('\t')[:-1]
splittest

['Run!', '¡Corran!']

In [0]:
def cleanString(s):
  s = s.lower().strip()
  s.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )
  return s

def readInDS(lang1, lang2):
  lines = open('/content/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
  pairs = [[cleanString(s) for s in l.split('\t')[:2]] for l in lines]
  input_lang = Lang(lang1)
  output_lang = Lang(lang2)
  return input_lang, output_lang, pairs

def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

def preprocess(lang1, lang2):
  input_lang, output_lang, pairs = readInDS(lang1, lang2)
  pairs = filterPairs(pairs)
  for pair in pairs:
      input_lang.addSentence(pair[0])
      output_lang.addSentence(pair[1])
  print(input_lang.name, input_lang.n_words)
  print(output_lang.name, output_lang.n_words)
  return input_lang, output_lang, pairs

In [0]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
      super(EncoderRNN, self).__init__()
      self.hidden_size = hidden_size
      self.embedding = nn.Embedding(input_size, hidden_size)
      self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
      embedded = self.embedding(input).view(1,1,-1)
      output = embedded
      output, hidden = self.gru(output, hidden)
      return output, hidden

    def initHidden(self):
      return torch.zeros(1,1,self.hidden_size, device=device)

#No attention
class DecoderRNN(nn.Module):
  def __init__(self, hidden_size, output_size):
    super(DecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, input, hidden):
    output = self.embedding(input).view(1,1,-1)
    output = F.relu(output)
    output, hidden = self.gru(output, hidden)
    output = self.softmax(self.out(output[0]))
    return output, hidden

  def initHidden(self):
    return torch.zeros(1,1,self.hidden_size, device=device)

#with attention
class AttentionDecoderRNN(nn.Module):
  def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
    super(AttentionDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.dropout_p = dropout_p
    self.max_length = max_length
    self.embedding = nn.Embedding(self.output_size, self.hidden_size)
    self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
    self.attn_combo = nn.Linear(self.hidden_size * 2, self.hidden_size)
    self.dropout = nn.Dropout(self.dropout_p)
    self.gru = nn.GRU(self.hidden_size, self.hidden_size)
    self.out = (self.hidden_size, self.output_size)

  def forward(self, input, hidden, encoder_outputs):
    embedded = self.embedding(input).view(1,1,-1)
    embedded = self.dropout(embedded)
    attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
    attn_made = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
    output = torch.cat((embedded[0], attn_made[0]), 1)
    output = self.attn_combo(output).unsqueeze(0)
    output = F.relu(output)
    output, hidden = self.gru(output, hidden)
    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights

  def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [0]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [0]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [0]:
def train(in_tensor, out_tensor, encoder, decoder, enc_opt, dec_opt, crit, max_length=MAX_LENGTH):
  enc_hidden = encoder.initHidden()
  enc_opt.zero_grad()
  dec_opt.zero_grad()
  in_length = in_tensor.size(0)
  out_length = out_tensor.size(0)
  enc_outs = torch.zeros(max_length, encoder.hidden_size, device=device)
  loss = 0

  for i in range(in_length):
    enc_out, enc_hidden = encoder(in_tensor[i], enc_hidden)
    enc_outs[i] = enc_out[0,0]

  dec_in = torch.tensor([[SOS_token]], device=device)
  dec_hidden = enc_hidden

  for i in range(out_length):
    dec_out, dec_hidden, dec_attn = decoder(dec_in, dec_hidden, enc_outs)
    kv, ki = dec_out.topk(1)
    dec_in = ki.squeeze().detach()
    loss += crit(dec_out, out_tensor[i])
    if dec_in.item() == EOS_token:
      break
    loss.backward()
    enc_opt.step()
    dec_opt.step()
    return loss.item()/out_length

In [0]:
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [0]:
def trainIters(encoder, decoder, n_iters, print_every=1000, learning_rate=0.01):
    start = time.time()
    print_loss_total = 0

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

In [0]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
  with torch.no_grad():
    input_tensor - tensorFromSentence(input_lang, sentence)
    input_length = input_tensor.size()[0]
    encoder_hidden = encoder.initHidden()
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for i in range(input_length):
      encoder_output, encoder_hidden = encoder(input_tensor[i], encoder_hidden)
      encoder_outputs[i] += encoder_output[0,0]
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    decoded_words = []
    decoder_attn = torch.zeros(max_length, max_length)

    for i in range(max_length):
      decoder_output, decoder_hidden, decoder_attn = decoder(decoder_input, decoder_hidden, encoder_outputs)
      decoder_attn[i] = decoder_attn.data
      kv, ki = deocder_output.data.topk(1)
      if ki.item() == EOS_token:
        decoded_words.append('<EOS>')
        break
      else:
        decoded_words.append(output_lang.index2word[ki.item()])
      decoder_input = ki.squeeze().detach()
    return decoded_words, decoder_attn[:i+1]

  def randEval(encoder, decoder, num_examples=10):
    for i in range(num_examples):
      pair = random.choice(pairs)
      print(" input: ", pair[0])
      print(" actual: ", pair[1])
      output_words, attentions = evaluate(encoder, decoder, pair[0])
      output_sentence = ' '.join(output_words)
      print(" predicted: ", output_sentence)

In [174]:
hidden_size = 256
input_lang, output_lang, pairs = preprocess(lang1, lang2)

encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75000, print_every=50)

eng 24405
fra 43119
0m 12s (- 300m 22s) (50 0%) 1.6795
0m 22s (- 277m 45s) (100 0%) 1.4003
0m 32s (- 271m 39s) (150 0%) 1.3989
0m 43s (- 271m 25s) (200 0%) 0.9451
0m 54s (- 270m 15s) (250 0%) 0.8853
1m 5s (- 270m 24s) (300 0%) 0.9102
1m 15s (- 268m 31s) (350 0%) 0.9599
1m 26s (- 267m 35s) (400 0%) 0.9003
1m 36s (- 266m 44s) (450 0%) 0.8138
1m 47s (- 266m 1s) (500 0%) 0.8133
1m 57s (- 265m 54s) (550 0%) 0.9971
2m 8s (- 265m 29s) (600 0%) 0.9661
2m 19s (- 266m 21s) (650 0%) 0.7322
2m 30s (- 267m 0s) (700 0%) 0.7396
2m 41s (- 265m 56s) (750 1%) 1.0041
2m 51s (- 265m 48s) (800 1%) 0.7909
3m 2s (- 265m 41s) (850 1%) 0.8473
3m 13s (- 265m 26s) (900 1%) 0.7899
3m 23s (- 264m 48s) (950 1%) 0.8639
3m 34s (- 263m 58s) (1000 1%) 0.7457
3m 44s (- 264m 4s) (1050 1%) 0.7844
3m 55s (- 263m 39s) (1100 1%) 0.9487
4m 5s (- 263m 0s) (1150 1%) 0.7112
4m 16s (- 263m 14s) (1200 1%) 0.8300
4m 27s (- 262m 50s) (1250 1%) 0.8937
4m 37s (- 262m 5s) (1300 1%) 1.0753
4m 47s (- 261m 43s) (1350 1%) 0.6931
4m 58s (- 

KeyboardInterrupt: ignored