In [1]:
%matplotlib inline

In [2]:
!pip install torchmetrics



In [3]:
import nltk
import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

nltk.download('punkt')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

Loading data files
==================

In [4]:
!wget http://www.manythings.org/anki/fra-eng.zip
!unzip -o fra-eng.zip
!mkdir data
!mv fra.txt data/eng-fra.txt

--2024-02-19 22:44:04--  http://www.manythings.org/anki/fra-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7833145 (7.5M) [application/zip]
Saving to: 'fra-eng.zip'


2024-02-19 22:44:06 (6.14 MB/s) - 'fra-eng.zip' saved [7833145/7833145]

Archive:  fra-eng.zip
  inflating: _about.txt              
  inflating: fra.txt                 


In [5]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [6]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

To read the data file we will split the file into lines, and then split
lines into pairs. The files are all English → Other Language, so if we
want to translate from Other Language → English I added the ``reverse``
flag to reverse the pairs.

In [7]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')[:2]] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

Since there are a *lot* of example sentences and we want to train
something quickly, we'll trim the data set to only relatively short and
simple sentences. Here the maximum length is 15 words (that includes
ending punctuation) and we're filtering to sentences that translate to
the form "I am" or "He is" etc. (accounting for apostrophes replaced
earlier).

In [8]:
MAX_LENGTH = 15

eng_prefixes = (
    "i am", "i m",
    "he is", "he s",
    "she is", "she s",
    "you are", "you re",
    "we are", "we re",
    "they are", "they re"
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

The full process for preparing the data is:

-  Read text file and split into lines, split lines into pairs
-  Normalize text, filter by length and content
-  Make word lists from sentences in pairs

In [9]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
#print(random.choice(pairs))

Reading lines...
Read 229803 sentence pairs
Trimmed to 22708 sentence pairs
Counting words...
Counted words:
fra 6986
eng 4611


In [10]:
from sklearn.model_selection import train_test_split

X = [i[0] for i in pairs]
y = [i[1] for i in pairs]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

train_pairs = list(zip(X_train,y_train))
test_pairs = list(zip(X_test,y_test))

The Seq2Seq Model
=================

The Encoder
-----------


In [11]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

The Decoder
-----------

The decoder is another RNN that takes the encoder output vector(s) and
outputs a sequence of words to create the translation.


Attention Decoder
^^^^^^^^^^^^^^^^^ (Example)

In [13]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)



Training
========

Preparing Training Data
-----------------------

In [14]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

Training the Model
------------------

In [15]:
teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

This is a helper function to print time elapsed and estimated time
remaining given the current time and progress %.

In [16]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

The whole training process looks like this:

-  Start a timer
-  Initialize optimizers and criterion
-  Create set of training pairs
-  Start empty losses array for plotting

Then we call ``train`` many times and occasionally print the progress (%
of examples, time so far, estimated time) and average loss.


In [17]:
def trainIters(encoder, decoder, epochs, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()

    iter = 1
    n_iters = len(train_pairs) * epochs

    for epoch in range(epochs):
        print("Epoch: %d/%d" % (epoch, epochs))
        for training_pair in train_pairs:
            training_pair = tensorsFromPair(training_pair)

            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                            iter, iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            iter +=1

    showPlot(plot_losses)

Plotting results
----------------

Plotting is done with matplotlib, using the array of loss values
``plot_losses`` saved while training.


In [18]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

Evaluation
==========

Evaluation is mostly the same as training, but there are no targets so
we simply feed the decoder's predictions back to itself for each step.
Every time it predicts a word we add it to the output string, and if it
predicts the EOS token we stop there. We also store the decoder's
attention outputs for display later.

In [19]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

We can evaluate random sentences from the training set and print out the
input, target, and output to make some subjective quality judgements:

In [20]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [21]:
from torchmetrics.text.rouge import ROUGEScore
from tqdm import tqdm

rouge = ROUGEScore()

def test(encoder, decoder, testing_pairs):
    input = []
    gt = []
    predict = []
    metric_score = {
        "rouge1_fmeasure":[],
        "rouge1_precision":[],
        "rouge1_recall":[],
        "rouge2_fmeasure":[],
        "rouge2_precision":[],
        "rouge2_recall":[]
    }
    from tqdm import tqdm
    for i in tqdm(range(len(testing_pairs))):
        pair = testing_pairs[i]
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)

        input.append(pair[0])
        gt.append(pair[1])
        predict.append(output_sentence)

        try:
            rs = rouge(output_sentence, pair[1])
        except:
            continue
        metric_score["rouge1_fmeasure"].append(rs['rouge1_fmeasure'])
        metric_score["rouge1_precision"].append(rs['rouge1_precision'])
        metric_score["rouge1_recall"].append(rs['rouge1_recall'])
        metric_score["rouge2_fmeasure"].append(rs['rouge2_fmeasure'])
        metric_score["rouge2_precision"].append(rs['rouge2_precision'])
        metric_score["rouge2_recall"].append(rs['rouge2_recall'])

    metric_score["rouge1_fmeasure"] = np.array(metric_score["rouge1_fmeasure"]).mean()
    metric_score["rouge1_precision"] = np.array(metric_score["rouge1_precision"]).mean()
    metric_score["rouge1_recall"] = np.array(metric_score["rouge1_recall"]).mean()
    metric_score["rouge2_fmeasure"] = np.array(metric_score["rouge2_fmeasure"]).mean()
    metric_score["rouge2_precision"] = np.array(metric_score["rouge2_precision"]).mean()
    metric_score["rouge2_recall"] = np.array(metric_score["rouge2_recall"]).mean()

    print("=== Evaluation score - Rouge score ===")
    print("Rouge1 fmeasure:\t",metric_score["rouge1_fmeasure"])
    print("Rouge1 precision:\t",metric_score["rouge1_precision"])
    print("Rouge1 recall:  \t",metric_score["rouge1_recall"])
    print("Rouge2 fmeasure:\t",metric_score["rouge2_fmeasure"])
    print("Rouge2 precision:\t",metric_score["rouge2_precision"])
    print("Rouge2 recall:  \t",metric_score["rouge2_recall"])
    print("=====================================")
    return input,gt,predict,metric_score

Training and Evaluating
=======================

With all these helper functions in place (it looks like extra work, but
it makes it easier to run multiple experiments) we can actually
initialize a network and start training.

Remember that the input sentences were heavily filtered. For this small
dataset we can use relatively small networks of 256 hidden nodes and a
single GRU layer. After about 40 minutes on a MacBook CPU we'll get some
reasonable results.

.. Note::
   If you run this notebook you can train, interrupt the kernel,
   evaluate, and continue training later. Comment out the lines where the
   encoder and decoder are initialized and run ``trainIters`` again.

In [21]:
hidden_size = 256
epochs = 4

encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, epochs, print_every=1000)

Epoch: 0/4
0m 17s (- 23m 49s) (1000 1%) 3.7981
0m 35s (- 23m 37s) (2000 2%) 3.3534
0m 53s (- 23m 17s) (3000 3%) 3.1568
1m 11s (- 23m 7s) (4000 4%) 3.0436
1m 29s (- 22m 52s) (5000 6%) 2.9308
1m 47s (- 22m 39s) (6000 7%) 2.8430
2m 5s (- 22m 21s) (7000 8%) 2.8518
2m 23s (- 22m 4s) (8000 9%) 2.7422
2m 42s (- 21m 50s) (9000 11%) 2.7138
3m 0s (- 21m 31s) (10000 12%) 2.6023
3m 18s (- 21m 14s) (11000 13%) 2.5893
3m 36s (- 20m 57s) (12000 14%) 2.6246
3m 54s (- 20m 39s) (13000 15%) 2.5064
4m 12s (- 20m 22s) (14000 17%) 2.4385
4m 30s (- 20m 4s) (15000 18%) 2.4244
4m 48s (- 19m 47s) (16000 19%) 2.4142
5m 7s (- 19m 29s) (17000 20%) 2.4000
5m 25s (- 19m 12s) (18000 22%) 2.3571
5m 43s (- 18m 55s) (19000 23%) 2.3802
6m 1s (- 18m 37s) (20000 24%) 2.2832
Epoch: 1/4
6m 20s (- 18m 20s) (21000 25%) 2.2259
6m 38s (- 18m 2s) (22000 26%) 2.1896
6m 56s (- 17m 45s) (23000 28%) 2.1512
7m 15s (- 17m 27s) (24000 29%) 2.1119
7m 33s (- 17m 9s) (25000 30%) 2.0959
7m 51s (- 16m 51s) (26000 31%) 2.0687
8m 10s (- 16m 33

#Evaluation randomly

In [22]:
evaluateRandomly(encoder1, attn_decoder1)

> nous ne rajeunissons pas .
= we re not getting any younger .
< we re not getting any any <EOS>

> ils sont saouls .
= they re drunk .
< they re weak . <EOS>

> je suis bilingue .
= i m bilingual .
< i m an electrician . <EOS>

> je suis une femme mariee desormais .
= i m a married woman now .
< i m a married now . <EOS>

> je lis un magazine .
= i am reading a magazine .
< i m reading a magazine . <EOS>

> vous me rappelez votre mere .
= you remind me of your mother .
< you re very disappointed . <EOS>

> je vais le prouver .
= i m going to prove it .
< i m going to the <EOS>

> je suis peut etre vieux mais pas fou .
= i may be old but i m not crazy .
< i may be quite enough to be . <EOS>

> je ne suis pas sur de ce que ca veut dire .
= i m not sure what it means .
< i m not sure that what what s . <EOS>

> j en suis certain .
= i m sure of that .
< i m not certain . <EOS>



In [23]:
input,gt,predict,score = test(encoder1, attn_decoder1, test_pairs)

100%|██████████| 2271/2271 [00:20<00:00, 112.14it/s]


=== Evaluation score - Rouge score ===
Rouge1 fmeasure:	 0.5936495
Rouge1 precision:	 0.56292987
Rouge1 recall:  	 0.6393193
Rouge2 fmeasure:	 0.40064085
Rouge2 precision:	 0.3729596
Rouge2 recall:  	 0.44334692


No-Att Decoder
===============

In [24]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        output, hidden = self.gru(embedded, hidden)

        output = F.relu(output)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [25]:
hidden_size = 256
epochs = 4

encoder2 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder2 = DecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder2, decoder2, epochs, print_every=5000)

Epoch: 0/4
1m 4s (- 16m 22s) (5000 6%) 3.3759
2m 9s (- 15m 30s) (10000 12%) 2.9843
3m 16s (- 14m 33s) (15000 18%) 2.8260
4m 23s (- 13m 34s) (20000 24%) 2.7245
Epoch: 1/4
5m 31s (- 12m 32s) (25000 30%) 2.5503
6m 39s (- 11m 28s) (30000 36%) 2.4608
7m 46s (- 10m 23s) (35000 42%) 2.3855
8m 54s (- 9m 17s) (40000 48%) 2.3351
Epoch: 2/4
10m 1s (- 8m 11s) (45000 55%) 2.2244
11m 9s (- 7m 5s) (50000 61%) 2.1817
12m 17s (- 5m 58s) (55000 67%) 2.1405
13m 25s (- 4m 51s) (60000 73%) 2.1081
Epoch: 3/4
14m 33s (- 3m 45s) (65000 79%) 2.0221
15m 41s (- 2m 37s) (70000 85%) 1.9735
16m 48s (- 1m 30s) (75000 91%) 1.9466
17m 56s (- 0m 23s) (80000 97%) 1.9285


In [26]:
evaluateRandomly(encoder2, decoder2)

> je vous vire .
= i m firing you .
< i m you you . <EOS>

> tu vas devoir faire confiance a tom .
= you re going to have to trust tom .
< you re going to have to do that . . <EOS>

> j achete les billets .
= i m buying the tickets .
< i m taking a . . <EOS>

> nous n allons pas vous faire de mal .
= we re not going to hurt you .
< we re not going to you . . <EOS>

> je vais devoir vous rappeler de suite .
= i m going to have to call you right back .
< i m going to have to to . . . . <EOS>

> vous n avez vraiment pas l ombre d une idee si ?
= you really don t have a clue do you ?
< you re not really a a a you you you you you <EOS>

> il est impatient d y aller .
= he is eager to go there .
< he is to go to . . <EOS>

> il est dans les affaires .
= he is in business .
< he is in in . . <EOS>

> tu es de loin plus rapide que moi .
= you re way faster than me .
< you re taller than me than me . <EOS>

> elle se tenait a cote de lui .
= she stood by him .
< she sat him to him . <EOS>



In [27]:
input,gt,predict,score = test(encoder2, decoder2, test_pairs)

100%|██████████| 2271/2271 [00:17<00:00, 127.17it/s]


=== Evaluation score - Rouge score ===
Rouge1 fmeasure:	 0.55004895
Rouge1 precision:	 0.530589
Rouge1 recall:  	 0.5803443
Rouge2 fmeasure:	 0.35792232
Rouge2 precision:	 0.3394463
Rouge2 recall:  	 0.38715023


LSTM-encoder and LSTM-decoder
=============================

In [28]:
class EncoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden,c):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, (hidden,c) = self.lstm(output, (hidden,c))
        return output, hidden,c

    def initHidden(self):
        return (
            torch.zeros(1, 1, self.hidden_size, device=device),
            torch.zeros(1, 1, self.hidden_size, device=device)
        )

In [29]:
class AttnDecoderLSTM(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden,c, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, (hidden,c) = self.lstm(output, (hidden,c))

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden,c

    def initHidden(self):
        return (
            torch.zeros(1, 1, self.hidden_size, device=device),
            torch.zeros(1, 1, self.hidden_size, device=device)
        )

In [30]:
teacher_forcing_ratio = 0.5

def train3(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden,c = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden,c = encoder(
            input_tensor[ei], encoder_hidden,c)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden,c = decoder(
                decoder_input, decoder_hidden,c,encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden,c = decoder(
                decoder_input, decoder_hidden,c, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [31]:
def trainIters3(encoder, decoder, epochs, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()

    iter = 1
    n_iters = len(train_pairs) * epochs

    for epoch in range(epochs):
        print("Epoch: %d/%d" % (epoch, epochs))
        for training_pair in train_pairs:
            training_pair = tensorsFromPair(training_pair)

            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train3(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                            iter, iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            iter +=1

    showPlot(plot_losses)

In [32]:
hidden_size = 256
epochs = 4

encoder3 = EncoderLSTM(input_lang.n_words, hidden_size).to(device)
decoder3 = AttnDecoderLSTM(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters3(encoder3, decoder3, epochs, print_every=5000)

Epoch: 0/4
1m 36s (- 24m 39s) (5000 6%) 3.4390
3m 16s (- 23m 27s) (10000 12%) 3.0061
4m 56s (- 21m 58s) (15000 18%) 2.7932
6m 37s (- 20m 28s) (20000 24%) 2.6290
Epoch: 1/4
8m 18s (- 18m 52s) (25000 30%) 2.4123
9m 59s (- 17m 14s) (30000 36%) 2.2959
11m 40s (- 15m 36s) (35000 42%) 2.1949
13m 22s (- 13m 58s) (40000 48%) 2.1214
Epoch: 2/4
15m 4s (- 12m 18s) (45000 55%) 1.9858
16m 45s (- 10m 38s) (50000 61%) 1.9188
18m 27s (- 8m 58s) (55000 67%) 1.8562
20m 9s (- 7m 18s) (60000 73%) 1.8180
Epoch: 3/4
21m 50s (- 5m 37s) (65000 79%) 1.7084
23m 32s (- 3m 57s) (70000 85%) 1.6483
25m 13s (- 2m 16s) (75000 91%) 1.6072
26m 55s (- 0m 35s) (80000 97%) 1.5848


In [33]:
def evaluate3(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden,c = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden,c = encoder(input_tensor[ei],
                                                     encoder_hidden,c)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden,c = decoder(
                decoder_input, decoder_hidden,c,encoder_outputs)

            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [34]:
def evaluateRandomly3(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate3(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [35]:
from torchmetrics.text.rouge import ROUGEScore
from tqdm import tqdm

rouge = ROUGEScore()

def test3(encoder, decoder, testing_pairs):
    input = []
    gt = []
    predict = []
    metric_score = {
        "rouge1_fmeasure":[],
        "rouge1_precision":[],
        "rouge1_recall":[],
        "rouge2_fmeasure":[],
        "rouge2_precision":[],
        "rouge2_recall":[]
    }
    from tqdm import tqdm
    for i in tqdm(range(len(testing_pairs))):
        pair = testing_pairs[i]
        output_words = evaluate3(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)

        input.append(pair[0])
        gt.append(pair[1])
        predict.append(output_sentence)

        try:
            rs = rouge(output_sentence, pair[1])
        except:
            continue
        metric_score["rouge1_fmeasure"].append(rs['rouge1_fmeasure'])
        metric_score["rouge1_precision"].append(rs['rouge1_precision'])
        metric_score["rouge1_recall"].append(rs['rouge1_recall'])
        metric_score["rouge2_fmeasure"].append(rs['rouge2_fmeasure'])
        metric_score["rouge2_precision"].append(rs['rouge2_precision'])
        metric_score["rouge2_recall"].append(rs['rouge2_recall'])

    metric_score["rouge1_fmeasure"] = np.array(metric_score["rouge1_fmeasure"]).mean()
    metric_score["rouge1_precision"] = np.array(metric_score["rouge1_precision"]).mean()
    metric_score["rouge1_recall"] = np.array(metric_score["rouge1_recall"]).mean()
    metric_score["rouge2_fmeasure"] = np.array(metric_score["rouge2_fmeasure"]).mean()
    metric_score["rouge2_precision"] = np.array(metric_score["rouge2_precision"]).mean()
    metric_score["rouge2_recall"] = np.array(metric_score["rouge2_recall"]).mean()

    print("=== Evaluation score - Rouge score ===")
    print("Rouge1 fmeasure:\t",metric_score["rouge1_fmeasure"])
    print("Rouge1 precision:\t",metric_score["rouge1_precision"])
    print("Rouge1 recall:  \t",metric_score["rouge1_recall"])
    print("Rouge2 fmeasure:\t",metric_score["rouge2_fmeasure"])
    print("Rouge2 precision:\t",metric_score["rouge2_precision"])
    print("Rouge2 recall:  \t",metric_score["rouge2_recall"])
    print("=====================================")
    return input,gt,predict,metric_score

In [36]:
evaluateRandomly3(encoder3, decoder3)

> je suis habitue a cet ordinateur .
= i m used to this computer .
< i m accustomed to this . <EOS>

> tu as bonne mine !
= you re looking good !
< you re looking good ! <EOS>

> il etudie le chinois .
= he studies chinese .
< he stopped in the . <EOS>

> nous vous protegeons .
= we re protecting you .
< we re waiting . <EOS>

> vous ne saisissez pas .
= you re missing the point .
< you re not listening any . <EOS>

> vous etes fort emotives .
= you re very emotional .
< you re very good . <EOS>

> je suis terrifie .
= i m terrified .
< i m a . <EOS>

> ils sont vraiment radins .
= they re really tight .
< they re really surprised . <EOS>

> je ne suis pas sur que vous soyez prete .
= i m not sure that you re ready .
< i m not sure that you re ready . <EOS>

> il a etabli des amities avec les gens les plus improbables .
= he struck up friendships with the most unlikely people .
< he is with with with with with . . <EOS>



In [37]:
input,gt,predict,score = test3(encoder3, decoder3, test_pairs)

100%|██████████| 2271/2271 [00:21<00:00, 106.58it/s]


=== Evaluation score - Rouge score ===
Rouge1 fmeasure:	 0.5957047
Rouge1 precision:	 0.56564856
Rouge1 recall:  	 0.63748354
Rouge2 fmeasure:	 0.40394336
Rouge2 precision:	 0.37731498
Rouge2 recall:  	 0.44290024


BiLSTM-encoder
==============

In [22]:
class EncoderBiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderBiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size,bidirectional=True)

    def forward(self, input, hidden,c):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, (hidden,c) = self.lstm(output, (hidden,c))
        return output, hidden,c

    def initHidden(self):
        return (
            torch.zeros(2, 1, self.hidden_size, device=device),
            torch.zeros(2, 1, self.hidden_size, device=device)
        )

In [24]:
class AttnDecoderRNN4(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN4, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2+self.hidden_size, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device),
           

In [25]:
teacher_forcing_ratio = 0.5

def train4(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden,c = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size*2, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden,c = encoder(
            input_tensor[ei], encoder_hidden,c)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden.mean(dim=0,keepdim=True)

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [26]:
def trainIters4(encoder, decoder, epochs, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()

    iter = 1
    n_iters = len(train_pairs) * epochs

    for epoch in range(epochs):
        print("Epoch: %d/%d" % (epoch, epochs))
        for training_pair in train_pairs:
            training_pair = tensorsFromPair(training_pair)

            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train4(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                            iter, iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            iter +=1

    showPlot(plot_losses)

In [29]:
def evaluate4(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden,c = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size*2, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden,c = encoder(input_tensor[ei],
                                                     encoder_hidden,c)
            encoder_outputs[ei] += encoder_output[0, 0]
    
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden.mean(dim=0,keepdim=True)
        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [30]:
def evaluateRandomly4(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate4(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [31]:
from torchmetrics.text.rouge import ROUGEScore
from tqdm import tqdm

rouge = ROUGEScore()

def test4(encoder, decoder, testing_pairs):
    input = []
    gt = []
    predict = []
    metric_score = {
        "rouge1_fmeasure":[],
        "rouge1_precision":[],
        "rouge1_recall":[],
        "rouge2_fmeasure":[],
        "rouge2_precision":[],
        "rouge2_recall":[]
    }
    from tqdm import tqdm
    for i in tqdm(range(len(testing_pairs))):
        pair = testing_pairs[i]
        output_words = evaluate4(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)

        input.append(pair[0])
        gt.append(pair[1])
        predict.append(output_sentence)

        try:
            rs = rouge(output_sentence, pair[1])
        except:
            continue
        metric_score["rouge1_fmeasure"].append(rs['rouge1_fmeasure'])
        metric_score["rouge1_precision"].append(rs['rouge1_precision'])
        metric_score["rouge1_recall"].append(rs['rouge1_recall'])
        metric_score["rouge2_fmeasure"].append(rs['rouge2_fmeasure'])
        metric_score["rouge2_precision"].append(rs['rouge2_precision'])
        metric_score["rouge2_recall"].append(rs['rouge2_recall'])

    metric_score["rouge1_fmeasure"] = np.array(metric_score["rouge1_fmeasure"]).mean()
    metric_score["rouge1_precision"] = np.array(metric_score["rouge1_precision"]).mean()
    metric_score["rouge1_recall"] = np.array(metric_score["rouge1_recall"]).mean()
    metric_score["rouge2_fmeasure"] = np.array(metric_score["rouge2_fmeasure"]).mean()
    metric_score["rouge2_precision"] = np.array(metric_score["rouge2_precision"]).mean()
    metric_score["rouge2_recall"] = np.array(metric_score["rouge2_recall"]).mean()

    print("=== Evaluation score - Rouge score ===")
    print("Rouge1 fmeasure:\t",metric_score["rouge1_fmeasure"])
    print("Rouge1 precision:\t",metric_score["rouge1_precision"])
    print("Rouge1 recall:  \t",metric_score["rouge1_recall"])
    print("Rouge2 fmeasure:\t",metric_score["rouge2_fmeasure"])
    print("Rouge2 precision:\t",metric_score["rouge2_precision"])
    print("Rouge2 recall:  \t",metric_score["rouge2_recall"])
    print("=====================================")
    return input,gt,predict,metric_score

In [32]:
hidden_size = 256
epochs = 6

encoder4 = EncoderBiLSTM(input_lang.n_words, hidden_size).to(device)
decoder4 = AttnDecoderRNN4(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters4(encoder4, decoder4, epochs, print_every=5000)

Epoch: 0/6
1m 34s (- 37m 12s) (5000 4%) 3.3593
3m 11s (- 35m 58s) (10000 8%) 2.9314
4m 49s (- 34m 37s) (15000 12%) 2.7352
6m 27s (- 33m 10s) (20000 16%) 2.5547
Epoch: 1/6
8m 6s (- 31m 40s) (25000 20%) 2.3475
9m 44s (- 30m 4s) (30000 24%) 2.2286
11m 23s (- 28m 31s) (35000 28%) 2.1320
13m 2s (- 26m 55s) (40000 32%) 2.0608
Epoch: 2/6
14m 40s (- 25m 18s) (45000 36%) 1.9184
16m 19s (- 23m 42s) (50000 40%) 1.8578
17m 58s (- 22m 5s) (55000 44%) 1.7912
19m 37s (- 20m 28s) (60000 48%) 1.7528
Epoch: 3/6
21m 16s (- 18m 51s) (65000 53%) 1.6511
22m 59s (- 17m 16s) (70000 57%) 1.6130
24m 42s (- 15m 41s) (75000 61%) 1.5643
26m 25s (- 14m 4s) (80000 65%) 1.5532
Epoch: 4/6
28m 8s (- 12m 27s) (85000 69%) 1.4582
29m 50s (- 10m 48s) (90000 73%) 1.4232
31m 31s (- 9m 10s) (95000 77%) 1.3918
33m 13s (- 7m 31s) (100000 81%) 1.4171
Epoch: 5/6
34m 57s (- 5m 51s) (105000 85%) 1.3413
36m 39s (- 4m 12s) (110000 89%) 1.2958
38m 21s (- 2m 32s) (115000 93%) 1.2664
40m 4s (- 0m 52s) (120000 97%) 1.2644


In [33]:
evaluateRandomly4(encoder4, decoder4)

> je te libere .
= i m letting you go .
< i m leaving you . <EOS>

> vous etes mon seul veritable ami .
= you re my only real friend .
< you re my only real friend . <EOS>

> elle est plus vieille que toi de deux ans .
= she is two years older than you .
< she is two years older than you . <EOS>

> je suis le chef de cette equipe .
= i m the leader of this team .
< i m the s of this team . <EOS>

> il fait ca depuis plus de vingt ans .
= he s been doing this for over twenty years .
< he s been as this s s than you . <EOS>

> tu es sur la liste n est ce pas ?
= you re on the list aren t you ?
< you re worried about you you ? <EOS>

> tu n es pas mariee si ?
= you re not married are you ?
< you re not married are you ? <EOS>

> si j ai offusque quelqu un je m en excuse .
= i m sorry if i offended anyone .
< i m quite someone because i m . <EOS>

> ce n est pas un mauvais bougre .
= he s not a bad guy .
< he is not a bad guy . <EOS>

> je suis etudiant en deuxieme annee .
= i m a second y

In [34]:
input,gt,predict,score = test4(encoder4, decoder4, test_pairs)

100%|██████████| 2271/2271 [00:21<00:00, 107.82it/s]


=== Evaluation score - Rouge score ===
Rouge1 fmeasure:	 0.6059015
Rouge1 precision:	 0.57410693
Rouge1 recall:  	 0.6513969
Rouge2 fmeasure:	 0.4183748
Rouge2 precision:	 0.39016286
Rouge2 recall:  	 0.46071836


Transformer-encoder
==================

In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class EncoderTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(EncoderTransformer, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.pos_encoding = PositionalEncoding(hidden_size)
        encoder_layer = TransformerEncoderLayer(hidden_size, nhead=8)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers)

    def forward(self, input):
        embedded = self.embedding(input)  # [seq_len, batch_size, hidden_size]
        embedded = embedded * math.sqrt(self.hidden_size)
        embedded = self.pos_encoding(embedded)
        output = self.transformer_encoder(embedded)  # [seq_len, batch_size, hidden_size]
        return output

class PositionalEncoding(nn.Module):
    def __init__(self, hidden_size, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=0.1)
        pe = torch.zeros(max_len, hidden_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, hidden_size, 2).float() * (-math.log(10000.0) / hidden_size))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [79]:
class AttnDecoderRNN5(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN5, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)

        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs)

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [80]:
teacher_forcing_ratio = 0.5
def train5(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    
    loss = 0

#     for ei in range(input_length):
#         encoder_output = encoder(input_tensor[ei])
#         encoder_outputs[ei] = encoder_output[0, 0]
    
    encoder_output = encoder(input_tensor)
    encoder_output = encoder_output.squeeze(1)
    encoder_outputs[:input_length,:]+=encoder_output
    encoder_outputs = encoder_outputs.unsqueeze(0)
    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_outputs[:,-1,:].unsqueeze(0)
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()
    
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [81]:
def trainIters5(encoder, decoder, epochs, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(),lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(),lr = learning_rate)

    criterion = nn.NLLLoss()

    iter = 1
    n_iters = len(train_pairs) * epochs

    for epoch in range(epochs):
        print("Epoch: %d/%d" % (epoch, epochs))
        for training_pair in train_pairs:
            training_pair = tensorsFromPair(training_pair)

            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train5(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                            iter, iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

            iter +=1

    showPlot(plot_losses)

In [82]:
def evaluate5(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

#         for ei in range(input_length):
#             encoder_output = encoder(input_tensor[ei])
#             encoder_outputs[ei] += encoder_output[0, 0]

        encoder_output = encoder(input_tensor)
        encoder_output = encoder_output.squeeze(1)
        encoder_outputs[:input_length,:]+=encoder_output
        encoder_outputs = encoder_outputs.unsqueeze(0)

        decoder_hidden = encoder_outputs[:,-1,:].unsqueeze(0)

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [83]:
def evaluateRandomly5(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate5(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [84]:
from torchmetrics.text.rouge import ROUGEScore
from tqdm import tqdm

rouge = ROUGEScore()

def test5(encoder, decoder, testing_pairs):
    input = []
    gt = []
    predict = []
    metric_score = {
        "rouge1_fmeasure":[],
        "rouge1_precision":[],
        "rouge1_recall":[],
        "rouge2_fmeasure":[],
        "rouge2_precision":[],
        "rouge2_recall":[]
    }
    from tqdm import tqdm
    for i in tqdm(range(len(testing_pairs))):
        pair = testing_pairs[i]
        output_words = evaluate5(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)

        input.append(pair[0])
        gt.append(pair[1])
        predict.append(output_sentence)

        try:
            rs = rouge(output_sentence, pair[1])
        except:
            continue
        metric_score["rouge1_fmeasure"].append(rs['rouge1_fmeasure'])
        metric_score["rouge1_precision"].append(rs['rouge1_precision'])
        metric_score["rouge1_recall"].append(rs['rouge1_recall'])
        metric_score["rouge2_fmeasure"].append(rs['rouge2_fmeasure'])
        metric_score["rouge2_precision"].append(rs['rouge2_precision'])
        metric_score["rouge2_recall"].append(rs['rouge2_recall'])

    metric_score["rouge1_fmeasure"] = np.array(metric_score["rouge1_fmeasure"]).mean()
    metric_score["rouge1_precision"] = np.array(metric_score["rouge1_precision"]).mean()
    metric_score["rouge1_recall"] = np.array(metric_score["rouge1_recall"]).mean()
    metric_score["rouge2_fmeasure"] = np.array(metric_score["rouge2_fmeasure"]).mean()
    metric_score["rouge2_precision"] = np.array(metric_score["rouge2_precision"]).mean()
    metric_score["rouge2_recall"] = np.array(metric_score["rouge2_recall"]).mean()

    print("=== Evaluation score - Rouge score ===")
    print("Rouge1 fmeasure:\t",metric_score["rouge1_fmeasure"])
    print("Rouge1 precision:\t",metric_score["rouge1_precision"])
    print("Rouge1 recall:  \t",metric_score["rouge1_recall"])
    print("Rouge2 fmeasure:\t",metric_score["rouge2_fmeasure"])
    print("Rouge2 precision:\t",metric_score["rouge2_precision"])
    print("Rouge2 recall:  \t",metric_score["rouge2_recall"])
    print("=====================================")
    return input,gt,predict,metric_score

In [85]:
hidden_size = 256
epochs = 4
num_layers=4

encoder5 = EncoderTransformer(input_lang.n_words, hidden_size, num_layers).to(device)
decoder5 = AttnDecoderRNN5(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters5(encoder5, decoder5, epochs, print_every=1000,learning_rate=0.005)

Epoch: 0/4
0m 20s (- 27m 1s) (1000 1%) 3.7144
0m 41s (- 27m 15s) (2000 2%) 3.2865
1m 2s (- 27m 21s) (3000 3%) 3.1841
1m 24s (- 27m 14s) (4000 4%) 3.1072
1m 45s (- 26m 59s) (5000 6%) 3.0407
2m 6s (- 26m 40s) (6000 7%) 2.9831
2m 28s (- 26m 23s) (7000 8%) 2.9948
2m 49s (- 26m 6s) (8000 9%) 2.9158
3m 11s (- 25m 48s) (9000 11%) 2.8825
3m 33s (- 25m 29s) (10000 12%) 2.8077
3m 54s (- 25m 10s) (11000 13%) 2.8135
4m 16s (- 24m 52s) (12000 14%) 2.8400
4m 38s (- 24m 33s) (13000 15%) 2.7501
5m 0s (- 24m 13s) (14000 17%) 2.6982
5m 21s (- 23m 52s) (15000 18%) 2.6678
5m 43s (- 23m 32s) (16000 19%) 2.6825
6m 5s (- 23m 12s) (17000 20%) 2.6682
6m 27s (- 22m 51s) (18000 22%) 2.5803
6m 49s (- 22m 31s) (19000 23%) 2.6527
7m 10s (- 22m 10s) (20000 24%) 2.5777
Epoch: 1/4
7m 32s (- 21m 49s) (21000 25%) 2.5311
7m 54s (- 21m 29s) (22000 26%) 2.5026
8m 16s (- 21m 8s) (23000 28%) 2.5082
8m 38s (- 20m 47s) (24000 29%) 2.4449
9m 0s (- 20m 25s) (25000 30%) 2.4305
9m 21s (- 20m 4s) (26000 31%) 2.4031
9m 43s (- 19m 43

In [89]:
#损失下降不明显了，可以调低学习率再跑1个epoch试试
trainIters5(encoder5, decoder5, 1, print_every=1000,learning_rate=0.001)

Epoch: 0/1
0m 22s (- 7m 10s) (1000 4%) 1.7298
0m 44s (- 6m 48s) (2000 9%) 1.7767
1m 6s (- 6m 26s) (3000 14%) 1.7092
1m 28s (- 6m 2s) (4000 19%) 1.6992
1m 50s (- 5m 40s) (5000 24%) 1.6937
2m 12s (- 5m 18s) (6000 29%) 1.6963
2m 34s (- 4m 55s) (7000 34%) 1.7529
2m 56s (- 4m 33s) (8000 39%) 1.6309
3m 18s (- 4m 11s) (9000 44%) 1.6501
3m 39s (- 3m 49s) (10000 48%) 1.6147
4m 1s (- 3m 27s) (11000 53%) 1.6489
4m 23s (- 3m 5s) (12000 58%) 1.6674
4m 45s (- 2m 43s) (13000 63%) 1.5991
5m 7s (- 2m 21s) (14000 68%) 1.5680
5m 29s (- 1m 59s) (15000 73%) 1.5977
5m 51s (- 1m 37s) (16000 78%) 1.6122
6m 13s (- 1m 15s) (17000 83%) 1.5864
6m 35s (- 0m 53s) (18000 88%) 1.5508
6m 57s (- 0m 31s) (19000 92%) 1.5786
7m 20s (- 0m 9s) (20000 97%) 1.4717


In [93]:
trainIters5(encoder5, decoder5, 1, print_every=1000,learning_rate=0.0005)

Epoch: 0/1
0m 22s (- 7m 14s) (1000 4%) 1.4623
0m 44s (- 6m 50s) (2000 9%) 1.5248
1m 7s (- 6m 29s) (3000 14%) 1.4842
1m 29s (- 6m 7s) (4000 19%) 1.4657
1m 51s (- 5m 44s) (5000 24%) 1.4772
2m 13s (- 5m 21s) (6000 29%) 1.4578
2m 35s (- 4m 58s) (7000 34%) 1.5347
2m 57s (- 4m 36s) (8000 39%) 1.4972
3m 20s (- 4m 14s) (9000 44%) 1.5611
3m 41s (- 3m 51s) (10000 48%) 1.5113
4m 4s (- 3m 29s) (11000 53%) 1.5331
4m 26s (- 3m 7s) (12000 58%) 1.5795
4m 48s (- 2m 44s) (13000 63%) 1.4698
5m 10s (- 2m 22s) (14000 68%) 1.4892
5m 32s (- 2m 0s) (15000 73%) 1.5089
5m 54s (- 1m 38s) (16000 78%) 1.5077
6m 16s (- 1m 16s) (17000 83%) 1.5168
6m 38s (- 0m 53s) (18000 88%) 1.4753
7m 0s (- 0m 31s) (19000 92%) 1.5004
7m 22s (- 0m 9s) (20000 97%) 1.4168


In [96]:
trainIters5(encoder5, decoder5, 1, print_every=1000,learning_rate=0.0001)

Epoch: 0/1
0m 22s (- 7m 22s) (1000 4%) 1.4427
0m 45s (- 7m 0s) (2000 9%) 1.4825
1m 8s (- 6m 36s) (3000 14%) 1.4589
1m 30s (- 6m 11s) (4000 19%) 1.4287
1m 52s (- 5m 48s) (5000 24%) 1.4339
2m 15s (- 5m 25s) (6000 29%) 1.4167
2m 37s (- 5m 1s) (7000 34%) 1.4942
2m 59s (- 4m 39s) (8000 39%) 1.4408
3m 21s (- 4m 16s) (9000 44%) 1.4929
3m 43s (- 3m 53s) (10000 48%) 1.4488
4m 6s (- 3m 31s) (11000 53%) 1.4844
4m 28s (- 3m 8s) (12000 58%) 1.5326
4m 51s (- 2m 46s) (13000 63%) 1.4473
5m 13s (- 2m 24s) (14000 68%) 1.4266
5m 35s (- 2m 1s) (15000 73%) 1.4573
5m 58s (- 1m 39s) (16000 78%) 1.4538
6m 21s (- 1m 17s) (17000 83%) 1.4417
6m 44s (- 0m 54s) (18000 88%) 1.4004
7m 7s (- 0m 32s) (19000 92%) 1.4579
7m 30s (- 0m 9s) (20000 97%) 1.3398


In [100]:
trainIters5(encoder5, decoder5, 1, print_every=1000,learning_rate=0.0001)

Epoch: 0/1
0m 22s (- 7m 22s) (1000 4%) 1.4151
0m 45s (- 7m 2s) (2000 9%) 1.4718
1m 8s (- 6m 39s) (3000 14%) 1.4355
1m 31s (- 6m 16s) (4000 19%) 1.4205
1m 54s (- 5m 52s) (5000 24%) 1.4381
2m 16s (- 5m 28s) (6000 29%) 1.4204
2m 38s (- 5m 4s) (7000 34%) 1.4798
3m 1s (- 4m 41s) (8000 39%) 1.4386
3m 23s (- 4m 18s) (9000 44%) 1.4926
3m 45s (- 3m 55s) (10000 48%) 1.4549
4m 7s (- 3m 32s) (11000 53%) 1.4613
4m 30s (- 3m 10s) (12000 58%) 1.5035
4m 52s (- 2m 47s) (13000 63%) 1.4390
5m 14s (- 2m 24s) (14000 68%) 1.3936
5m 37s (- 2m 2s) (15000 73%) 1.4485
5m 59s (- 1m 39s) (16000 78%) 1.4421
6m 21s (- 1m 17s) (17000 83%) 1.4317
6m 44s (- 0m 54s) (18000 88%) 1.4058
7m 6s (- 0m 32s) (19000 92%) 1.4321
7m 28s (- 0m 9s) (20000 97%) 1.3334


In [104]:
evaluateRandomly5(encoder5, decoder5)

> je suis soudainement fatigue .
= i m suddenly tired .
< i m already tired . <EOS>

> il semble s interesser a moi .
= he seems interested in me .
< he seems to to me . <EOS>

> je ne suis pas telepathe .
= i m not a psychic .
< i m not a . <EOS>

> tu es un peu en retard .
= you re a little late .
< you re a little late . <EOS>

> nous sommes une bonne equipe .
= we re a good team .
< we re a good good . <EOS>

> je fais ca pour l avenir de nos enfants .
= i m doing this for our kids future .
< i m doing this for this this . . <EOS>

> vous etes habilles trop chaudement .
= you re dressed too warmly .
< you re probably too young . <EOS>

> ce sont tous les deux de bons professeurs .
= they are both good teachers .
< they re both good . . <EOS>

> nous sommes tous deux catholiques .
= we re both catholics .
< we re both single . <EOS>

> espece de gros porc !
= you re such a pig .
< you re such a liar . <EOS>



In [102]:
input,gt,predict,score = test5(encoder5, decoder5, test_pairs)

100%|██████████| 2271/2271 [00:21<00:00, 107.11it/s]


=== Evaluation score - Rouge score ===
Rouge1 fmeasure:	 0.5901358
Rouge1 precision:	 0.56192356
Rouge1 recall:  	 0.6300863
Rouge2 fmeasure:	 0.4014541
Rouge2 precision:	 0.37587482
Rouge2 recall:  	 0.43890423
