In [19]:
!pip install torch



In [20]:
#Importing necessary libraries 
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random
import time 
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

#Check for CUDA availability and set the device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Ignore duplicate warnings
import warnings
warnings.filterwarnings('ignore')


In [21]:
#Load the tab-separated sentence-pair file into a DataFrame.
#The file is read without a header row and only its first two columns are kept (usecols=[0,1]); they are then named 'English' and 'French'.
#NOTE(review): data_path is an absolute, machine-specific path - adjust it before running this notebook elsewhere.
data_path = '/opt/homebrew/anaconda3/NLP/Teachback code/Data/fra-eng/fra.txt'
df = pd.read_csv(data_path,delimiter='\t',header=None,usecols=[0,1])
df.columns = ['English','French']
df.head()

Unnamed: 0,English,French
0,Go.,Va !
1,Go.,Marche.
2,Go.,En route !
3,Go.,Bouge !
4,Hi.,Salut !


In [22]:
#Reserved vocabulary indices: 0 is the start-of-sentence marker and 1 is the end-of-sentence marker.
#Lang.index2word pre-populates these same two indices as "SOS" and "EOS".
SOS_token = 0
EOS_token = 1

#Language class to store word-to-index and index-to-word mappings for a language
class Lang:
    def __init__(self, name):
        ''' Create an empty vocabulary for one language.

        Input Parameters: name (str) - Label identifying the language.

        Output Parameters: None '''
        self.name = name
        #word -> integer index
        self.word2index = {}
        #word -> number of times it has been added
        self.word2count = {}
        #integer index -> word; indices 0 and 1 are reserved for the SOS/EOS markers
        self.index2word = {0: "SOS", 1: "EOS"}
        #Next free index (equivalently the vocabulary size, counting SOS and EOS)
        self.n_words = 2

    def addSentence(self, sentence):
        ''' Register every token of a sentence, splitting on single spaces.

        Input Parameters: sentence (str) - The sentence whose tokens are added.

        Output Parameters: None '''
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        ''' Register one word: bump its count if already known, otherwise assign it the next free index.

        Input Parameters: word (str) - The word to add.

        Output Parameters: None '''
        if word in self.word2index:
            #Known word: only the occurrence counter changes
            self.word2count[word] += 1
            return
        next_index = self.n_words
        self.word2index[word] = next_index
        self.word2count[word] = 1
        self.index2word[next_index] = word
        self.n_words = next_index + 1

#Function to convert a Unicode string to plain ASCII
def unicodeToAscii(s):
    ''' Strip diacritics from a string.

    The string is NFD-normalized and every character whose Unicode category is 'Mn' is dropped.

    Input Parameters: s (str) - The input Unicode string.

    Output Parameters: str - The string with those characters removed. '''
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for ch in decomposed:
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)

#Function to normalize a string by converting it to  lowercase, triming, and removing the non-letter characters
def normalizeString(s):
    ''' Normalize a sentence for tokenization.

    The text is lowercased, stripped, passed through unicodeToAscii, and then every run of
    characters other than letters, '!' and '?' is collapsed into a single space.

    Input Parameters: s (str) - The input string.

    Output Parameters: str - The normalized string. '''
    cleaned = s.lower().strip()
    cleaned = unicodeToAscii(cleaned)
    #Put a space in front of sentence punctuation so it becomes its own token
    cleaned = re.sub(r"([.!?])", r" \1", cleaned)
    #Collapse everything that is not a letter, '!' or '?' into one space
    cleaned = re.sub(r"[^a-zA-Z!?]+", r" ", cleaned)
    return cleaned.strip()

#Function to read and preprocess the language pairs from a DataFrame
def readLangs(lang1, lang2, reverse=False):
    ''' Build normalized sentence pairs from the module-level DataFrame `df` and create two empty Lang objects.

    Input Parameters: lang1 (str) - Name of the first language.
                      lang2 (str) - Name of the second language.
                      reverse (bool) - If True, each pair is reversed and the two language names swap roles.

    Output Parameters: input_lang (Lang) - Empty Lang for the input side (lang2 when reverse is True, else lang1).
                       output_lang (Lang) - Empty Lang for the output side (lang1 when reverse is True, else lang2).
                       pairs (list) - List of two-element lists of normalized sentences. '''
    print("Reading data...")
    english_column = df['English'].tolist()
    french_column = df['French'].tolist()
    #Normalize both sides of every row
    pairs = []
    for eng, fra in zip(english_column, french_column):
        pairs.append([normalizeString(eng), normalizeString(fra)])
    if not reverse:
        return Lang(lang1), Lang(lang2), pairs
    #Reversed direction: flip every pair and swap which name labels which side
    pairs = [p[::-1] for p in pairs]
    return Lang(lang2), Lang(lang1), pairs

#Function to filter a language pair based on their lengths and prefixes
def filterPair(p):
    ''' Decide whether a sentence pair is kept.

    A pair passes when both sentences have fewer than MAX_LENGTH space-separated tokens
    and the second sentence starts with one of eng_prefixes.

    Input Parameters: p (list) - A pair of sentences; p[0] and p[1] are the two strings.

    Output Parameters: bool - True if the pair passes all three checks, False otherwise. '''
    if len(p[0].split(' ')) >= MAX_LENGTH:
        return False
    if len(p[1].split(' ')) >= MAX_LENGTH:
        return False
    return p[1].startswith(eng_prefixes)

#Function to filter a list of language pairs
def filterPairs(pairs):
    ''' Keep only the pairs accepted by filterPair, preserving their order.

    Input Parameters: pairs (list) - List of sentence pairs.

    Output Parameters: list - New list holding the accepted pairs. '''
    kept = []
    for candidate in pairs:
        if filterPair(candidate):
            kept.append(candidate)
    return kept

#MAX_LENGTH: filterPair keeps a sentence only if it has fewer than this many space-separated tokens;
#the decoders also run exactly this many decoding steps.
MAX_LENGTH = 10
#Prefixes tested with str.startswith against the second sentence of each pair in filterPair.
#Contractions are written with a space ("i m ") - presumably to match normalizeString output, where an apostrophe becomes a space.
#NOTE(review): some entries end with a trailing space and others ("he is", "you are", ...) do not - confirm this is intended.
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

#Function to prepare the data for training
def prepareData(lang1, lang2, reverse=False):
    ''' Read the sentence pairs, filter them, and populate both vocabularies from the surviving pairs.

    Input Parameters: lang1 (str) - Name of the first language.
                      lang2 (str) - Name of the second language.
                      reverse (bool) - Forwarded to readLangs; if True the pairs are reversed.

    Output Parameters: input_lang (Lang) - Vocabulary built from the first sentence of every kept pair.
                       output_lang (Lang) - Vocabulary built from the second sentence of every kept pair.
                       pairs (list) - The filtered sentence pairs. '''
    source_lang, target_lang, all_pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(all_pairs))
    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(kept_pairs))
    print("Counting words...")
    for source_sentence, target_sentence in kept_pairs:
        source_lang.addSentence(source_sentence)
        target_lang.addSentence(target_sentence)
    print("Counted words:")
    for vocabulary in (source_lang, target_lang):
        print(vocabulary.name, vocabulary.n_words)
    return source_lang, target_lang, kept_pairs

#Build both vocabularies and the filtered pairs. reverse=True flips every pair, so the input side is French and the output side is English.
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
#Print one randomly chosen pair as a sanity check
print(random.choice(pairs))

Reading data...
Read 227815 sentence pairs
Trimmed to 17917 sentence pairs
Counting words...
Counted words:
fra 5699
eng 3703
['j essaie de me rappeler', 'i m trying to remember']


In [23]:
#EncoderRNN class to define the Encoder part of the Seq2Seq model
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_p=0.1):
        ''' Build the encoder: an embedding layer, a dropout layer and a batch-first GRU.

        Input Parameters: input_size (int) - Number of rows in the embedding table (input vocabulary size).
                          hidden_size (int) - Embedding width and GRU hidden size.
                          dropout_p (float) - Probability used by the dropout layer.

        Output Parameters: None '''
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input):
        ''' Embed the input indices, apply dropout and run the GRU.

        Input Parameters: input (torch.Tensor) - Tensor of token indices for the input sentences.

        Output Parameters: output (torch.Tensor) - The per-step outputs of the GRU.
                           hidden (torch.Tensor) - The final hidden state of the GRU. '''
        token_vectors = self.embedding(input)
        token_vectors = self.dropout(token_vectors)
        #nn.GRU already returns the (output, hidden) pair
        return self.gru(token_vectors)

#DecoderRNN class to define the Decoder part of the Seq2Seq model
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        ''' Build the plain (attention-free) decoder: embedding, batch-first GRU and an output projection.

        Input Parameters: hidden_size (int) - Embedding width and GRU hidden size.
                          output_size (int) - Size of the output vocabulary.

        Output Parameters: None '''
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        ''' Decode MAX_LENGTH steps, starting from SOS tokens and the encoder's hidden state.

        Input Parameters: encoder_outputs (torch.Tensor) - Encoder outputs; only the size of dimension 0 is used here.
                          encoder_hidden (torch.Tensor) - Hidden state used to initialise the decoder GRU.
                          target_tensor (torch.Tensor) - Optional targets; when given, column i is fed as the input of step i+1 (teacher forcing).

        Output Parameters: decoder_outputs (torch.Tensor) - Log-softmax over the vocabulary for every step, concatenated along dimension 1.
                           decoder_hidden (torch.Tensor) - The GRU hidden state after the last step.
                           None - Placeholder so the return shape matches the attention decoder. '''
        n_sequences = encoder_outputs.size(0)
        #Every sequence starts from the SOS token
        step_input = torch.full((n_sequences, 1), SOS_token, dtype=torch.long, device=device)
        state = encoder_hidden
        step_logits = []
        teacher_forcing = target_tensor is not None

        for step in range(MAX_LENGTH):
            logits, state = self.forward_step(step_input, state)
            step_logits.append(logits)
            if teacher_forcing:
                #Teacher forcing: the ground-truth token becomes the next input
                step_input = target_tensor[:, step].unsqueeze(1)
            else:
                #Free running: feed back the highest-scoring token, detached from the graph
                step_input = logits.topk(1)[1].squeeze(-1).detach()
        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        return log_probs, state, None

    def forward_step(self, input, hidden):
        ''' Run a single decoding step.

        Input Parameters: input (torch.Tensor) - Token indices fed to this step.
                          hidden (torch.Tensor) - Current GRU hidden state.

        Output Parameters: output (torch.Tensor) - Unnormalised vocabulary scores for this step.
                           hidden (torch.Tensor) - Updated GRU hidden state. '''
        activated = F.relu(self.embedding(input))
        gru_out, hidden = self.gru(activated, hidden)
        return self.out(gru_out), hidden

#BahdanauAttention class to define the Attention mechanism for the DecoderRNN
class BahdanauAttention(nn.Module):
    def __init__(self, hidden_size):
        ''' Build the three linear layers of the additive attention: Wa for the query, Ua for the keys, Va for the score.

        Input Parameters: hidden_size (int) - Feature size of both the query and the keys.

        Output Parameters: None '''
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        ''' Score every key against the query and return the attention-weighted sum of the keys.

        Input Parameters: query (torch.Tensor) - The query tensor (the decoder hidden state in this notebook).
                          keys (torch.Tensor) - The key tensor (the encoder outputs in this notebook).

        Output Parameters: context (torch.Tensor) - Batched matrix product of the weights and the keys.
                           weights (torch.Tensor) - Softmax-normalised scores over the keys. '''
        projected_query = self.Wa(query)
        projected_keys = self.Ua(keys)
        energy = torch.tanh(projected_query + projected_keys)
        #Va yields one score per key in the last dimension; move the scores onto the last axis for the softmax
        raw_scores = self.Va(energy)
        raw_scores = raw_scores.squeeze(2).unsqueeze(1)
        weights = F.softmax(raw_scores, dim=-1)
        return torch.bmm(weights, keys), weights

#AttnDecoderRNN class to define the Decoder with Attention mechanism
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        ''' Build the attention decoder: embedding, BahdanauAttention, a GRU fed with [embedding; context], output projection and dropout.

        Input Parameters: hidden_size (int) - Embedding width and GRU hidden size.
                          output_size (int) - Size of the output vocabulary.
                          dropout_p (float) - Probability used by the dropout layer.

        Output Parameters: None '''
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attention = BahdanauAttention(hidden_size)
        #The GRU input is the embedded token concatenated with the context vector, hence 2 * hidden_size
        self.gru = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        ''' Decode MAX_LENGTH steps with attention over the encoder outputs.

        Input Parameters: encoder_outputs (torch.Tensor) - Encoder outputs attended to at every step.
                          encoder_hidden (torch.Tensor) - Hidden state used to initialise the decoder GRU.
                          target_tensor (torch.Tensor) - Optional targets; when given, column i is fed as the input of step i+1 (teacher forcing).

        Output Parameters: decoder_outputs (torch.Tensor) - Log-softmax over the vocabulary for every step, concatenated along dimension 1.
                           decoder_hidden (torch.Tensor) - The GRU hidden state after the last step.
                           attentions (torch.Tensor) - The attention weights of every step, concatenated along dimension 1. '''
        n_sequences = encoder_outputs.size(0)
        #Every sequence starts from the SOS token
        step_input = torch.full((n_sequences, 1), SOS_token, dtype=torch.long, device=device)
        state = encoder_hidden
        step_logits = []
        step_weights = []
        teacher_forcing = target_tensor is not None
        for step in range(MAX_LENGTH):
            logits, state, weights = self.forward_step(step_input, state, encoder_outputs)
            step_logits.append(logits)
            step_weights.append(weights)
            if teacher_forcing:
                #Teacher forcing: the ground-truth token becomes the next input
                step_input = target_tensor[:, step].unsqueeze(1)
            else:
                #Free running: feed back the highest-scoring token, detached from the graph
                step_input = logits.topk(1)[1].squeeze(-1).detach()
        log_probs = F.log_softmax(torch.cat(step_logits, dim=1), dim=-1)
        attentions = torch.cat(step_weights, dim=1)
        return log_probs, state, attentions

    def forward_step(self, input, hidden, encoder_outputs):
        ''' Run a single attention-decoding step.

        Input Parameters: input (torch.Tensor) - Token indices fed to this step.
                          hidden (torch.Tensor) - Current GRU hidden state.
                          encoder_outputs (torch.Tensor) - Encoder outputs used as attention keys.

        Output Parameters: output (torch.Tensor) - Unnormalised vocabulary scores for this step.
                           hidden (torch.Tensor) - Updated GRU hidden state.
                           attn_weights (torch.Tensor) - Attention weights over the encoder outputs. '''
        embedded = self.dropout(self.embedding(input))
        #Swap the first two axes of the hidden state before using it as the attention query
        query = hidden.permute(1, 0, 2)
        context, attn_weights = self.attention(query, encoder_outputs)
        gru_out, hidden = self.gru(torch.cat((embedded, context), dim=2), hidden)
        return self.out(gru_out), hidden, attn_weights

In [24]:
#Function to convert a sentence to a list of indexes using a language class object
def indexesFromSentence(lang, sentence):
    ''' Look up the index of every space-separated token of a sentence in lang.word2index.

    A token missing from the mapping raises KeyError.

    Input Parameters: lang (Lang) - Object exposing the word2index mapping.
                      sentence (str) - The sentence to convert.

    Output Parameters: list - The token indexes, in sentence order. '''
    vocabulary = lang.word2index
    ids = []
    for token in sentence.split(' '):
        ids.append(vocabulary[token])
    return ids

#Function to convert a sentence to a PyTorch tensor using a language object
def tensorFromSentence(lang, sentence):
    ''' Turn a sentence into a single-row LongTensor of token indexes terminated by EOS_token.

    Input Parameters: lang (Lang) - Object exposing the word2index mapping.
                      sentence (str) - The sentence to convert.

    Output Parameters: torch.Tensor - Tensor of shape (1, number of tokens + 1), created on `device`. '''
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    row = torch.tensor(token_ids, dtype=torch.long, device=device)
    return row.view(1, -1)

#Function to create PyTorch tensors from a language pair
def tensorsFromPair(pair):
    ''' Convert a sentence pair into tensors using the module-level input_lang and output_lang.

    Input Parameters: pair (tuple) - Two sentences: pair[0] is the source, pair[1] is the target.

    Output Parameters: tuple - (source tensor, target tensor), each produced by tensorFromSentence. '''
    return (
        tensorFromSentence(input_lang, pair[0]),
        tensorFromSentence(output_lang, pair[1]),
    )

#Function to create a DataLoader for the training data
def get_dataloader(batch_size):
    ''' Prepare the data and wrap it in a randomly sampled DataLoader.

    Every sentence becomes a row of MAX_LENGTH indexes: its token indexes, then EOS_token,
    with the remaining positions left at 0.

    Input Parameters: batch_size (int) - The batch size for the DataLoader.

    Output Parameters: input_lang (Lang) - The source vocabulary returned by prepareData.
                       output_lang (Lang) - The target vocabulary returned by prepareData.
                       train_dataloader (DataLoader) - DataLoader over (input ids, target ids) tensors. '''
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    matrix_shape = (len(pairs), MAX_LENGTH)
    input_ids = np.zeros(matrix_shape, dtype=np.int32)
    target_ids = np.zeros(matrix_shape, dtype=np.int32)

    for row, (source_sentence, target_sentence) in enumerate(pairs):
        source_ids = indexesFromSentence(input_lang, source_sentence) + [EOS_token]
        wanted_ids = indexesFromSentence(output_lang, target_sentence) + [EOS_token]
        #Fill the leading positions of the row; the rest stays zero
        input_ids[row, :len(source_ids)] = source_ids
        target_ids[row, :len(wanted_ids)] = wanted_ids
    #Move both index matrices to the training device as LongTensors
    train_data = TensorDataset(
        torch.LongTensor(input_ids).to(device),
        torch.LongTensor(target_ids).to(device),
    )
    #RandomSampler draws the dataset rows in random order
    train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)
    return input_lang, output_lang, train_dataloader

In [25]:
#Function to train one epoch
def train_epoch(dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    ''' Run one pass over the dataloader, updating the encoder and decoder after every batch.

    Input Parameters: dataloader (DataLoader) - Iterable of (input_tensor, target_tensor) batches.
                      encoder (EncoderRNN) - Called on the input batch; returns (outputs, hidden).
                      decoder (AttnDecoderRNN) - Called with the encoder results and the targets; its first return value is scored.
                      encoder_optimizer (torch.optim) - The optimizer for the encoder.
                      decoder_optimizer (torch.optim) - The optimizer for the decoder.
                      criterion (torch.nn) - The loss function.

    Output Parameters: float - Sum of the batch losses divided by the number of batches. '''
    running_loss = 0
    for input_tensor, target_tensor in dataloader:
        #Clear the gradients left over from the previous batch
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)
        #Flatten the predictions to (positions, vocabulary) and the targets to (positions,) for the loss
        vocab_size = decoder_outputs.size(-1)
        flat_predictions = decoder_outputs.view(-1, vocab_size)
        flat_targets = target_tensor.view(-1)
        batch_loss = criterion(flat_predictions, flat_targets)
        batch_loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / len(dataloader)

In [26]:
#Function to convert time in seconds to a human-readable format in minutes and seconds
def asMinutes(s):
    ''' Format a duration given in seconds as whole minutes and whole seconds.

    Input Parameters: s (float) - The duration in seconds.

    Output Parameters: str - The duration formatted as '<minutes>m <seconds>s'; fractional seconds are truncated. '''
    #divmod replaces the previous math.floor call: the notebook never imports math,
    #so that version raised NameError on a freshly started kernel
    minutes, seconds = divmod(s, 60)
    return '%dm %ds' % (minutes, seconds)

#Function to calculate the time remaining until completion, given the starting time and completion percentage
def timeSince(since, percent):
    ''' Report the elapsed time and a linear estimate of the time still remaining.

    Input Parameters: since (float) - The starting time, as returned by time.time().
                      percent (float) - Fraction of the work already completed (must be non-zero).

    Output Parameters: str - '<elapsed> (- <remaining>)', both formatted by asMinutes. '''
    elapsed = time.time() - since
    #Extrapolate the total duration from the fraction completed so far
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [27]:
#Function to train the Seq2Seq model
def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
               print_every=100, plot_every=100):
    ''' Train the encoder/decoder pair with Adam and NLLLoss, then plot the recorded losses.

    Input Parameters: train_dataloader (DataLoader) - The DataLoader containing the training data.
                      encoder (EncoderRNN) - The encoder model.
                      decoder (AttnDecoderRNN) - The decoder model.
                      n_epochs (int) - The number of epochs to train for.
                      learning_rate (float) - Learning rate of both Adam optimizers. (Default: 0.001)
                      print_every (int) - Print the averaged loss every this many epochs.
                      plot_every (int) - Record one point of the loss curve every this many epochs.

    Output Parameters: None '''
    started_at = time.time()
    loss_curve = []
    print_window_loss = 0  #Loss accumulated since the last progress line
    plot_window_loss = 0  #Loss accumulated since the last recorded plot point
    #One optimizer per model, plus the loss function
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()
    for epoch in range(1, n_epochs + 1):
        epoch_loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_window_loss += epoch_loss
        plot_window_loss += epoch_loss
        if epoch % print_every == 0:
            #Progress line: elapsed/remaining time, epoch number, percentage done, mean loss of the window
            mean_loss = print_window_loss / print_every
            print_window_loss = 0
            fraction_done = epoch / n_epochs
            print('%s (%d %d%%) %.4f' % (timeSince(started_at, fraction_done),epoch, fraction_done * 100, mean_loss))
        if epoch % plot_every == 0:
            loss_curve.append(plot_window_loss / plot_every)
            plot_window_loss = 0
    #Draw the loss curve once training has finished
    showPlot(loss_curve)

In [28]:
#NOTE(review): 'agg' is a non-interactive backend, so figures created after this call are presumably not rendered inline in the notebook - confirm this is intended.
plt.switch_backend('agg')
#Function to plot the training loss curve
def showPlot(points):
    ''' Plot the training loss curve.

    Input Parameters: points (list) - A list of training loss values at different points in the training.

    Output Parameters: None '''
    #A single plt.subplots() call creates the figure and its axes; the extra plt.figure()
    #that used to precede it only produced a second, empty figure that was never used
    fig, ax = plt.subplots()
    #Place a y-axis tick at every multiple of 0.2
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points) #Plot the training loss curve

In [29]:
hidden_size = 128 #Embedding width and GRU hidden size passed to the encoder and decoder
batch_size = 32 #Number of sentence pairs per DataLoader batch
#Build the training DataLoader. get_dataloader calls prepareData again, so input_lang and output_lang are rebound
#to the Lang objects it returns; the module-level `pairs` from the earlier prepareData call is left untouched.
input_lang, output_lang, train_dataloader = get_dataloader(batch_size)

Reading data...
Read 227815 sentence pairs
Trimmed to 17917 sentence pairs
Counting words...
Counted words:
fra 5699
eng 3703


In [30]:
#Encoder sized to the input vocabulary, moved to the selected device
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
#Attention decoder sized to the output vocabulary, moved to the selected device
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)
#Train for 80 epochs with the default learning rate.
#print_every=5 and plot_every=5: the averaged loss is printed, and one point of the loss curve recorded, every 5 epochs
train(train_dataloader, encoder, decoder, 80, print_every=5, plot_every=5)

1m 44s (- 26m 3s) (5 6%) 1.3848
3m 36s (- 25m 16s) (10 12%) 0.5538
5m 34s (- 24m 11s) (15 18%) 0.2923
7m 22s (- 22m 8s) (20 25%) 0.1813
9m 13s (- 20m 17s) (25 31%) 0.1298
11m 10s (- 18m 37s) (30 37%) 0.1021
12m 48s (- 16m 27s) (35 43%) 0.0853
14m 26s (- 14m 26s) (40 50%) 0.0751
16m 12s (- 12m 36s) (45 56%) 0.0678
17m 53s (- 10m 43s) (50 62%) 0.0630
19m 43s (- 8m 57s) (55 68%) 0.0592
21m 23s (- 7m 7s) (60 75%) 0.0563
23m 6s (- 5m 19s) (65 81%) 0.0543
24m 45s (- 3m 32s) (70 87%) 0.0521
26m 26s (- 1m 45s) (75 93%) 0.0506
28m 14s (- 0m 0s) (80 100%) 0.0492


In [31]:
#Function to evaluate the trained Seq2Seq model
def evaluate(encoder, decoder, sentence, input_lang, output_lang):
    ''' Translate one sentence with the given encoder and decoder, without tracking gradients.

    Input Parameters: encoder (EncoderRNN) - The encoder model.
                      decoder (AttnDecoderRNN) - The decoder model.
                      sentence (str) - The sentence to translate; every word must be present in input_lang.
                      input_lang (Lang) - The vocabulary of the source language.
                      output_lang (Lang) - The vocabulary of the target language.

    Output Parameters: decoded_words (list) - The decoded words; ends with '<EOS>' if the decoder emitted EOS_token.
                       decoder_attn (torch.Tensor) - The third value returned by the decoder (its attention weights). '''
    with torch.no_grad():
        source_tensor = tensorFromSentence(input_lang, sentence)
        encoder_outputs, encoder_hidden = encoder(source_tensor)
        #No target tensor is passed, so the decoder feeds back its own predictions
        decoder_outputs, _, decoder_attn = decoder(encoder_outputs, encoder_hidden)
        #Highest-scoring vocabulary index at every step
        best_ids = decoder_outputs.topk(1)[1].squeeze()
        decoded_words = []
        for token in best_ids:
            token_id = token.item()
            if token_id == EOS_token:
                #Stop at the first end-of-sentence marker
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[token_id])
    return decoded_words, decoder_attn

In [32]:
def evaluateRandomly(encoder, decoder, n=10):
    ''' Translate n randomly chosen pairs from the module-level `pairs` and print source, reference and prediction.

    Input Parameters: encoder (EncoderRNN) - The encoder model.
                      decoder (AttnDecoderRNN) - The decoder model.
                      n (int) - The number of random examples to print. (Default: 10)

    Output Parameters: None '''
    for _ in range(n):
        sample = random.choice(pairs)
        source, reference = sample[0], sample[1]
        #'>' marks the source sentence, '=' the reference translation, '<' the model output
        print('>', source)
        print('=', reference)
        predicted_words, _attn = evaluate(encoder, decoder, source, input_lang, output_lang)
        print('<', ' '.join(predicted_words))
        print('')

In [33]:
#Switch the encoder and decoder to evaluation mode
encoder.eval()
decoder.eval()
#Translate and print random examples drawn from the training pairs.
#NOTE: eval() does not by itself stop weight updates; it changes the behaviour of layers such as the nn.Dropout modules both models contain.
#No weights change here because evaluate() runs under torch.no_grad() and no optimizer step is taken.
evaluateRandomly(encoder, decoder)

> on ne reussira pas a arriver a l heure
= we re not going to make it in time
< we re not going to make it in time <EOS>

> je ne suis pas facilement offensee
= i m not easily offended
< i m not easily offended <EOS>

> je suis heureux de te voir ici
= i am happy to see you here
< i am happy to see you here <EOS>

> tu n es pas tres amusant
= you re not very funny
< you re not very funny <EOS>

> tu es tres observateur
= you re very observant
< you re very observant <EOS>

> tu es desormais en securite
= you re safe now
< you re safe now <EOS>

> on m a promue
= i m being promoted
< i m being promoted <EOS>

> ce n est qu une enfant
= she s only a child
< she is a mere child <EOS>

> tu vas y arriver apres tout
= you re going to make it after all
< you re going to make it after all <EOS>

> vous lisez mes pensees
= you re reading my mind
< you re reading my mind <EOS>



In [34]:
#Function to show the attention matrix as a heatmap
def showAttention(input_sentence, output_words, attentions):
    ''' Draw an attention matrix as a heatmap (matshow with the 'bone' colormap) plus a colorbar.

    Input Parameters: input_sentence (str) - The source sentence; it is split on single spaces to label the x-axis, with '<EOS>' appended.
                      output_words (list) - The decoded words used to label the y-axis.
                      attentions (torch.Tensor) - The attention weights; moved to the CPU and converted to a NumPy array before plotting.
                                                  Presumably 2-D (output positions x input positions) - confirm against the caller.
                      
    Output Parameters: None '''
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.cpu().numpy(), cmap='bone')
    fig.colorbar(cax)
    #Set up axes
    #NOTE(review): the leading '' label presumably pads for a tick the locator places before the first cell - confirm.
    #The labels are assigned before the locators below are installed, so this block depends on the statement order.
    ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'], rotation=90)
    ax.set_yticklabels([''] + output_words)
    #Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    #Show the plot
    plt.show()

#Function to evaluate the input sentence, generate output, and show attention heatmap
def evaluateAndShowAttention(input_sentence):
    ''' Translate a sentence with the module-level encoder and decoder, print the result and draw its attention heatmap.

    Input Parameters: input_sentence (str) - The sentence in the source language.

    Output Parameters: None '''
    predicted_words, attention_stack = evaluate(encoder, decoder, input_sentence, input_lang, output_lang)
    print('input =', input_sentence)
    print('output =', ' '.join(predicted_words))
    #Take the first entry along dimension 0 and keep only as many rows as there are decoded words,
    #so the heatmap has one row per printed output word
    n_rows = len(predicted_words)
    heatmap = attention_stack[0, :n_rows, :]
    showAttention(input_sentence, predicted_words, heatmap)

In [35]:
#Translate a few hand-picked French sentences and draw their attention heatmaps.
#NOTE: every word must already be in input_lang's vocabulary - indexesFromSentence looks each word up directly in word2index.
evaluateAndShowAttention('il n est pas aussi grand que son pere')

evaluateAndShowAttention('je suis trop fatigue pour conduire')

evaluateAndShowAttention('je suis desole si c est une question idiote')

evaluateAndShowAttention('je suis reellement fiere de vous')

input = il n est pas aussi grand que son pere
output = he is not as tall as his father <EOS>
input = je suis trop fatigue pour conduire
output = i m too tired to drive <EOS>
input = je suis desole si c est une question idiote
output = i m sorry if this is a stupid question <EOS>
input = je suis reellement fiere de vous
output = i m really proud of you <EOS>
