<h1>Problem 3</h1> 
<p>Repeat problems 1 and 2, this time translating from French to English. Train the model on the entire dataset and evaluate it on the entire dataset. Report the training loss, validation loss, and validation accuracy. Also perform some qualitative validation by asking the network to generate English translations for some French sentences. Which direction is more effective, French-to-English or English-to-French?</p>

<h2>Data Preprocessing</h2>

In [55]:
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import requests
import torch
import time

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torchinfo import summary
import numpy as np
import ast

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

<h3>Load Text File:</h3>

In [56]:
# Absolute location of the English/French corpus file.
textPath = "C:/Users/aidan_000/Desktop/UNCC/Github/Intro-to-DL/datasets/text-sequences/E2F.txt"

# The file is not read line by line: its whole content is parsed in one shot
# as a Python literal. The result is later iterated as (english, french)
# sentence pairs by EngFrDataset.
E2F = []
with open(textPath, mode='r', encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read()
    E2F = ast.literal_eval(raw_text)

<h3>English and French Dictionary mapping and tokenization</h3>

In [57]:
class Vocabulary:
    """Word <-> index lookup tables plus per-word occurrence counts.

    The entries "SOS" and "EOS" are pre-registered under the module-level
    constants SOS_token and EOS_token, so those constants must be defined
    before the first Vocabulary is instantiated. Ordinary words receive
    consecutive indices starting at 2, in order of first appearance.
    """

    def __init__(self):
        # Both lookup directions start out holding only the two special tokens.
        self.word2index = {"SOS": SOS_token, "EOS": EOS_token}
        self.index2word = {SOS_token: "SOS", EOS_token: "EOS"}
        self.word_count = {}  # occurrences per ordinary word (special tokens are not counted)
        self.n_words = 2  # next free index; the two special tokens are already registered

    def add_sentence(self, sentence):
        """Register every whitespace-separated word of `sentence`.

        Splitting uses str.split() with no separator so that tokenization
        matches EngFrDataset.__getitem__, which looks words up after the same
        kind of split. Splitting on a single space instead would register an
        empty-string "word" for repeated spaces and would keep tab-separated
        words glued together, which later fails the index lookup.
        """
        for word in sentence.split():
            self.add_word(word)

    def add_word(self, word):
        """Register `word` if it is new, otherwise bump its occurrence count."""
        if word in self.word2index:
            # Already known: only the frequency changes.
            self.word_count[word] += 1
            return

        # New word: give it the next free index in both lookup directions.
        self.word2index[word] = self.n_words
        self.index2word[self.n_words] = word
        self.word_count[word] = 1
        self.n_words += 1

# Sentence-pair dataset: built from (English, French) pairs, served as French input -> English target
class EngFrDataset(Dataset):
    """Holds the sentence pairs together with one Vocabulary per language.

    Items are returned as (french_indices, english_indices), i.e. French is
    the model input and English is the target. Each index sequence ends with
    EOS_token.
    """

    def __init__(self, pairs):
        self.eng_vocab = Vocabulary()
        self.fr_vocab = Vocabulary()

        # Keep a private copy of the pairs, then grow both vocabularies from it.
        self.pairs = [(eng, fr) for eng, fr in pairs]
        for eng, fr in self.pairs:
            self.eng_vocab.add_sentence(eng)
            self.fr_vocab.add_sentence(fr)

        # Per-language views of the same pairs, used for index-based access.
        self.eng_sentences = [eng for eng, _ in self.pairs]
        self.fr_sentences = [fr for _, fr in self.pairs]

    def __len__(self):
        """Number of stored sentence pairs."""
        return len(self.pairs)

    def __getitem__(self, idx):
        """Return (input, target) LongTensors of word indices for pair `idx`."""
        target_words = self.eng_sentences[idx].split()
        source_words = self.fr_sentences[idx].split()

        # Map words to vocabulary indices and terminate each sequence with EOS.
        source_ids = [self.fr_vocab.word2index[word] for word in source_words]
        source_ids.append(EOS_token)
        target_ids = [self.eng_vocab.word2index[word] for word in target_words]
        target_ids.append(EOS_token)

        source_tensor = torch.tensor(source_ids, dtype=torch.long)
        target_tensor = torch.tensor(target_ids, dtype=torch.long)
        return source_tensor, target_tensor

# Reserved vocabulary indices for the start- and end-of-sentence markers.
SOS_token, EOS_token = 0, 1

# A single dataset backs both loaders: the model is trained and evaluated on
# the same sentence pairs, one pair per batch, in shuffled order.
dataset = EngFrDataset(E2F)
train_dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=1)
valid_dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=1)

<h2>French to English Encoder/Decoder GRU Model without Attention</h2>

<h3>Model Definition (Encoder and Decoder):</h3>

In [58]:
class Encoder(nn.Module):
    """GRU encoder of the seq2seq model; consumes one token per call."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Embedding width equals the GRU width, so embeddings feed the GRU directly.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU by a single step.

        Returns the (output, hidden) pair produced by the GRU.
        """
        # Reshape the embedding to (seq_len=1, batch=1, hidden_size) for the GRU.
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """All-zero initial hidden state, created on the module-level `device`."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(shape, device=device)

    
class Decoder(nn.Module):
    """GRU decoder of the seq2seq model; emits log-probabilities for one token per call."""

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)
        # Projects the GRU output onto the target vocabulary.
        self.out = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance the decoder by one step.

        Returns (log_probs, hidden), where log_probs has one row of
        log-probabilities over the output vocabulary.
        """
        # Reshape the embedding to (seq_len=1, batch=1, hidden_size) for the GRU.
        step_input = self.embedding(input).view(1, 1, -1)
        gru_output, hidden = self.gru(step_input, hidden)
        logits = self.out(gru_output[0])
        return self.softmax(logits), hidden

    def initHidden(self):
        """All-zero initial hidden state, created on the module-level `device`."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(shape, device=device)

<h3>Hyperparameters and Training:</h3>

In [59]:
def train_fn(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    """Run one optimisation step on a single sentence pair and return its loss.

    The encoder reads `input_tensor` token by token and its final hidden state
    seeds the decoder. Decoding is free-running: every step feeds the decoder
    its own top-1 prediction from the previous step (no teacher forcing) and
    the loop stops early as soon as EOS_token is predicted.

    Args:
        input_tensor: 1-D tensor of source token indices.
        target_tensor: 1-D tensor of target token indices.
        encoder, decoder: the seq2seq modules to update.
        encoder_optimizer, decoder_optimizer: optimizers for the two modules.
        criterion: loss applied to each decoder output and its target token.

    Returns:
        The summed per-step loss divided by the full target length. The
        divisor is the target length even when decoding stopped early.
    """
    # Start from clean gradients on both modules.
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Encode the source sentence; only the final hidden state is kept.
    encoder_hidden = encoder.initHidden()
    for source_token in input_tensor:
        _, encoder_hidden = encoder(source_token.unsqueeze(0), encoder_hidden)

    # The decoder starts from SOS and the encoder's last hidden state.
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    loss = 0
    for gold_token in target_tensor:
        log_probs, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(log_probs, gold_token.unsqueeze(0))

        # Feed the most likely word back in, detached from the autograd graph.
        _, best_index = log_probs.topk(1)
        decoder_input = best_index.squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    # Backpropagate and update both modules.
    loss.backward()
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.step()

    return loss.item() / target_length

def training(encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, dataloader, epochs, device):
    """Train the encoder/decoder pair for `epochs` passes over `dataloader`.

    Every batch is expected to hold a single (input, target) pair; the leading
    batch dimension is stripped before the pair is handed to train_fn. The
    mean per-sample loss is printed for epochs 0, 10, 20, ...
    """
    for epoch in range(epochs):
        sample_losses = []
        for source_batch, target_batch in dataloader:
            # Drop the batch dimension and move the pair to the target device.
            source = source_batch[0].to(device)
            target = target_batch[0].to(device)

            sample_losses.append(
                train_fn(source, target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            )

        total_loss = sum(sample_losses)

        # Report only every tenth epoch to keep the log short.
        if not epoch % 10:
            print(f'Epoch {epoch}, Loss: {total_loss / len(dataloader)}')

In [60]:
# Layer sizes: the French vocabulary feeds the encoder, the English
# vocabulary sizes the decoder output.
input_size = len(dataset.fr_vocab.word2index)
output_size = len(dataset.eng_vocab.word2index)
hidden_size = 256

# Optimisation settings.
epochs = 50
lr = 0.0001

encoder = Encoder(input_size, hidden_size).to(device)
decoder = Decoder(hidden_size, output_size).to(device)

# Negative log-likelihood loss and one Adam optimizer per module.
criterion = nn.NLLLoss()
encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)

training(
    encoder,
    decoder,
    encoder_optimizer,
    decoder_optimizer,
    criterion,
    train_dataloader,
    epochs,
    device,
)

Epoch 0, Loss: 3.859666400082552
Epoch 10, Loss: 2.49284116267863
Epoch 20, Loss: 1.8133120078692553
Epoch 30, Loss: 1.3120273689908164
Epoch 40, Loss: 0.7633754411264195


<h3>Evaluating and Comparing Target vs Predictions:</h3>

In [61]:
def evaluate_and_show_examples(encoder, decoder, dataloader, criterion, n_examples=10):
    """Evaluate the seq2seq model on `dataloader` and print sample translations.

    Decoding is greedy and capped at the target length; it stops early once
    EOS_token is predicted. A sample counts as correct only when the whole
    predicted index sequence equals the target sequence. For the first
    `n_examples` batches the input, target and predicted sentences are
    printed, followed by the mean loss and the exact-match accuracy.
    """
    # Put both modules in evaluation mode.
    encoder.eval()
    decoder.eval()

    fr_vocab = dataloader.dataset.fr_vocab
    eng_vocab = dataloader.dataset.eng_vocab
    special_tokens = (SOS_token, EOS_token)

    total_loss = 0
    correct_predictions = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_index, (source_batch, target_batch) in enumerate(dataloader):
            # Drop the batch dimension and move the pair to the module-level device.
            input_tensor = source_batch[0].to(device)
            target_tensor = target_batch[0].to(device)
            target_length = target_tensor.size(0)

            # Encode the source sentence; only the final hidden state is kept.
            encoder_hidden = encoder.initHidden()
            for source_token in input_tensor:
                _, encoder_hidden = encoder(source_token.unsqueeze(0), encoder_hidden)

            # Greedy decoding, starting from SOS and the encoder's final state.
            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden
            predicted_indices = []
            loss = 0

            for gold_token in target_tensor:
                log_probs, decoder_hidden = decoder(decoder_input, decoder_hidden)
                _, best_index = log_probs.topk(1)
                predicted_indices.append(best_index.item())
                decoder_input = best_index.squeeze().detach()

                loss += criterion(log_probs, gold_token.unsqueeze(0))
                if decoder_input.item() == EOS_token:
                    break

            # The per-sample loss is normalised by the full target length.
            total_loss += loss.item() / target_length
            correct_predictions += int(predicted_indices == target_tensor.tolist())

            # Show the first few examples with the special tokens stripped.
            if batch_index < n_examples:
                predicted_sentence = ' '.join(
                    eng_vocab.index2word[index]
                    for index in predicted_indices
                    if index not in special_tokens
                )
                target_sentence = ' '.join(
                    eng_vocab.index2word[index]
                    for index in target_tensor.tolist()
                    if index not in special_tokens
                )
                input_sentence = ' '.join(
                    fr_vocab.index2word[index]
                    for index in input_tensor.tolist()
                    if index not in special_tokens
                )

                print(f'Input: {input_sentence}, Target: {target_sentence}, Predicted: {predicted_sentence}')

        # Overall metrics, averaged over the number of batches.
        n_batches = len(dataloader)
        average_loss = total_loss / n_batches
        accuracy = correct_predictions / n_batches
        print(f'Evaluation Loss: {average_loss:.4f}, Accuracy: {100*accuracy:.2f}%')

In [62]:
# Score the plain (no-attention) model and print a few sample translations.
evaluate_and_show_examples(
    encoder=encoder,
    decoder=decoder,
    dataloader=valid_dataloader,
    criterion=criterion,
)

Input: Il étudie l'histoire, Target: He studies history, Predicted: He studies history
Input: Elle écrit de la poésie pendant son temps libre, Target: She writes poetry in her free time, Predicted: She writes poetry in her free free time
Input: Nous jouons de la musique au concert, Target: We play music at the concert, Predicted: We play music at the concert
Input: Nous voyageons en train, Target: We travel by train, Predicted: We travel by train
Input: Ils parlent différentes langues, Target: They speak different languages, Predicted: They speak different languages
Input: Ils voyagent autour du monde, Target: They travel around the world, Predicted: They travel around the world
Input: Nous parlons au téléphone, Target: We talk on the phone, Predicted: We talk on the phone
Input: Elle rêve de voler, Target: She dreams of flying, Predicted: She dreams of flying
Input: Ils nagent dans la piscine, Target: They swim in the pool, Predicted: They swim in the pool
Input: Il chante dans le chœ

<h2>French to English Encoder/Decoder GRU Model with Attention</h2>

<h3>Model Definition (Encoder and Attention Decoder):</h3>

In [63]:
class Encoder(nn.Module):
    """GRU encoder for the attention model; processes one token per call."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Embedding width equals the GRU width, so embeddings feed the GRU directly.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Embed a single token and run one GRU step; returns (output, hidden)."""
        token_embedding = self.embedding(input)
        # The GRU expects (seq_len=1, batch=1, hidden_size).
        gru_input = token_embedding.view(1, 1, -1)
        output, hidden = self.gru(gru_input, hidden)
        return output, hidden

    def initHidden(self):
        """Zero-filled initial hidden state on the module-level `device`."""
        return torch.zeros((1, 1, self.hidden_size), device=device)

    
class AttnDecoder(nn.Module):
    """GRU decoder that attends over a fixed-size buffer of encoder outputs.

    Attention scores come from a linear layer over the concatenated input
    embedding and current hidden state, giving one weight per encoder
    position (max_length positions in total).
    """

    def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        # [embedding ; hidden] -> one attention score per encoder position.
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        # [embedding ; context] -> GRU input of width hidden_size.
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Returns (log_probs, hidden, attn_weights): log-probabilities over the
        output vocabulary, the updated hidden state, and the attention weights
        used for this step.
        """
        embedded = self.dropout(self.embedding(input).view(1, 1, -1))

        # Softmax-normalised attention weights over the encoder positions.
        attn_scores = self.attn(torch.cat((embedded[0], hidden[0]), 1))
        attn_weights = torch.softmax(attn_scores, dim=1)

        # Context vector: attention-weighted sum of the encoder outputs.
        context = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        # Merge embedding and context, then take one GRU step.
        combined = torch.cat((embedded[0], context[0]), 1)
        gru_input = torch.relu(self.attn_combine(combined).unsqueeze(0))
        gru_output, hidden = self.gru(gru_input, hidden)

        log_probs = torch.log_softmax(self.out(gru_output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Zero-filled initial hidden state on the module-level `device`."""
        return torch.zeros((1, 1, self.hidden_size), device=device)

<h3>Hyperparameters and Training:</h3>

In [64]:
def attn_train_fn(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length):
    """Run one optimisation step of the attention model on a single sentence pair.

    Every encoder output is stored in a zero-initialised buffer with
    `max_length` rows, which the decoder attends over. Decoding is
    free-running: every step feeds the decoder its own top-1 prediction (no
    teacher forcing) and stops early once EOS_token is predicted.

    Args:
        input_tensor: 1-D tensor of source token indices.
        target_tensor: 1-D tensor of target token indices.
        encoder, decoder: the encoder and attention decoder to update.
        encoder_optimizer, decoder_optimizer: optimizers for the two modules.
        criterion: loss applied to each decoder output and its target token.
        max_length: number of rows in the encoder-output buffer.

    Returns:
        The summed per-step loss divided by the full target length. The
        divisor is the target length even when decoding stopped early.
    """
    # Start from clean gradients on both modules.
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Encode each word token, keeping every step's output for the attention.
    encoder_hidden = encoder.initHidden()
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for position, source_token in enumerate(input_tensor):
        step_output, encoder_hidden = encoder(source_token.unsqueeze(0), encoder_hidden)
        encoder_outputs[position] = step_output[0, 0]

    # The decoder starts from SOS and the encoder's last hidden state.
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    loss = 0
    for gold_token in target_tensor:
        log_probs, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(log_probs, gold_token.unsqueeze(0))

        # Feed the most likely word back in, detached from the autograd graph.
        _, best_index = log_probs.topk(1)
        decoder_input = best_index.squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    loss.backward()
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.step()

    return loss.item() / target_length

def training(encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, dataloader, epochs, device, max_length=None):
    """Train the encoder / attention-decoder pair for `epochs` passes over `dataloader`.

    Every batch is expected to hold a single (input, target) pair; the leading
    batch dimension is stripped before the pair is handed to attn_train_fn.
    The mean per-sample loss is printed for epochs 0, 10, 20, ...

    Args:
        encoder, decoder: the encoder and attention decoder to update.
        encoder_optimizer, decoder_optimizer: optimizers for the two modules.
        criterion: loss forwarded to attn_train_fn.
        dataloader: iterable of (input_batch, target_batch) pairs that also
            supports len().
        epochs: number of passes over `dataloader`.
        device: device the tensors are moved to before each step.
        max_length: number of rows in the encoder-output buffer. Defaults to
            `decoder.max_length`, the size the decoder's attention layer was
            built with, so the function no longer depends on a module-level
            `max_length` variable defined in another cell.
    """
    if max_length is None:
        max_length = decoder.max_length

    for epoch in range(epochs):
        total_loss = 0
        for i, (input_tensor, target_tensor) in enumerate(dataloader):
            # Drop the batch dimension and move the pair to the target device.
            input_tensor = input_tensor[0].to(device)
            target_tensor = target_tensor[0].to(device)

            # Perform a single training step and accumulate its loss.
            loss = attn_train_fn(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length)
            total_loss += loss

        # Print loss every 10 epochs
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {total_loss / len(dataloader)}')

In [65]:
# Layer sizes: the French vocabulary feeds the encoder, the English
# vocabulary sizes the decoder output.
input_size = len(dataset.fr_vocab.word2index)
output_size = len(dataset.eng_vocab.word2index)
hidden_size = 256
# Number of encoder positions the attention layer can address.
max_length = 14

# Optimisation settings.
epochs = 50
lr = 0.0001

encoder = Encoder(input_size, hidden_size).to(device)
attn_decoder = AttnDecoder(hidden_size, output_size, max_length).to(device)

# Negative log-likelihood loss and one Adam optimizer per module.
criterion = nn.NLLLoss()
encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
decoder_optimizer = optim.Adam(attn_decoder.parameters(), lr=lr)

training(
    encoder,
    attn_decoder,
    encoder_optimizer,
    decoder_optimizer,
    criterion,
    train_dataloader,
    epochs,
    device,
)

Epoch 0, Loss: 3.733839702812624
Epoch 10, Loss: 2.3674116421129203
Epoch 20, Loss: 1.42018545785655
Epoch 30, Loss: 0.7307021817935417
Epoch 40, Loss: 0.34352856872622883


<h3>Evaluating and Comparing Target vs Predictions:</h3>

In [66]:
def evaluate_and_show_examples(encoder, decoder, dataloader, criterion, n_examples=10, max_length=None):
    """Evaluate the attention model on `dataloader` and print sample translations.

    Decoding is greedy and capped at the target length; it stops early once
    EOS_token is predicted. A sample counts as correct only when the whole
    predicted index sequence equals the target sequence. For the first
    `n_examples` batches the input, target and predicted sentences are
    printed, followed by the mean loss and the exact-match accuracy.

    Args:
        encoder, decoder: the encoder and attention decoder to evaluate; both
            are switched to evaluation mode.
        dataloader: yields batches holding a single (input, target) pair; its
            `dataset` must expose `fr_vocab` and `eng_vocab`.
        criterion: loss applied to each decoder output and its target token.
        n_examples: number of leading batches whose sentences are printed.
        max_length: number of rows in the encoder-output buffer. Defaults to
            `decoder.max_length`, the size the decoder's attention layer was
            built with, so the function no longer depends on a module-level
            `max_length` variable defined in another cell.
    """
    if max_length is None:
        max_length = decoder.max_length

    # Put both modules in evaluation mode.
    encoder.eval()
    decoder.eval()

    fr_vocab = dataloader.dataset.fr_vocab
    eng_vocab = dataloader.dataset.eng_vocab

    total_loss = 0
    correct_predictions = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for i, (input_tensor, target_tensor) in enumerate(dataloader):
            # Drop the batch dimension and move the pair to the module-level device.
            input_tensor = input_tensor[0].to(device)
            target_tensor = target_tensor[0].to(device)

            encoder_hidden = encoder.initHidden()
            input_length = input_tensor.size(0)
            target_length = target_tensor.size(0)

            # Zero-initialised buffer of encoder outputs for the attention.
            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            loss = 0

            # Encode input, keeping every step's output
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei].unsqueeze(0), encoder_hidden)
                encoder_outputs[ei] = encoder_output[0, 0]

            # Decode with attention, starting from SOS and the encoder's final state
            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden

            predicted_indices = []

            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.topk(1)
                predicted_indices.append(topi.item())
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
                if decoder_input.item() == EOS_token:
                    break

            # The per-sample loss is normalised by the full target length.
            total_loss += loss.item() / target_length
            if predicted_indices == target_tensor.tolist():
                correct_predictions += 1

            # Show the first few examples with the special tokens stripped
            if i < n_examples:
                predicted_sentence = ' '.join([eng_vocab.index2word[index] for index in predicted_indices if index not in (SOS_token, EOS_token)])
                target_sentence = ' '.join([eng_vocab.index2word[index.item()] for index in target_tensor if index.item() not in (SOS_token, EOS_token)])
                input_sentence = ' '.join([fr_vocab.index2word[index.item()] for index in input_tensor if index.item() not in (SOS_token, EOS_token)])

                print(f'Input: {input_sentence}, Target: {target_sentence}, Predicted: {predicted_sentence}')

        # Print overall evaluation results, averaged over the number of batches
        average_loss = total_loss / len(dataloader)
        accuracy = correct_predictions / len(dataloader)
        print(f'Evaluation Loss: {average_loss:.4f}, Accuracy: {100*accuracy:.2f}%')

In [67]:
# Score the attention model and print a few sample translations.
evaluate_and_show_examples(
    encoder=encoder,
    decoder=attn_decoder,
    dataloader=valid_dataloader,
    criterion=criterion,
)

Input: Le bébé pleure, Target: The baby cries, Predicted: The baby cries
Input: Le chat dort, Target: The cat is sleeping, Predicted: The cat is sleeping
Input: Il a faim, Target: He is hungry, Predicted: He is hungry
Input: Elle enseigne l'anglais à l'école, Target: She teaches English at school, Predicted: She teaches English at school
Input: L'horloge tic-tac bruyamment, Target: The clock ticks loudly, Predicted: The clock ticks loudly
Input: Tu es fatigué, Target: You are tired, Predicted: You are tired
Input: Elle peint un tableau, Target: She paints a picture, Predicted: She paints a picture
Input: Il écrit une lettre, Target: He writes a letter, Predicted: He writes a letter
Input: Elle écrit de la poésie pendant son temps libre, Target: She writes poetry in her free time, Predicted: She writes poetry in her free time
Input: Ils lisent des livres à la bibliothèque, Target: They read books at the library, Predicted: They read books at the library
Evaluation Loss: 0.1632, Accuracy