<a href="https://colab.research.google.com/github/JingchenYan1/Real-Time-ML/blob/main/Homework4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
!pip install python-docx

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
from docx import Document

doc_path = "/content/drive/My Drive/Dataset - English to French.docx"
doc = Document(doc_path)
raw_text = "\n".join([para.text for para in doc.paragraphs])

namespace = {}
exec(raw_text, namespace)
english_to_french = namespace['english_to_french']


In [44]:
import pandas as pd

df = pd.DataFrame(english_to_french, columns=["English", "French"])

import re
def preprocess(sentence):
    sentence = sentence.lower().strip()
    sentence = re.sub(r"[^a-zA-ZÀ-ÿ\s]", "", sentence)
    tokens = sentence.split()
    return tokens

df["English_tokens"] = df["English"].apply(preprocess)
df["French_tokens"] = df["French"].apply(preprocess)


In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class Vocab:
    def __init__(self, tokens_list, min_freq=1):
        self.word2index = {"<pad>":0, "<sos>":1, "<eos>":2, "<unk>":3}
        self.index2word = {0:"<pad>", 1:"<sos>", 2:"<eos>", 3:"<unk>"}
        self.word_freq = {}
        self.min_freq = min_freq
        self.build_vocab(tokens_list)

    def build_vocab(self, tokens_list):
        idx = len(self.word2index)
        for tokens in tokens_list:
            for word in tokens:
                self.word_freq[word] = self.word_freq.get(word, 0) + 1
        for word, freq in self.word_freq.items():
            if freq >= self.min_freq:
                self.word2index[word] = idx
                self.index2word[idx] = word
                idx += 1

    def numericalize(self, tokens):
        return [self.word2index.get(word, self.word2index["<unk>"]) for word in tokens]

english_vocab = Vocab(df["English_tokens"])
french_vocab = Vocab(df["French_tokens"])

class TranslationDataset(Dataset):
    def __init__(self, df, src_col, tgt_col, src_vocab, tgt_vocab):
        self.df = df
        self.src_col = src_col
        self.tgt_col = tgt_col
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        src_tokens = self.df.iloc[idx][self.src_col]
        tgt_tokens = self.df.iloc[idx][self.tgt_col]
        src_indices = self.src_vocab.numericalize(src_tokens)
        tgt_indices = [self.tgt_vocab.word2index["<sos>"]] + self.tgt_vocab.numericalize(tgt_tokens) + [self.tgt_vocab.word2index["<eos>"]]
        return torch.tensor(src_indices), torch.tensor(tgt_indices)

def collate_fn(batch):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(s) for s in src_batch]
    tgt_lens = [len(t) for t in tgt_batch]
    src_padded = nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=english_vocab.word2index["<pad>"])
    tgt_padded = nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=french_vocab.word2index["<pad>"])
    return src_padded, tgt_padded, src_lens, tgt_lens

dataset_en2fr = TranslationDataset(df, "English_tokens", "French_tokens", english_vocab, french_vocab)
dataloader_en2fr = DataLoader(dataset_en2fr, batch_size=4, shuffle=True, collate_fn=collate_fn)


In [46]:
def compute_accuracy(output, tgt, pad_idx):
    pred_tokens = output.argmax(dim=-1)
    mask = (tgt != pad_idx)
    correct = (pred_tokens == tgt) * mask
    return correct.sum().item() / mask.sum().item()


In [47]:
# Problem 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, num_layers=1):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, num_layers, batch_first=True)
    def forward(self, src, src_lens):
        embedded = self.embedding(src)
        packed = nn.utils.rnn.pack_padded_sequence(embedded, src_lens, batch_first=True, enforce_sorted=False)
        outputs, hidden = self.gru(packed)
        return hidden

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, num_layers=1):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, num_layers, batch_first=True)
        self.fc_out = nn.Linear(hid_dim, output_dim)
    def forward(self, input, hidden):
        input = input.unsqueeze(1)  # [batch, 1]
        embedded = self.embedding(input)
        output, hidden = self.gru(embedded, hidden)
        prediction = self.fc_out(output.squeeze(1))
        return prediction, hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    def forward(self, src, src_lens, tgt, teacher_forcing_ratio=0.5):
        batch_size = src.size(0)
        tgt_len = tgt.size(1)
        tgt_vocab_size = self.decoder.fc_out.out_features
        outputs = torch.zeros(batch_size, tgt_len, tgt_vocab_size).to(self.device)
        hidden = self.encoder(src, src_lens)
        input_token = tgt[:, 0]  # <sos>
        for t in range(1, tgt_len):
            output, hidden = self.decoder(input_token, hidden)
            outputs[:, t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input_token = tgt[:, t] if teacher_force else top1
        return outputs

INPUT_DIM = len(english_vocab.word2index)
OUTPUT_DIM = len(french_vocab.word2index)
EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 1
NUM_EPOCHS = 10

encoder_p1 = Encoder(INPUT_DIM, EMB_DIM, HID_DIM, N_LAYERS).to(device)
decoder_p1 = Decoder(OUTPUT_DIM, EMB_DIM, HID_DIM, N_LAYERS).to(device)
model_p1 = Seq2Seq(encoder_p1, decoder_p1, device).to(device)
optimizer_p1 = optim.Adam(model_p1.parameters())
criterion_p1 = nn.CrossEntropyLoss(ignore_index=french_vocab.word2index["<pad>"])

def train_model(model, dataloader, optimizer, criterion, clip=1):
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    for src, tgt, src_lens, tgt_lens in dataloader:
        src, tgt = src.to(device), tgt.to(device)
        optimizer.zero_grad()
        output = model(src, src_lens, tgt)
        output_dim = output.shape[-1]
        output_for_loss = output[:, 1:].reshape(-1, output_dim)
        tgt_for_loss = tgt[:, 1:].reshape(-1)
        loss = criterion(output_for_loss, tgt_for_loss)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        acc = compute_accuracy(output[:, 1:], tgt[:, 1:], french_vocab.word2index["<pad>"])
        epoch_loss += loss.item()
        epoch_acc += acc
    return epoch_loss / len(dataloader), epoch_acc / len(dataloader)

def evaluate_model(model, dataloader, criterion):
    model.eval()
    epoch_loss = 0
    epoch_acc = 0
    with torch.no_grad():
        for src, tgt, src_lens, tgt_lens in dataloader:
            src, tgt = src.to(device), tgt.to(device)
            output = model(src, src_lens, tgt, teacher_forcing_ratio=0)
            output_dim = output.shape[-1]
            output_for_loss = output[:, 1:].reshape(-1, output_dim)
            tgt_for_loss = tgt[:, 1:].reshape(-1)
            loss = criterion(output_for_loss, tgt_for_loss)
            acc = compute_accuracy(output[:, 1:], tgt[:, 1:], french_vocab.word2index["<pad>"])
            epoch_loss += loss.item()
            epoch_acc += acc
    return epoch_loss / len(dataloader), epoch_acc / len(dataloader)

print("=== Problem 1: English -> French (Seq2Seq without Attention) ===")
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_model(model_p1, dataloader_en2fr, optimizer_p1, criterion_p1)
    val_loss, val_acc = evaluate_model(model_p1, dataloader_en2fr, criterion_p1)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

def translate_sentence(model, sentence, src_vocab, tgt_vocab, max_len=20):
    model.eval()
    tokens = preprocess(sentence)
    indices = src_vocab.numericalize(tokens)
    src_tensor = torch.tensor(indices).unsqueeze(0).to(device)
    src_len = [len(indices)]
    with torch.no_grad():
        hidden = model.encoder(src_tensor, src_len)
    input_token = torch.tensor([tgt_vocab.word2index["<sos>"]]).to(device)
    translated_sentence = []
    for _ in range(max_len):
        with torch.no_grad():
            output, hidden = model.decoder(input_token, hidden)
        top1 = output.argmax(1)
        if top1.item() == tgt_vocab.word2index["<eos>"]:
            break
        translated_sentence.append(tgt_vocab.index2word[top1.item()])
        input_token = top1
    return " ".join(translated_sentence)


=== Problem 1: English -> French (Seq2Seq without Attention) ===
Epoch 1: Train Loss = 4.8085, Train Acc = 0.2230, Val Loss = 3.7276, Val Acc = 0.3338
Epoch 2: Train Loss = 3.6108, Train Acc = 0.3646, Val Loss = 3.0196, Val Acc = 0.3971
Epoch 3: Train Loss = 2.9283, Train Acc = 0.3991, Val Loss = 2.3581, Val Acc = 0.5337
Epoch 4: Train Loss = 2.2678, Train Acc = 0.5104, Val Loss = 1.5555, Val Acc = 0.7365
Epoch 5: Train Loss = 1.5296, Train Acc = 0.7005, Val Loss = 0.9204, Val Acc = 0.8675
Epoch 6: Train Loss = 0.8698, Train Acc = 0.8495, Val Loss = 0.4270, Val Acc = 0.9664
Epoch 7: Train Loss = 0.4726, Train Acc = 0.9250, Val Loss = 0.2137, Val Acc = 0.9744
Epoch 8: Train Loss = 0.2355, Train Acc = 0.9584, Val Loss = 0.1251, Val Acc = 0.9824
Epoch 9: Train Loss = 0.1302, Train Acc = 0.9788, Val Loss = 0.0657, Val Acc = 0.9904
Epoch 10: Train Loss = 0.0505, Train Acc = 0.9954, Val Loss = 0.0324, Val Acc = 0.9983


In [51]:
test_sentences = [
    "She wears a red dress and dances at the party",
    "After they visit the museum, they play video games",
    "Although he is tired, he works hard every day",
    "She sings a song while cooking dinner",
    "We eat breakfast together before we go to the gym",
    "He said that the coffee is hot",
    "She thinks that the teacher explains the lesson well",
    "They do not enjoy the sunset when it rains",
    "She is not happy because the cat is sleeping on her dress",
    "He plays the guitar and also sings in the choir"
]

print("=== Problem 1: Complex sentence translation test ===")
for sentence in test_sentences:
    print(f"\nInput: {sentence}")
    output = translate_sentence(model_p1, sentence, english_vocab, french_vocab)
    print(f"Output: {output}")

=== Problem 1: Complex sentence translation test ===

Input: She wears a red dress and dances at the party
Output: elle danse à la fête

Input: After they visit the museum, they play video games
Output: ils jouent aux jeux vidéo

Input: Although he is tired, he works hard every day
Output: il travaille dur tous les jours

Input: She sings a song while cooking dinner
Output: elle écrit de la poésie pendant son temps libre

Input: We eat breakfast together before we go to the gym
Output: nous montons les escaliers de la musique

Input: He said that the coffee is hot
Output: le café est chaud

Input: She thinks that the teacher explains the lesson well
Output: elle nourrit le chat

Input: They do not enjoy the sunset when it rains
Output: le professeur explique la leçon

Input: She is not happy because the cat is sleeping on her dress
Output: elle porte une robe rouge

Input: He plays the guitar and also sings in the choir
Output: il chante dans le chur


In [49]:
# Problem 2
def translate_sentence_attn(model, sentence, src_vocab, tgt_vocab, max_len=20):
    model.eval()
    tokens = preprocess(sentence)
    indices = src_vocab.numericalize(tokens)
    src_tensor = torch.tensor(indices).unsqueeze(0).to(device)
    src_len = [len(indices)]
    with torch.no_grad():
        encoder_outputs, hidden = model.encoder(src_tensor, src_len)
    input_token = torch.tensor([tgt_vocab.word2index["<sos>"]]).to(device)
    translated_sentence = []
    for _ in range(max_len):
        with torch.no_grad():
            output, hidden = model.decoder(input_token, hidden, encoder_outputs)
        top1 = output.argmax(1)
        if top1.item() == tgt_vocab.word2index["<eos>"]:
            break
        translated_sentence.append(tgt_vocab.index2word[top1.item()])
        input_token = top1
    return " ".join(translated_sentence)

class EncoderAttn(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, num_layers=1):
        super(EncoderAttn, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, num_layers, batch_first=True)
    def forward(self, src, src_lens):
        embedded = self.embedding(src)
        packed = nn.utils.rnn.pack_padded_sequence(embedded, src_lens, batch_first=True, enforce_sorted=False)
        outputs, hidden = self.gru(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
        return outputs, hidden

class Attention(nn.Module):
    def __init__(self, hid_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hid_dim*2, hid_dim)
        self.v = nn.Linear(hid_dim, 1, bias=False)
    def forward(self, hidden, encoder_outputs, mask=None):
        batch_size = encoder_outputs.shape[0]
        src_len = encoder_outputs.shape[1]
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        if mask is not None:
            attention = attention.masked_fill(mask==0, -1e10)
        return torch.softmax(attention, dim=1)

class AttnDecoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, attention, num_layers=1):
        super(AttnDecoder, self).__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU(emb_dim + hid_dim, hid_dim, num_layers, batch_first=True)
        self.fc_out = nn.Linear(hid_dim*2, output_dim)
        self.attention = attention
    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(1)
        embedded = self.embedding(input)
        attn_weights = self.attention(hidden[-1], encoder_outputs)
        attn_weights = attn_weights.unsqueeze(1)
        context = torch.bmm(attn_weights, encoder_outputs)
        gru_input = torch.cat((embedded, context), dim=2)
        output, hidden = self.gru(gru_input, hidden)
        output = output.squeeze(1)
        context = context.squeeze(1)
        prediction = self.fc_out(torch.cat((output, context), dim=1))
        return prediction, hidden

class Seq2SeqAttn(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2SeqAttn, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    def forward(self, src, src_lens, tgt, teacher_forcing_ratio=0.5):
        batch_size = src.size(0)
        tgt_len = tgt.size(1)
        tgt_vocab_size = self.decoder.output_dim
        encoder_outputs, hidden = self.encoder(src, src_lens)
        outputs = torch.zeros(batch_size, tgt_len, tgt_vocab_size).to(self.device)
        input_token = tgt[:, 0]
        for t in range(1, tgt_len):
            output, hidden = self.decoder(input_token, hidden, encoder_outputs)
            outputs[:, t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input_token = tgt[:, t] if teacher_force else top1
        return outputs

encoder_attn = EncoderAttn(INPUT_DIM, EMB_DIM, HID_DIM, N_LAYERS).to(device)
attn = Attention(HID_DIM)
decoder_attn = AttnDecoder(OUTPUT_DIM, EMB_DIM, HID_DIM, attn, N_LAYERS).to(device)
model_p2 = Seq2SeqAttn(encoder_attn, decoder_attn, device).to(device)
optimizer_p2 = optim.Adam(model_p2.parameters())
criterion_p2 = nn.CrossEntropyLoss(ignore_index=french_vocab.word2index["<pad>"])

print("\n=== Problem 2: English -> French with Attention ===")
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_model(model_p2, dataloader_en2fr, optimizer_p2, criterion_p2)
    val_loss, val_acc = evaluate_model(model_p2, dataloader_en2fr, criterion_p2)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")



=== Problem 2: English -> French with Attention ===
Epoch 1: Train Loss = 4.6739, Train Acc = 0.2532, Val Loss = 3.5438, Val Acc = 0.3337
Epoch 2: Train Loss = 3.3709, Train Acc = 0.3636, Val Loss = 2.5394, Val Acc = 0.4424
Epoch 3: Train Loss = 2.4764, Train Acc = 0.4300, Val Loss = 1.6354, Val Acc = 0.6524
Epoch 4: Train Loss = 1.6022, Train Acc = 0.6363, Val Loss = 0.8212, Val Acc = 0.8273
Epoch 5: Train Loss = 0.8078, Train Acc = 0.8097, Val Loss = 0.5095, Val Acc = 0.8731
Epoch 6: Train Loss = 0.5374, Train Acc = 0.8486, Val Loss = 0.4439, Val Acc = 0.8875
Epoch 7: Train Loss = 0.4456, Train Acc = 0.8906, Val Loss = 0.2803, Val Acc = 0.9133
Epoch 8: Train Loss = 0.2553, Train Acc = 0.9331, Val Loss = 0.1743, Val Acc = 0.9599
Epoch 9: Train Loss = 0.1811, Train Acc = 0.9557, Val Loss = 0.1850, Val Acc = 0.9463
Epoch 10: Train Loss = 0.1277, Train Acc = 0.9664, Val Loss = 0.0559, Val Acc = 0.9839


In [52]:
test_sentences = [
    "She wears a red dress and dances at the party",
    "After they visit the museum, they play video games",
    "Although he is tired, he works hard every day",
    "She sings a song while cooking dinner",
    "We eat breakfast together before we go to the gym",
    "He said that the coffee is hot",
    "She thinks that the teacher explains the lesson well",
    "They do not enjoy the sunset when it rains",
    "She is not happy because the cat is sleeping on her dress",
    "He plays the guitar and also sings in the choir"
]

print("\n=== Problem 2: Complex sentence translation test (with Attention) ===")
for sentence in test_sentences:
    print(f"\nInput: {sentence}")
    output = translate_sentence_attn(model_p2, sentence, english_vocab, french_vocab)
    print(f"Output: {output}")


=== Problem 2: Complex sentence translation test (with Attention) ===

Input: She wears a red dress and dances at the party
Output: elle porte une robe rouge

Input: After they visit the museum, they play video games
Output: ils jouent aux jeux vidéo

Input: Although he is tired, he works hard every day
Output: il travaille dur tous les jours

Input: She sings a song while cooking dinner
Output: elle chante une chanson

Input: We eat breakfast together before we go to the gym
Output: nous prenons le petit déjeuner ensemble

Input: He said that the coffee is hot
Output: le café est chaud

Input: She thinks that the teacher explains the lesson well
Output: elle porte le long

Input: They do not enjoy the sunset when it rains
Output: ils apprécient le coucher du soleil

Input: She is not happy because the cat is sleeping on her dress
Output: elle porte le chat

Input: He plays the guitar and also sings in the choir
Output: il chante dans le chur


In [53]:
# Problem 3
dataset_fr2en = TranslationDataset(df, "French_tokens", "English_tokens", french_vocab, english_vocab)
dataloader_fr2en = DataLoader(dataset_fr2en, batch_size=4, shuffle=True, collate_fn=collate_fn)

INPUT_DIM_fr = len(french_vocab.word2index)
OUTPUT_DIM_fr = len(english_vocab.word2index)

encoder_fr = Encoder(INPUT_DIM_fr, EMB_DIM, HID_DIM, N_LAYERS).to(device)
decoder_fr = Decoder(OUTPUT_DIM_fr, EMB_DIM, HID_DIM, N_LAYERS).to(device)
model_fr = Seq2Seq(encoder_fr, decoder_fr, device).to(device)
optimizer_fr = optim.Adam(model_fr.parameters())
criterion_fr = nn.CrossEntropyLoss(ignore_index=english_vocab.word2index["<pad>"])

print("\n=== Problem 3: French -> English (Seq2Seq without Attention) ===")
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_model(model_fr, dataloader_fr2en, optimizer_fr, criterion_fr)
    val_loss, val_acc = evaluate_model(model_fr, dataloader_fr2en, criterion_fr)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

def translate_sentence_fr2en(model, sentence, src_vocab, tgt_vocab, max_len=20):
    model.eval()
    tokens = preprocess(sentence)
    indices = src_vocab.numericalize(tokens)
    src_tensor = torch.tensor(indices).unsqueeze(0).to(device)
    src_len = [len(indices)]
    with torch.no_grad():
        hidden = model.encoder(src_tensor, src_len)
    input_token = torch.tensor([tgt_vocab.word2index["<sos>"]]).to(device)
    translated_sentence = []
    for _ in range(max_len):
        with torch.no_grad():
            output, hidden = model.decoder(input_token, hidden)
        top1 = output.argmax(1)
        if top1.item() == tgt_vocab.word2index["<eos>"]:
            break
        translated_sentence.append(tgt_vocab.index2word[top1.item()])
        input_token = top1
    return " ".join(translated_sentence)

def translate_sentence_fr2en_attn(model, sentence, src_vocab, tgt_vocab, max_len=20):
    model.eval()
    tokens = preprocess(sentence)
    indices = src_vocab.numericalize(tokens)
    src_tensor = torch.tensor(indices).unsqueeze(0).to(device)
    src_len = [len(indices)]
    with torch.no_grad():
        encoder_outputs, hidden = model.encoder(src_tensor, src_len)
    input_token = torch.tensor([tgt_vocab.word2index["<sos>"]]).to(device)
    translated_sentence = []
    for _ in range(max_len):
        with torch.no_grad():
            output, hidden = model.decoder(input_token, hidden, encoder_outputs)
        top1 = output.argmax(1)
        if top1.item() == tgt_vocab.word2index["<eos>"]:
            break
        translated_sentence.append(tgt_vocab.index2word[top1.item()])
        input_token = top1
    return " ".join(translated_sentence)



=== Problem 3: French -> English (Seq2Seq without Attention) ===
Epoch 1: Train Loss = 4.4789, Train Acc = 0.2626, Val Loss = 3.4880, Val Acc = 0.3713
Epoch 2: Train Loss = 3.2937, Train Acc = 0.4119, Val Loss = 2.8151, Val Acc = 0.4482
Epoch 3: Train Loss = 2.6487, Train Acc = 0.4528, Val Loss = 2.1485, Val Acc = 0.5673
Epoch 4: Train Loss = 2.0861, Train Acc = 0.5191, Val Loss = 1.4403, Val Acc = 0.7352
Epoch 5: Train Loss = 1.4001, Train Acc = 0.7143, Val Loss = 0.8121, Val Acc = 0.8921
Epoch 6: Train Loss = 0.8294, Train Acc = 0.8574, Val Loss = 0.4090, Val Acc = 0.9522
Epoch 7: Train Loss = 0.4071, Train Acc = 0.9378, Val Loss = 0.1964, Val Acc = 0.9826
Epoch 8: Train Loss = 0.1910, Train Acc = 0.9787, Val Loss = 0.0653, Val Acc = 1.0000
Epoch 9: Train Loss = 0.0742, Train Acc = 0.9956, Val Loss = 0.0341, Val Acc = 1.0000
Epoch 10: Train Loss = 0.0398, Train Acc = 0.9964, Val Loss = 0.0214, Val Acc = 1.0000


In [54]:
test_sentences_fr = [
    "Elle porte une robe rouge et danse à la fête",
    "Après avoir visité le musée, ils jouent aux jeux vidéo",
    "Bien qu’il soit fatigué, il travaille dur tous les jours",
    "Elle chante une chanson en cuisinant le dîner",
    "Nous prenons le petit déjeuner ensemble avant d’aller à la salle de sport",
    "Il a dit que le café est chaud",
    "Elle pense que le professeur explique bien la leçon",
    "Ils n’apprécient pas le coucher du soleil quand il pleut",
    "Elle n’est pas heureuse parce que le chat dort sur sa robe",
    "Il joue de la guitare et chante aussi dans le chœur"
]

print("\n=== Problem 3: French -> English (without Attention) ===")
for sentence in test_sentences_fr:
    print(f"\nInput: {sentence}")
    output = translate_sentence_fr2en(model_fr, sentence, french_vocab, english_vocab)
    print(f"Output: {output}")



=== Problem 3: French -> English (without Attention) ===

Input: Elle porte une robe rouge et danse à la fête
Output: she dances at the party

Input: Après avoir visité le musée, ils jouent aux jeux vidéo
Output: the play play in the evening

Input: Bien qu’il soit fatigué, il travaille dur tous les jours
Output: he works hard every day

Input: Elle chante une chanson en cuisinant le dîner
Output: she catches the bus

Input: Nous prenons le petit déjeuner ensemble avant d’aller à la salle de sport
Output: we play music at the concert

Input: Il a dit que le café est chaud
Output: the coffee is hot

Input: Elle pense que le professeur explique bien la leçon
Output: she teacher the the lesson

Input: Ils n’apprécient pas le coucher du soleil quand il pleut
Output: he shaves in the morning

Input: Elle n’est pas heureuse parce que le chat dort sur sa robe
Output: she studies hard for exams

Input: Il joue de la guitare et chante aussi dans le chœur
Output: he sings in the choir


In [55]:
def translate_sentence_fr2en_attn(model, sentence, src_vocab, tgt_vocab, max_len=20):
    model.eval()
    tokens = preprocess(sentence)
    indices = src_vocab.numericalize(tokens)
    src_tensor = torch.tensor(indices).unsqueeze(0).to(device)
    src_len = [len(indices)]
    with torch.no_grad():
        encoder_outputs, hidden = model.encoder(src_tensor, src_len)
    input_token = torch.tensor([tgt_vocab.word2index["<sos>"]]).to(device)
    translated_sentence = []
    for _ in range(max_len):
        with torch.no_grad():
            output, hidden = model.decoder(input_token, hidden, encoder_outputs)
        top1 = output.argmax(1)
        if top1.item() == tgt_vocab.word2index["<eos>"]:
            break
        translated_sentence.append(tgt_vocab.index2word[top1.item()])
        input_token = top1
    return " ".join(translated_sentence)

encoder_fr_attn = EncoderAttn(INPUT_DIM_fr, EMB_DIM, HID_DIM, N_LAYERS).to(device)
attn_fr = Attention(HID_DIM)
decoder_fr_attn = AttnDecoder(OUTPUT_DIM_fr, EMB_DIM, HID_DIM, attn_fr, N_LAYERS).to(device)
model_fr_attn = Seq2SeqAttn(encoder_fr_attn, decoder_fr_attn, device).to(device)
optimizer_fr_attn = optim.Adam(model_fr_attn.parameters())
criterion_fr_attn = nn.CrossEntropyLoss(ignore_index=english_vocab.word2index["<pad>"])

print("\n=== Problem 3: French -> English with Attention ===")
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_model(model_fr_attn, dataloader_fr2en, optimizer_fr_attn, criterion_fr_attn)
    val_loss, val_acc = evaluate_model(model_fr_attn, dataloader_fr2en, criterion_fr_attn)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")


=== Problem 3: French -> English with Attention ===
Epoch 1: Train Loss = 4.3309, Train Acc = 0.2909, Val Loss = 3.2030, Val Acc = 0.3790
Epoch 2: Train Loss = 3.0452, Train Acc = 0.4091, Val Loss = 2.2627, Val Acc = 0.5453
Epoch 3: Train Loss = 2.1495, Train Acc = 0.5363, Val Loss = 1.3278, Val Acc = 0.7356
Epoch 4: Train Loss = 1.3558, Train Acc = 0.6770, Val Loss = 0.7374, Val Acc = 0.8518
Epoch 5: Train Loss = 0.7698, Train Acc = 0.8352, Val Loss = 0.4415, Val Acc = 0.8972
Epoch 6: Train Loss = 0.4246, Train Acc = 0.8824, Val Loss = 0.2623, Val Acc = 0.9380
Epoch 7: Train Loss = 0.3135, Train Acc = 0.9291, Val Loss = 0.1550, Val Acc = 0.9627
Epoch 8: Train Loss = 0.1887, Train Acc = 0.9544, Val Loss = 0.2476, Val Acc = 0.9435
Epoch 9: Train Loss = 0.1365, Train Acc = 0.9665, Val Loss = 0.1238, Val Acc = 0.9635
Epoch 10: Train Loss = 0.0946, Train Acc = 0.9755, Val Loss = 0.0214, Val Acc = 0.9970


In [56]:
print("\n=== Problem 3: French -> English (with Attention) ===")
for sentence in test_sentences_fr:
    print(f"\nInput: {sentence}")
    output = translate_sentence_fr2en_attn(model_fr_attn, sentence, french_vocab, english_vocab)
    print(f"Output: {output}")



=== Problem 3: French -> English (with Attention) ===

Input: Elle porte une robe rouge et danse à la fête
Output: she wears a red dress

Input: Après avoir visité le musée, ils jouent aux jeux vidéo
Output: they play to the weekend

Input: Bien qu’il soit fatigué, il travaille dur tous les jours
Output: he works hard every day

Input: Elle chante une chanson en cuisinant le dîner
Output: she sings dinner

Input: Nous prenons le petit déjeuner ensemble avant d’aller à la salle de sport
Output: we eat breakfast together

Input: Il a dit que le café est chaud
Output: he is for the morning

Input: Elle pense que le professeur explique bien la leçon
Output: she walks along the beach

Input: Ils n’apprécient pas le coucher du soleil quand il pleut
Output: he listens in the morning

Input: Elle n’est pas heureuse parce que le chat dort sur sa robe
Output: she walks along the beach

Input: Il joue de la guitare et chante aussi dans le chœur
Output: he sings in the choir
