In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import math
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

########Reversed French → English dataset
data = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")
]

#Tokenizer & Vocab
def tokenize(text):
    return text.lower().strip().split()

class Vocab:
    def __init__(self, texts, specials=["<pad>", "<bos>", "<eos>", "<unk>"]):
        tokens = [tok for sent in texts for tok in tokenize(sent)]
        self.itos = specials + sorted(set(tokens))
        self.stoi = {tok: i for i, tok in enumerate(self.itos)}
        self.pad = self.stoi["<pad>"]
        self.bos = self.stoi["<bos>"]
        self.eos = self.stoi["<eos>"]
        self.unk = self.stoi["<unk>"]

    def encode(self, text):
        return [self.bos] + [self.stoi.get(tok, self.unk) for tok in tokenize(text)] + [self.eos]

    def decode(self, ids):
        return " ".join([self.itos[i] for i in ids if i not in [self.bos, self.eos, self.pad]])

    def __len__(self):
        return len(self.itos)

fr_vocab = Vocab([fr for fr, en in data])  # source
en_vocab = Vocab([en for fr, en in data])  # target

#Dataset & Collate
class TranslationDataset(Dataset):
    def __init__(self, pairs):
        self.pairs = pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        fr, en = self.pairs[idx]
        return torch.tensor(fr_vocab.encode(fr)), torch.tensor(en_vocab.encode(en))

def collate_fn(batch):
    src, tgt = zip(*batch)
    src = pad_sequence(src, batch_first=True, padding_value=fr_vocab.pad)
    tgt = pad_sequence(tgt, batch_first=True, padding_value=en_vocab.pad)
    return src.to(device), tgt.to(device)

dataset = TranslationDataset(data)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)


Using device: cuda


In [2]:
######Positional Encoding
class PositionalEncoding(nn.Module):
    def __init__(self, emb_size, max_len=5000):
        super().__init__()
        pos = torch.arange(0, max_len).unsqueeze(1)
        div = torch.exp(torch.arange(0, emb_size, 2) * (-math.log(10000.0) / emb_size))
        pe = torch.zeros(max_len, emb_size)
        pe[:, 0::2] = torch.sin(pos * div)
        pe[:, 1::2] = torch.cos(pos * div)
        self.pe = pe.unsqueeze(0).to(device)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]


In [3]:
######Transformer Encoder-Decoder for FR → EN

class TransformerModel(nn.Module):
    def __init__(self, fr_vocab_size, en_vocab_size, emb_size=128, num_heads=2, num_layers=2):
        super().__init__()
        self.src_emb = nn.Embedding(fr_vocab_size, emb_size)
        self.tgt_emb = nn.Embedding(en_vocab_size, emb_size)
        self.pos_enc = PositionalEncoding(emb_size)
        self.transformer = nn.Transformer(
            d_model=emb_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=512,
            dropout=0.1,
            batch_first=True
        )
        self.fc = nn.Linear(emb_size, en_vocab_size)

    def forward(self, src, tgt):
        src_mask = self.transformer.generate_square_subsequent_mask(src.size(1)).to(device)
        tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.size(1)).to(device)
        src = self.pos_enc(self.src_emb(src))
        tgt = self.pos_enc(self.tgt_emb(tgt))
        out = self.transformer(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask)
        return self.fc(out)


In [4]:
#####Training Function

def train_seq2seq(model, loader, epochs=5):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=en_vocab.pad)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct, total = 0, 0

        for src, tgt in loader:
            tgt_input = tgt[:, :-1]
            tgt_expected = tgt[:, 1:]

            optimizer.zero_grad()
            output = model(src, tgt_input)
            output = output.reshape(-1, output.shape[-1])
            tgt_expected = tgt_expected.reshape(-1)

            loss = criterion(output, tgt_expected)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (output.argmax(1) == tgt_expected).sum().item()
            total += tgt_expected.ne(en_vocab.pad).sum().item()

        acc = correct / total
        print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")


In [5]:
###Translate French → English

def translate(model, sentence, max_len=20):
    model.eval()
    src = torch.tensor(fr_vocab.encode(sentence)).unsqueeze(0).to(device)
    tgt = torch.tensor([[en_vocab.bos]]).to(device)

    for _ in range(max_len):
        with torch.no_grad():
            out = model(src, tgt)
            next_token = out[:, -1, :].argmax(dim=1, keepdim=True)
            tgt = torch.cat([tgt, next_token], dim=1)
            if next_token.item() == en_vocab.eos:
                break

    return en_vocab.decode(tgt[0].tolist())


In [6]:
model = TransformerModel(len(fr_vocab), len(en_vocab), emb_size=128, num_heads=2, num_layers=2)
train_seq2seq(model, train_loader, epochs=10)

print("\nSample Translations (FR → EN):")
print("FR: Elle est heureuse\nEN:", translate(model, "Elle est heureuse"))
print("FR: Nous sommes amis\nEN:", translate(model, "Nous sommes amis"))


Epoch 1 | Loss: 273.9030 | Accuracy: 0.1889
Epoch 2 | Loss: 240.2113 | Accuracy: 0.2504
Epoch 3 | Loss: 208.8282 | Accuracy: 0.3379
Epoch 4 | Loss: 182.4458 | Accuracy: 0.3656
Epoch 5 | Loss: 162.0054 | Accuracy: 0.3932
Epoch 6 | Loss: 138.7531 | Accuracy: 0.4516
Epoch 7 | Loss: 117.4223 | Accuracy: 0.5084
Epoch 8 | Loss: 101.2934 | Accuracy: 0.6006
Epoch 9 | Loss: 89.3477 | Accuracy: 0.6359
Epoch 10 | Loss: 78.4340 | Accuracy: 0.6605

Sample Translations (FR → EN):
FR: Elle est heureuse
EN: la lune brille intensément
FR: Nous sommes amis
EN: la lune brille intensément


In [7]:
###########Train and Evaluate 8 Transformer Configs#####

In [8]:
def run_transformer_fr_to_en_configs():
    results = []
    configs = [(l, h) for l in [1, 2, 4] for h in [2, 4]]

    for num_layers, num_heads in configs:
        print(f"\n===== Layers: {num_layers}, Heads: {num_heads} =====")
        model = TransformerModel(
            fr_vocab_size=len(fr_vocab),
            en_vocab_size=len(en_vocab),
            emb_size=128,
            num_heads=num_heads,
            num_layers=num_layers
        )

        start = time.time()
        train_seq2seq(model, train_loader, epochs=5)
        duration = time.time() - start

        # Qualitative result
        translation = translate(model, "Nous sommes amis")

        results.append({
            "layers": num_layers,
            "heads": num_heads,
            "params": sum(p.numel() for p in model.parameters()),
            "time": round(duration, 2),
            "sample": translation
        })

    return results


In [9]:
transformer_fr_en_results = run_transformer_fr_to_en_configs()

print("\n=== Summary of FR → EN Transformer Configs ===")
for res in transformer_fr_en_results:
    print(f"Layers: {res['layers']} | Heads: {res['heads']} | Params: {res['params']:,} | Time: {res['time']}s")
    print("Translation (FR → EN):", res['sample'])
    print("-" * 60)



===== Layers: 1, Heads: 2 =====
Epoch 1 | Loss: 275.7745 | Accuracy: 0.1935
Epoch 2 | Loss: 214.3554 | Accuracy: 0.3349
Epoch 3 | Loss: 175.5939 | Accuracy: 0.3917
Epoch 4 | Loss: 142.8398 | Accuracy: 0.4485
Epoch 5 | Loss: 118.2405 | Accuracy: 0.5499

===== Layers: 1, Heads: 4 =====
Epoch 1 | Loss: 272.9899 | Accuracy: 0.1905
Epoch 2 | Loss: 219.8911 | Accuracy: 0.2796
Epoch 3 | Loss: 177.8433 | Accuracy: 0.3794
Epoch 4 | Loss: 141.0090 | Accuracy: 0.4747
Epoch 5 | Loss: 113.8762 | Accuracy: 0.5607

===== Layers: 2, Heads: 2 =====
Epoch 1 | Loss: 278.2978 | Accuracy: 0.1935
Epoch 2 | Loss: 239.2145 | Accuracy: 0.2780
Epoch 3 | Loss: 212.1012 | Accuracy: 0.3456
Epoch 4 | Loss: 192.1183 | Accuracy: 0.3518
Epoch 5 | Loss: 169.7797 | Accuracy: 0.3687

===== Layers: 2, Heads: 4 =====
Epoch 1 | Loss: 278.1284 | Accuracy: 0.1659
Epoch 2 | Loss: 244.5136 | Accuracy: 0.2227
Epoch 3 | Loss: 214.6913 | Accuracy: 0.3103
Epoch 4 | Loss: 187.5665 | Accuracy: 0.3794
Epoch 5 | Loss: 161.4435 | Accur

In [10]:
#######RNN Encoder-Decoder for FR → EN
class RNNEncoder(nn.Module):
    def __init__(self, input_size, emb_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(input_size, emb_size)
        self.rnn = nn.GRU(emb_size, hidden_size, batch_first=True)

    def forward(self, x):
        emb = self.embedding(x)
        outputs, hidden = self.rnn(emb)
        return outputs, hidden

class RNNDecoder(nn.Module):
    def __init__(self, output_size, emb_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(output_size, emb_size)
        self.rnn = nn.GRU(emb_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        emb = self.embedding(x)
        out, hidden = self.rnn(emb, hidden)
        return self.fc(out), hidden

class Seq2SeqRNN(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, tgt):
        _, hidden = self.encoder(src)
        output, _ = self.decoder(tgt, hidden)
        return output


In [11]:
##Train Function for RNN

def train_rnn(model, loader, epochs=5):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=en_vocab.pad)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct, total = 0, 0

        for src, tgt in loader:
            tgt_input = tgt[:, :-1]
            tgt_expected = tgt[:, 1:]

            optimizer.zero_grad()
            output = model(src, tgt_input)
            output = output.reshape(-1, output.shape[-1])
            tgt_expected = tgt_expected.reshape(-1)

            loss = criterion(output, tgt_expected)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (output.argmax(1) == tgt_expected).sum().item()
            total += tgt_expected.ne(en_vocab.pad).sum().item()

        acc = correct / total
        print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")


In [12]:
###Translate FR → EN (RNN no attention)

def translate_rnn(model, sentence, max_len=20):
    model.eval()
    src = torch.tensor(fr_vocab.encode(sentence)).unsqueeze(0).to(device)
    tgt = torch.tensor([[en_vocab.bos]]).to(device)

    with torch.no_grad():
        _, hidden = model.encoder(src)
        for _ in range(max_len):
            out, hidden = model.decoder(tgt[:, -1:], hidden)
            next_token = out[:, -1, :].argmax(1, keepdim=True)
            tgt = torch.cat([tgt, next_token], dim=1)
            if next_token.item() == en_vocab.eos:
                break

    return en_vocab.decode(tgt[0].tolist())


In [13]:
####Run RNN Model (FR → EN)

emb_size = 128
hidden_size = 256

rnn_enc = RNNEncoder(len(fr_vocab), emb_size, hidden_size)
rnn_dec = RNNDecoder(len(en_vocab), emb_size, hidden_size)
rnn_model = Seq2SeqRNN(rnn_enc, rnn_dec)

train_rnn(rnn_model, train_loader, epochs=10)

print("\nSample RNN Translations (FR → EN):")
print("FR: Elle est heureuse\nEN:", translate_rnn(rnn_model, "Elle est heureuse"))
print("FR: Nous sommes amis\nEN:", translate_rnn(rnn_model, "Nous sommes amis"))


Epoch 1 | Loss: 274.0249 | Accuracy: 0.1751
Epoch 2 | Loss: 208.5327 | Accuracy: 0.2934
Epoch 3 | Loss: 161.9120 | Accuracy: 0.3902
Epoch 4 | Loss: 124.8780 | Accuracy: 0.4639
Epoch 5 | Loss: 96.1698 | Accuracy: 0.5853
Epoch 6 | Loss: 71.4778 | Accuracy: 0.7051
Epoch 7 | Loss: 52.0005 | Accuracy: 0.7757
Epoch 8 | Loss: 38.3043 | Accuracy: 0.8433
Epoch 9 | Loss: 28.5458 | Accuracy: 0.9078
Epoch 10 | Loss: 20.9452 | Accuracy: 0.9539

Sample RNN Translations (FR → EN):
FR: Elle est heureuse
EN: elle attache ses lacets
FR: Nous sommes amis
EN: elle attache ses lacets


In [14]:
###Attention Module

class Attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(hidden_size * 2, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))

    def forward(self, hidden, encoder_outputs):
        # hidden: [batch, 1, hidden], encoder_outputs: [batch, seq_len, hidden]
        batch_size = encoder_outputs.size(0)
        seq_len = encoder_outputs.size(1)

        hidden = hidden.repeat(1, seq_len, 1)  # [batch, seq_len, hidden]
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        energy = energy.transpose(1, 2)  # [batch, hidden, seq_len]
        v = self.v.repeat(batch_size, 1).unsqueeze(1)  # [batch, 1, hidden]
        attn_weights = torch.bmm(v, energy).squeeze(1)  # [batch, seq_len]
        return torch.softmax(attn_weights, dim=1)  # [batch, seq_len]


In [15]:
##Decoder with Attention

class AttnDecoder(nn.Module):
    def __init__(self, output_size, emb_size, hidden_size, attention):
        super().__init__()
        self.embedding = nn.Embedding(output_size, emb_size)
        self.rnn = nn.GRU(hidden_size + emb_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.attention = attention

    def forward(self, x, hidden, encoder_outputs):
        embedded = self.embedding(x)  # [batch, 1, emb]
        attn_weights = self.attention(hidden.permute(1, 0, 2), encoder_outputs)  # [batch, seq_len]
        attn_weights = attn_weights.unsqueeze(1)  # [batch, 1, seq_len]
        context = torch.bmm(attn_weights, encoder_outputs)  # [batch, 1, hidden]

        rnn_input = torch.cat((embedded, context), dim=2)  # [batch, 1, emb + hidden]
        output, hidden = self.rnn(rnn_input, hidden)  # [batch, 1, hidden]
        output = self.fc(torch.cat((output, context), dim=2))  # [batch, 1, vocab]
        return output, hidden


In [16]:
###Seq2Seq Model with Attention

class Seq2SeqAttn(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, tgt):
        encoder_outputs, hidden = self.encoder(src)
        outputs = []

        for t in range(tgt.size(1)):
            out, hidden = self.decoder(tgt[:, t].unsqueeze(1), hidden, encoder_outputs)
            outputs.append(out)

        return torch.cat(outputs, dim=1)


In [17]:
####Training Function

def train_rnn_attention(model, loader, epochs=5):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=en_vocab.pad)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct, total = 0, 0

        for src, tgt in loader:
            tgt_input = tgt[:, :-1]
            tgt_expected = tgt[:, 1:]

            optimizer.zero_grad()
            output = model(src, tgt_input)
            output = output.reshape(-1, output.shape[-1])
            tgt_expected = tgt_expected.reshape(-1)

            loss = criterion(output, tgt_expected)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (output.argmax(1) == tgt_expected).sum().item()
            total += tgt_expected.ne(en_vocab.pad).sum().item()

        acc = correct / total
        print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")


In [18]:
####Translate with RNN + Attention

def translate_rnn_attention(model, sentence, max_len=20):
    model.eval()
    src = torch.tensor(fr_vocab.encode(sentence)).unsqueeze(0).to(device)
    tgt = torch.tensor([[en_vocab.bos]]).to(device)

    with torch.no_grad():
        encoder_outputs, hidden = model.encoder(src)
        for _ in range(max_len):
            out, hidden = model.decoder(tgt[:, -1:], hidden, encoder_outputs)
            next_token = out[:, -1, :].argmax(1, keepdim=True)
            tgt = torch.cat([tgt, next_token], dim=1)
            if next_token.item() == en_vocab.eos:
                break

    return en_vocab.decode(tgt[0].tolist())


In [19]:
##Run Model (FR → EN)

emb_size = 128
hidden_size = 256

attn = Attention(hidden_size)
rnn_enc_attn = RNNEncoder(len(fr_vocab), emb_size, hidden_size)
rnn_dec_attn = AttnDecoder(len(en_vocab), emb_size, hidden_size, attn)
rnn_attn_model = Seq2SeqAttn(rnn_enc_attn, rnn_dec_attn)

train_rnn_attention(rnn_attn_model, train_loader, epochs=10)

print("\nSample RNN + Attention Translations (FR → EN):")
print("FR: Elle est heureuse\nEN:", translate_rnn_attention(rnn_attn_model, "Elle est heureuse"))
print("FR: Nous sommes amis\nEN:", translate_rnn_attention(rnn_attn_model, "Nous sommes amis"))


Epoch 1 | Loss: 269.2495 | Accuracy: 0.2120
Epoch 2 | Loss: 210.8001 | Accuracy: 0.3026
Epoch 3 | Loss: 170.0229 | Accuracy: 0.3533
Epoch 4 | Loss: 132.1062 | Accuracy: 0.4393
Epoch 5 | Loss: 102.8654 | Accuracy: 0.5376
Epoch 6 | Loss: 79.1983 | Accuracy: 0.6436
Epoch 7 | Loss: 59.2340 | Accuracy: 0.7373
Epoch 8 | Loss: 45.8380 | Accuracy: 0.7896
Epoch 9 | Loss: 35.7974 | Accuracy: 0.8310
Epoch 10 | Loss: 29.4840 | Accuracy: 0.8664

Sample RNN + Attention Translations (FR → EN):
FR: Elle est heureuse
EN: il se peigne les cheveux
FR: Nous sommes amis
EN: il se peigne les cheveux
