In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from collections import Counter
import torch.nn.functional as F

english_to_french = [

    ("I am cold", "J'ai froid"),

    ("You are tired", "Tu es fatigué"),

    ("He is hungry", "Il a faim"),

    ("She is happy", "Elle est heureuse"),

    ("We are friends", "Nous sommes amis"),

    ("They are students", "Ils sont étudiants"),

    ("The cat is sleeping", "Le chat dort"),

    ("The sun is shining", "Le soleil brille"),

    ("We love music", "Nous aimons la musique"),

    ("She speaks French fluently", "Elle parle français couramment"),

    ("He enjoys reading books", "Il aime lire des livres"),

    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),

    ("The movie starts at 7 PM", "Le film commence à 19 heures"),

    ("She wears a red dress", "Elle porte une robe rouge"),

    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),

    ("He drives a blue car", "Il conduit une voiture bleue"),

    ("They visit museums often", "Ils visitent souvent des musées"),

    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),

    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),

    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),

    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),

    ("They travel around the world", "Ils voyagent autour du monde"),

    ("The book is on the table", "Le livre est sur la table"),

    ("She dances gracefully", "Elle danse avec grâce"),

    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),

    ("He works hard every day", "Il travaille dur tous les jours"),

    ("They speak different languages", "Ils parlent différentes langues"),

    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),

    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),

    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),

    ("The dog barks loudly", "Le chien aboie bruyamment"),

    ("He sings beautifully", "Il chante magnifiquement"),

    ("They swim in the pool", "Ils nagent dans la piscine"),

    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),

    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),

    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),

    ("He paints landscapes", "Il peint des paysages"),

    ("They laugh at the joke", "Ils rient de la blague"),

    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),

    ("She runs in the park", "Elle court dans le parc"),

    ("We travel by train", "Nous voyageons en train"),

    ("He writes a letter", "Il écrit une lettre"),

    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),

    ("The baby cries", "Le bébé pleure"),

    ("She studies hard for exams", "Elle étudie dur pour les examens"),

    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),

    ("He fixes the car", "Il répare la voiture"),

    ("They drink coffee in the morning", "Ils boivent du café le matin"),

    ("The sun sets in the evening", "Le soleil se couche le soir"),

    ("She dances at the party", "Elle danse à la fête"),

    ("We play music at the concert", "Nous jouons de la musique au concert"),

    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),

    ("They study French grammar", "Ils étudient la grammaire française"),

    ("The rain falls gently", "La pluie tombe doucement"),

    ("She sings a song", "Elle chante une chanson"),

    ("We watch a movie together", "Nous regardons un film ensemble"),

    ("He sleeps deeply", "Il dort profondément"),

    ("They travel to Paris", "Ils voyagent à Paris"),

    ("The children play in the park", "Les enfants jouent dans le parc"),

    ("She walks along the beach", "Elle se promène le long de la plage"),

    ("We talk on the phone", "Nous parlons au téléphone"),

    ("He waits for the bus", "Il attend le bus"),

    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),

    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),

    ("She dreams of flying", "Elle rêve de voler"),

    ("We work in the office", "Nous travaillons au bureau"),

    ("He studies history", "Il étudie l'histoire"),

    ("They listen to the radio", "Ils écoutent la radio"),

    ("The wind blows gently", "Le vent souffle doucement"),

    ("She swims in the ocean", "Elle nage dans l'océan"),

    ("We dance at the wedding", "Nous dansons au mariage"),

    ("He climbs the mountain", "Il gravit la montagne"),

    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),

    ("The cat meows loudly", "Le chat miaule bruyamment"),

    ("She paints a picture", "Elle peint un tableau"),

    ("We build a sandcastle", "Nous construisons un château de sable"),

    ("He sings in the choir", "Il chante dans le chœur")

]


def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in sentence]
    vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    vocab.update({token: i+3 for i, token in enumerate(set(tokens))})
    return vocab

# Custom dataset class
class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<sos>'] + sentence + ['<eos>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<sos>'] + sentence + ['<eos>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_sentence = self.src_sentences[idx]
        tgt_sentence = self.tgt_sentences[idx]
        return torch.tensor(src_sentence, dtype=torch.long), torch.tensor(tgt_sentence, dtype=torch.long)

# Tokenize and build vocab
tokenized_en = [tokenize(en) for en, fr in english_to_french]
tokenized_fr = [tokenize(fr) for en, fr in english_to_french]
en_vocab = build_vocab(tokenized_en)
fr_vocab = build_vocab(tokenized_fr)
rev_fr_vocab = {v: k for k, v in fr_vocab.items()}

# Assuming the division between training and validation data has been made
# For demonstration, using the full dataset for both training and validation
train_dataset = TranslationDataset(tokenized_en, tokenized_fr, en_vocab, fr_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False)

# Encoder definition
class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers, dropout):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers=n_layers, dropout=(0 if n_layers == 1 else dropout))

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size)

# Decoder definition
class DecoderGRU(nn.Module):
    def __init__(self, output_size, hidden_size, n_layers, dropout):
        super(DecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers=n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size)

# Initialize models, optimizers, and loss function
enc = EncoderGRU(len(en_vocab), 256, 2, 0.5)
dec = DecoderGRU(len(fr_vocab), 256, 2, 0.5)
enc_optimizer = optim.SGD(enc.parameters(), lr=0.01)
dec_optimizer = optim.SGD(dec.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=50):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0
    correct = 0  # To calculate accuracy

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[fr_vocab['<sos>']]])
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
        if decoder_input.item() == target_tensor[di].item():
            correct += 1

        if decoder_input.item() == fr_vocab['<eos>']:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    accuracy = correct / target_length  # Calculate accuracy

    return loss.item() / target_length, accuracy

# Validation function
def validate(encoder, decoder, dataloader, criterion, max_length=50):
    encoder.eval()
    decoder.eval()
    total_loss = 0
    total_accuracy = 0

    with torch.no_grad():
        for src, tgt in dataloader:
            src, tgt = src.squeeze(0), tgt.squeeze(0)
            loss, accuracy = evaluate(src, tgt, encoder, decoder, criterion, max_length)
            total_loss += loss
            total_accuracy += accuracy

    avg_loss = total_loss / len(dataloader)
    avg_accuracy = total_accuracy / len(dataloader)
    return avg_loss, avg_accuracy

# Evaluate function
def evaluate(src_tensor, tgt_tensor, encoder, decoder, criterion, max_length=50):
    encoder_hidden = encoder.initHidden()
    input_length = src_tensor.size(0)
    target_length = tgt_tensor.size(0)
    loss = 0
    correct = 0

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size)

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(src_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[fr_vocab['<sos>']]])
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        if topi.item() == tgt_tensor[di].item():
            correct += 1
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, tgt_tensor[di].unsqueeze(0))
        if decoder_input.item() == fr_vocab['<eos>']:
            break

    accuracy = correct / target_length
    return loss.item(), accuracy

def train_loop(num_epochs, train_dataloader, val_dataloader, encoder, decoder, enc_optimizer, dec_optimizer, criterion):
    for epoch in range(num_epochs):
        total_train_loss = 0
        total_train_accuracy = 0

        encoder.train()
        decoder.train()
        for src, tgt in train_dataloader:
            src, tgt = src.squeeze(0), tgt.squeeze(0)
            loss, accuracy = train(src, tgt, encoder, decoder, enc_optimizer, dec_optimizer, criterion)
            total_train_loss += loss
            total_train_accuracy += accuracy

        avg_train_loss = total_train_loss / len(train_dataloader)
        avg_train_accuracy = total_train_accuracy / len(train_dataloader)

        # Validation phase with loss and accuracy
        avg_val_loss, avg_val_accuracy = validate(encoder, decoder, val_dataloader, criterion)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Train Accuracy: {avg_train_accuracy:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {avg_val_accuracy:.4f}')


# Train the model
train_loop(50, train_dataloader, val_dataloader, enc, dec, enc_optimizer, dec_optimizer, criterion)

# Translation function
def translate(sentence, encoder, decoder, src_vocab, tgt_vocab, rev_tgt_vocab, max_length=50):
    with torch.no_grad():
        input_tensor = [src_vocab[token] for token in tokenize(sentence)]
        input_tensor = torch.tensor(input_tensor, dtype=torch.long).view(-1, 1)

        encoder_hidden = encoder.initHidden()

        input_length = input_tensor.size(0)
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[tgt_vocab['<sos>']]])
        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)

            if topi.item() == tgt_vocab['<eos>']:
                break  # Only break if <EOS> is genuinely predicted
            else:
                decoded_words.append(rev_tgt_vocab[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(decoded_words)

# Example translation
example_sentence = "The baby cries"
translated_sentence = translate(example_sentence, enc, dec, en_vocab, fr_vocab, rev_fr_vocab)
print(f'Translated: {translated_sentence}')


Epoch 1/50, Train Loss: 3.3515, Train Accuracy: 0.1604, Val Loss: 14.4219, Val Accuracy: 0.1554
Epoch 2/50, Train Loss: 2.4515, Train Accuracy: 0.1875, Val Loss: 12.8186, Val Accuracy: 0.1704
Epoch 3/50, Train Loss: 2.2409, Train Accuracy: 0.1926, Val Loss: 15.4213, Val Accuracy: 0.1978
Epoch 4/50, Train Loss: 2.4782, Train Accuracy: 0.2200, Val Loss: 12.0950, Val Accuracy: 0.1864
Epoch 5/50, Train Loss: 2.2742, Train Accuracy: 0.2053, Val Loss: 15.8559, Val Accuracy: 0.2388
Epoch 6/50, Train Loss: 2.3918, Train Accuracy: 0.2073, Val Loss: 15.6087, Val Accuracy: 0.2598
Epoch 7/50, Train Loss: 2.4950, Train Accuracy: 0.2268, Val Loss: 15.4959, Val Accuracy: 0.2244
Epoch 8/50, Train Loss: 2.5180, Train Accuracy: 0.2284, Val Loss: 12.2621, Val Accuracy: 0.2098
Epoch 9/50, Train Loss: 2.4192, Train Accuracy: 0.2312, Val Loss: 14.8061, Val Accuracy: 0.2705
Epoch 10/50, Train Loss: 2.5100, Train Accuracy: 0.2525, Val Loss: 14.8362, Val Accuracy: 0.2287
Epoch 11/50, Train Loss: 2.4441, Train 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Simple tokenization and vocab building
def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in tokenize(sentence)]
    vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    vocab.update({token: i+3 for i, token in enumerate(sorted(set(tokens)))})
    return vocab

class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<sos>'] + tokenize(sentence) + ['<eos>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<sos>'] + tokenize(sentence) + ['<eos>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        return torch.tensor(self.src_sentences[idx]), torch.tensor(self.tgt_sentences[idx])

def collate_fn(batch):
    src_batch, tgt_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, padding_value=0, batch_first=True)
    tgt_batch = pad_sequence(tgt_batch, padding_value=0, batch_first=True)
    return src_batch, tgt_batch

english_to_french = [

    ("I am cold", "J'ai froid"),

    ("You are tired", "Tu es fatigué"),

    ("He is hungry", "Il a faim"),

    ("She is happy", "Elle est heureuse"),

    ("We are friends", "Nous sommes amis"),

    ("They are students", "Ils sont étudiants"),

    ("The cat is sleeping", "Le chat dort"),

    ("The sun is shining", "Le soleil brille"),

    ("We love music", "Nous aimons la musique"),

    ("She speaks French fluently", "Elle parle français couramment"),

    ("He enjoys reading books", "Il aime lire des livres"),

    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),

    ("The movie starts at 7 PM", "Le film commence à 19 heures"),

    ("She wears a red dress", "Elle porte une robe rouge"),

    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),

    ("He drives a blue car", "Il conduit une voiture bleue"),

    ("They visit museums often", "Ils visitent souvent des musées"),

    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),

    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),

    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),

    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),

    ("They travel around the world", "Ils voyagent autour du monde"),

    ("The book is on the table", "Le livre est sur la table"),

    ("She dances gracefully", "Elle danse avec grâce"),

    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),

    ("He works hard every day", "Il travaille dur tous les jours"),

    ("They speak different languages", "Ils parlent différentes langues"),

    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),

    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),

    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),

    ("The dog barks loudly", "Le chien aboie bruyamment"),

    ("He sings beautifully", "Il chante magnifiquement"),

    ("They swim in the pool", "Ils nagent dans la piscine"),

    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),

    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),

    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),

    ("He paints landscapes", "Il peint des paysages"),

    ("They laugh at the joke", "Ils rient de la blague"),

    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),

    ("She runs in the park", "Elle court dans le parc"),

    ("We travel by train", "Nous voyageons en train"),

    ("He writes a letter", "Il écrit une lettre"),

    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),

    ("The baby cries", "Le bébé pleure"),

    ("She studies hard for exams", "Elle étudie dur pour les examens"),

    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),

    ("He fixes the car", "Il répare la voiture"),

    ("They drink coffee in the morning", "Ils boivent du café le matin"),

    ("The sun sets in the evening", "Le soleil se couche le soir"),

    ("She dances at the party", "Elle danse à la fête"),

    ("We play music at the concert", "Nous jouons de la musique au concert"),

    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),

    ("They study French grammar", "Ils étudient la grammaire française"),

    ("The rain falls gently", "La pluie tombe doucement"),

    ("She sings a song", "Elle chante une chanson"),

    ("We watch a movie together", "Nous regardons un film ensemble"),

    ("He sleeps deeply", "Il dort profondément"),

    ("They travel to Paris", "Ils voyagent à Paris"),

    ("The children play in the park", "Les enfants jouent dans le parc"),

    ("She walks along the beach", "Elle se promène le long de la plage"),

    ("We talk on the phone", "Nous parlons au téléphone"),

    ("He waits for the bus", "Il attend le bus"),

    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),

    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),

    ("She dreams of flying", "Elle rêve de voler"),

    ("We work in the office", "Nous travaillons au bureau"),

    ("He studies history", "Il étudie l'histoire"),

    ("They listen to the radio", "Ils écoutent la radio"),

    ("The wind blows gently", "Le vent souffle doucement"),

    ("She swims in the ocean", "Elle nage dans l'océan"),

    ("We dance at the wedding", "Nous dansons au mariage"),

    ("He climbs the mountain", "Il gravit la montagne"),

    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),

    ("The cat meows loudly", "Le chat miaule bruyamment"),

    ("She paints a picture", "Elle peint un tableau"),

    ("We build a sandcastle", "Nous construisons un château de sable"),

    ("He sings in the choir", "Il chante dans le chœur")

]

english_sentences, french_sentences = zip(*english_to_french)

src_vocab = build_vocab(english_sentences)
tgt_vocab = build_vocab(french_sentences)

train_dataset = TranslationDataset(english_sentences, french_sentences, src_vocab, tgt_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, num_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, num_layers, dropout=dropout, batch_first=True)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, hidden = self.gru(embedded)
        return outputs, hidden

class Attention(nn.Module):
    def __init__(self, hid_dim):
        super().__init__()
        self.attn = nn.Linear(hid_dim * 2, hid_dim)
        self.v = nn.Linear(hid_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.shape[0]
        src_len = encoder_outputs.shape[1]

        hidden = hidden.repeat(src_len, 1, 1).permute(1, 0, 2)

        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        return torch.softmax(attention, dim=1)

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, num_layers, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU(hid_dim + emb_dim, hid_dim, num_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(hid_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(1)  # Ensure input is [batch_size, 1] for embedding
        embedded = self.dropout(self.embedding(input))

        a = self.attention(hidden[-1], encoder_outputs)  # Ensure attention uses last hidden state
        a = a.unsqueeze(1)

        weighted = torch.bmm(a, encoder_outputs)  # Weighted encoder outputs as context
        rnn_input = torch.cat((embedded, weighted), dim=2)  # Concatenate embedded input and context

        output, hidden = self.gru(rnn_input, hidden)  # GRU forward pass

        # Directly concatenate the output and weighted context for the fully connected layer
        prediction = self.fc_out(torch.cat((output.squeeze(1), weighted.squeeze(1)), dim=1))

        return prediction, hidden

INPUT_DIM = len(src_vocab)
OUTPUT_DIM = len(tgt_vocab)
ENC_EMB_DIM = DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = DEC_DROPOUT = 0.5

encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT).to(device)
attention = Attention(HID_DIM).to(device)
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT, attention).to(device)

optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()))
criterion = nn.CrossEntropyLoss(ignore_index=0)


def train_and_evaluate(encoder, decoder, train_dataloader, val_dataloader, optimizer, criterion, device, n_epochs=10):

    for epoch in range(n_epochs):
        # Reset metrics at the start of each epoch
        training_loss, training_accuracy = 0.0, 0.0
        correct_tokens, total_tokens = 0, 0

        encoder.train()
        decoder.train()

        for src, tgt in train_dataloader:
            src, tgt = src.to(device), tgt.to(device)
            optimizer.zero_grad()

            encoder_outputs, hidden = encoder(src)

            # Assuming the first token provided to the decoder is <sos>
            input = tgt[:, 0]
            output_tokens = torch.zeros(tgt.size(0), tgt.size(1)-1, OUTPUT_DIM, device=device)

            for t in range(1, tgt.size(1)):
                output, hidden = decoder(input, hidden, encoder_outputs)
                output_tokens[:, t-1, :] = output
                top1 = output.argmax(1)
                correct_tokens += (top1 == tgt[:, t]).sum().item()
                total_tokens += tgt[:, t].numel()
                input = top1  # Using greedy decoding for simplicity

            output_dim = output_tokens.shape[-1]
            output_tokens = output_tokens.contiguous().view(-1, output_dim)
            tgt = tgt[:, 1:].contiguous().view(-1)

            loss = criterion(output_tokens, tgt)
            loss.backward()
            optimizer.step()

            training_loss += loss.item()

        train_loss = training_loss / len(train_dataloader)
        train_accuracy = correct_tokens / total_tokens

        # Validation (Similar structure, calculate val_loss and val_accuracy)
        val_loss, val_accuracy = evaluate(encoder, decoder, val_dataloader, criterion, device)


        print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Training Acc: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.4f}')

def evaluate(encoder, decoder, dataloader, criterion, device):
    encoder.eval()
    decoder.eval()
    epoch_loss = 0
    correct_tokens, total_tokens = 0, 0

    with torch.no_grad():
        for src, tgt in dataloader:
            src, tgt = src.to(device), tgt.to(device)

            encoder_outputs, hidden = encoder(src)
            output_tokens = torch.zeros(tgt.size(0), tgt.size(1)-1, OUTPUT_DIM, device=device)
            input = tgt[:, 0]

            for t in range(1, tgt.size(1)):
                output, hidden = decoder(input, hidden, encoder_outputs)
                output_tokens[:, t-1, :] = output
                input = output.argmax(1)
                correct_tokens += (input == tgt[:, t]).sum().item()
                total_tokens += input.shape[0]

            output_dim = output_tokens.shape[-1]
            output_tokens = output_tokens.contiguous().view(-1, output_dim)
            tgt = tgt[:, 1:].contiguous().view(-1)

            loss = criterion(output_tokens, tgt)
            epoch_loss += loss.item()

    val_loss = epoch_loss / len(dataloader)
    val_accuracy = correct_tokens / total_tokens

    return val_loss, val_accuracy

# Assuming all other components (model definition, optimizer setup) are the same
train_and_evaluate(encoder, decoder, train_dataloader, val_dataloader, optimizer, criterion, device, n_epochs=25)

def translate_sentence(sentence, src_vocab, tgt_vocab, encoder, decoder, device, max_length=50):
    encoder.eval()
    decoder.eval()

    tokens = ['<sos>'] + tokenize(sentence) + ['<eos>']
    src_indexes = [src_vocab[token] for token in tokens]
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)

    with torch.no_grad():
        encoder_outputs, hidden = encoder(src_tensor)

    tgt_indexes = [tgt_vocab['<sos>']]

    for _ in range(max_length):
        tgt_tensor = torch.LongTensor([tgt_indexes[-1]]).to(device)

        with torch.no_grad():
            output, hidden = decoder(tgt_tensor, hidden, encoder_outputs)

        pred_token = output.argmax(1).item()
        tgt_indexes.append(pred_token)

        if pred_token == tgt_vocab['<eos>']:
            break

    tgt_tokens = [list(tgt_vocab.keys())[list(tgt_vocab.values()).index(i)] for i in tgt_indexes]

    return tgt_tokens[1:-1]  # Remove <sos> and <eos> tokens

# Translate a single sentence
example_sentence = "The baby cries"
print(f'Original in English: {example_sentence}')
translation = translate_sentence(example_sentence, src_vocab, tgt_vocab, encoder, decoder, device)
print(f'Translated to French: {" ".join(translation)}')


Epoch: 1, Training Loss: 4.5600, Training Acc: 0.1900, Validation Loss: 4.8737, Validation Acc: 0.1770
Epoch: 2, Training Loss: 3.6869, Training Acc: 0.2703, Validation Loss: 5.0579, Validation Acc: 0.1925
Epoch: 3, Training Loss: 3.1372, Training Acc: 0.3178, Validation Loss: 5.4097, Validation Acc: 0.1836
Epoch: 4, Training Loss: 2.5800, Training Acc: 0.3315, Validation Loss: 5.8698, Validation Acc: 0.1704
Epoch: 5, Training Loss: 2.1575, Training Acc: 0.3834, Validation Loss: 6.2447, Validation Acc: 0.1792
Epoch: 6, Training Loss: 1.7035, Training Acc: 0.4384, Validation Loss: 6.6514, Validation Acc: 0.1792
Epoch: 7, Training Loss: 1.3322, Training Acc: 0.5329, Validation Loss: 6.7263, Validation Acc: 0.1792
Epoch: 8, Training Loss: 0.9202, Training Acc: 0.6160, Validation Loss: 7.3957, Validation Acc: 0.1726
Epoch: 9, Training Loss: 0.8105, Training Acc: 0.6574, Validation Loss: 7.6785, Validation Acc: 0.1615
Epoch: 10, Training Loss: 0.7113, Training Acc: 0.6822, Validation Loss: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Simple tokenization and vocab building
def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in tokenize(sentence)]
    vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    vocab.update({token: i+3 for i, token in enumerate(sorted(set(tokens)))})
    return vocab

class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<sos>'] + tokenize(sentence) + ['<eos>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<sos>'] + tokenize(sentence) + ['<eos>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        return torch.tensor(self.tgt_sentences[idx]), torch.tensor(self.src_sentences[idx])

def collate_fn(batch):
    tgt_batch, src_batch = zip(*batch)
    tgt_batch = pad_sequence(tgt_batch, padding_value=0, batch_first=True)
    src_batch = pad_sequence(src_batch, padding_value=0, batch_first=True)
    return tgt_batch, src_batch

english_to_french = [

    ("I am cold", "J'ai froid"),

    ("You are tired", "Tu es fatigué"),

    ("He is hungry", "Il a faim"),

    ("She is happy", "Elle est heureuse"),

    ("We are friends", "Nous sommes amis"),

    ("They are students", "Ils sont étudiants"),

    ("The cat is sleeping", "Le chat dort"),

    ("The sun is shining", "Le soleil brille"),

    ("We love music", "Nous aimons la musique"),

    ("She speaks French fluently", "Elle parle français couramment"),

    ("He enjoys reading books", "Il aime lire des livres"),

    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),

    ("The movie starts at 7 PM", "Le film commence à 19 heures"),

    ("She wears a red dress", "Elle porte une robe rouge"),

    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),

    ("He drives a blue car", "Il conduit une voiture bleue"),

    ("They visit museums often", "Ils visitent souvent des musées"),

    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),

    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),

    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),

    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),

    ("They travel around the world", "Ils voyagent autour du monde"),

    ("The book is on the table", "Le livre est sur la table"),

    ("She dances gracefully", "Elle danse avec grâce"),

    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),

    ("He works hard every day", "Il travaille dur tous les jours"),

    ("They speak different languages", "Ils parlent différentes langues"),

    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),

    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),

    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),

    ("The dog barks loudly", "Le chien aboie bruyamment"),

    ("He sings beautifully", "Il chante magnifiquement"),

    ("They swim in the pool", "Ils nagent dans la piscine"),

    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),

    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),

    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),

    ("He paints landscapes", "Il peint des paysages"),

    ("They laugh at the joke", "Ils rient de la blague"),

    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),

    ("She runs in the park", "Elle court dans le parc"),

    ("We travel by train", "Nous voyageons en train"),

    ("He writes a letter", "Il écrit une lettre"),

    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),

    ("The baby cries", "Le bébé pleure"),

    ("She studies hard for exams", "Elle étudie dur pour les examens"),

    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),

    ("He fixes the car", "Il répare la voiture"),

    ("They drink coffee in the morning", "Ils boivent du café le matin"),

    ("The sun sets in the evening", "Le soleil se couche le soir"),

    ("She dances at the party", "Elle danse à la fête"),

    ("We play music at the concert", "Nous jouons de la musique au concert"),

    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),

    ("They study French grammar", "Ils étudient la grammaire française"),

    ("The rain falls gently", "La pluie tombe doucement"),

    ("She sings a song", "Elle chante une chanson"),

    ("We watch a movie together", "Nous regardons un film ensemble"),

    ("He sleeps deeply", "Il dort profondément"),

    ("They travel to Paris", "Ils voyagent à Paris"),

    ("The children play in the park", "Les enfants jouent dans le parc"),

    ("She walks along the beach", "Elle se promène le long de la plage"),

    ("We talk on the phone", "Nous parlons au téléphone"),

    ("He waits for the bus", "Il attend le bus"),

    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),

    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),

    ("She dreams of flying", "Elle rêve de voler"),

    ("We work in the office", "Nous travaillons au bureau"),

    ("He studies history", "Il étudie l'histoire"),

    ("They listen to the radio", "Ils écoutent la radio"),

    ("The wind blows gently", "Le vent souffle doucement"),

    ("She swims in the ocean", "Elle nage dans l'océan"),

    ("We dance at the wedding", "Nous dansons au mariage"),

    ("He climbs the mountain", "Il gravit la montagne"),

    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),

    ("The cat meows loudly", "Le chat miaule bruyamment"),

    ("She paints a picture", "Elle peint un tableau"),

    ("We build a sandcastle", "Nous construisons un château de sable"),

    ("He sings in the choir", "Il chante dans le chœur")

]

french_to_english = [(french, english) for english, french in english_to_french]

french_sentences, english_sentences = zip(*french_to_english)

src_vocab = build_vocab(french_sentences)
tgt_vocab = build_vocab(english_sentences)

train_dataset = TranslationDataset(french_sentences, english_sentences, src_vocab, tgt_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

# Define validation dataset and dataloader
val_dataset = TranslationDataset(french_sentences[:10], english_sentences[:10], src_vocab, tgt_vocab)
val_dataloader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=collate_fn)

# Define model parameters
INPUT_DIM = len(src_vocab)
OUTPUT_DIM = len(tgt_vocab)
ENC_EMB_DIM = DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = DEC_DROPOUT = 0.5

# Define Encoder class
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, num_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, num_layers, dropout=dropout, batch_first=True)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, hidden = self.gru(embedded)
        return outputs, hidden

# Define Attention class
class Attention(nn.Module):
    def __init__(self, hid_dim):
        super().__init__()
        self.attn = nn.Linear(hid_dim * 2, hid_dim)
        self.v = nn.Linear(hid_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.shape[0]
        src_len = encoder_outputs.shape[1]

        hidden = hidden.repeat(src_len, 1, 1).permute(1, 0, 2)

        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        return torch.softmax(attention, dim=1)

# Define Decoder class
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, num_layers, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU(hid_dim + emb_dim, hid_dim, num_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(hid_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(1)  # Ensure input is [batch_size, 1] for embedding
        embedded = self.dropout(self.embedding(input))

        a = self.attention(hidden[-1], encoder_outputs)  # Ensure attention uses last hidden state
        a = a.unsqueeze(1)

        weighted = torch.bmm(a, encoder_outputs)  # Weighted encoder outputs as context
        rnn_input = torch.cat((embedded, weighted), dim=2)  # Concatenate embedded input and context

        output, hidden = self.gru(rnn_input, hidden)  # GRU forward pass

        # Directly concatenate the output and weighted context for the fully connected layer
        prediction = self.fc_out(torch.cat((output.squeeze(1), weighted.squeeze(1)), dim=1))

        return prediction, hidden

# Initialize model components
encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT).to(device)
attention = Attention(HID_DIM).to(device)
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT, attention).to(device)

# Initialize optimizer and criterion
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()))
criterion = nn.CrossEntropyLoss(ignore_index=0)

def train_and_evaluate(encoder, decoder, train_dataloader, val_dataloader, optimizer, criterion, device, n_epochs=10):
    for epoch in range(n_epochs):
        # Reset metrics at the start of each epoch
        training_loss, training_accuracy = 0.0, 0.0
        correct_tokens, total_tokens = 0, 0

        encoder.train()
        decoder.train()

        for tgt, src in train_dataloader:
            tgt, src = tgt.to(device), src.to(device)
            optimizer.zero_grad()

            encoder_outputs, hidden = encoder(src)

            # Assuming the first token provided to the decoder is <sos>
            input = tgt[:, 0]
            output_tokens = torch.zeros(tgt.size(0), tgt.size(1)-1, OUTPUT_DIM, device=device)

            for t in range(1, tgt.size(1)):
                output, hidden = decoder(input, hidden, encoder_outputs)
                output_tokens[:, t-1, :] = output
                top1 = output.argmax(1)
                correct_tokens += (top1 == tgt[:, t]).sum().item()
                total_tokens += tgt[:, t].numel()
                input = top1  # Using greedy decoding for simplicity

            output_dim = output_tokens.shape[-1]
            output_tokens = output_tokens.contiguous().view(-1, output_dim)
            tgt = tgt[:, 1:].contiguous().view(-1)

            loss = criterion(output_tokens, tgt)
            loss.backward()
            optimizer.step()

            training_loss += loss.item()

        train_loss = training_loss / len(train_dataloader)
        train_accuracy = correct_tokens / total_tokens

        # Validation
        val_loss, val_accuracy = evaluate(encoder, decoder, val_dataloader, criterion, device)

        print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Training Acc: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.4f}')

def evaluate(encoder, decoder, dataloader, criterion, device):
    encoder.eval()
    decoder.eval()
    epoch_loss = 0
    correct_tokens, total_tokens = 0, 0

    with torch.no_grad():
        for tgt, src in dataloader:
            tgt, src = tgt.to(device), src.to(device)

            encoder_outputs, hidden = encoder(src)
            output_tokens = torch.zeros(tgt.size(0), tgt.size(1)-1, OUTPUT_DIM, device=device)
            input = tgt[:, 0]

            for t in range(1, tgt.size(1)):
                output, hidden = decoder(input, hidden, encoder_outputs)
                output_tokens[:, t-1, :] = output
                input = output.argmax(1)
                correct_tokens += (input == tgt[:, t]).sum().item()
                total_tokens += input.shape[0]

            output_dim = output_tokens.shape[-1]
            output_tokens = output_tokens.contiguous().view(-1, output_dim)
            tgt = tgt[:, 1:].contiguous().view(-1)

            loss = criterion(output_tokens, tgt)
            epoch_loss += loss.item()

    val_loss = epoch_loss / len(dataloader)
    val_accuracy = correct_tokens / total_tokens

    return val_loss, val_accuracy

# Train the model
train_and_evaluate(encoder, decoder, train_dataloader, val_dataloader, optimizer, criterion, device, n_epochs=15)

def translate_sentence(sentence, src_vocab, tgt_vocab, encoder, decoder, device, max_length=50):
    encoder.eval()
    decoder.eval()

    tokens = ['<sos>'] + tokenize(sentence) + ['<eos>']
    src_indexes = [src_vocab[token] for token in tokens]
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)

    with torch.no_grad():
        encoder_outputs, hidden = encoder(src_tensor)

    tgt_indexes = [tgt_vocab['<sos>']]

    for _ in range(max_length):
        tgt_tensor = torch.LongTensor([tgt_indexes[-1]]).to(device)

        with torch.no_grad():
            output, hidden = decoder(tgt_tensor, hidden, encoder_outputs)

        pred_token = output.argmax(1).item()
        tgt_indexes.append(pred_token)

        if pred_token == tgt_vocab['<eos>']:
            break

    tgt_tokens = [list(tgt_vocab.keys())[list(tgt_vocab.values()).index(i)] for i in tgt_indexes]

    return tgt_tokens[1:-1]  # Remove <sos> and <eos> tokens

# Translate a single sentence
example_sentence = "Le bébé pleure"
print(f'Original in French: {example_sentence}')
translation = translate_sentence(example_sentence, src_vocab, tgt_vocab, encoder, decoder, device)
print(f'Translated to English: {" ".join(translation)}')


Epoch: 1, Training Loss: 4.2748, Training Acc: 0.2170, Validation Loss: 3.7927, Validation Acc: 0.1591
Epoch: 2, Training Loss: 3.4622, Training Acc: 0.3170, Validation Loss: 2.5371, Validation Acc: 0.3636
Epoch: 3, Training Loss: 2.8166, Training Acc: 0.3675, Validation Loss: 2.1813, Validation Acc: 0.4545
Epoch: 4, Training Loss: 2.2627, Training Acc: 0.4298, Validation Loss: 1.2560, Validation Acc: 0.7273
Epoch: 5, Training Loss: 1.8340, Training Acc: 0.4710, Validation Loss: 0.8384, Validation Acc: 0.8409
Epoch: 6, Training Loss: 1.3442, Training Acc: 0.5702, Validation Loss: 0.5065, Validation Acc: 0.8864
Epoch: 7, Training Loss: 0.8681, Training Acc: 0.6824, Validation Loss: 0.4394, Validation Acc: 0.8864
Epoch: 8, Training Loss: 0.6033, Training Acc: 0.7564, Validation Loss: 0.1562, Validation Acc: 0.9545
Epoch: 9, Training Loss: 0.4434, Training Acc: 0.7915, Validation Loss: 0.1530, Validation Acc: 0.9545
Epoch: 10, Training Loss: 0.2800, Training Acc: 0.8458, Validation Loss: 