In [3]:
!pip install -U spacy
!python -m spacy download de_core_news_sm

Collecting de-core-news-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.8.0/de_core_news_sm-3.8.0-py3-none-any.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: de-core-news-sm
Successfully installed de-core-news-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
import spacy
from datasets import load_dataset
from tqdm import tqdm
from collections import Counter

In [2]:
# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [16]:
# --- Data Loading (WMT16 English-German) ---
print("Loading WMT16 English-German dataset...")
try:
    wmt16_en_de = load_dataset("wmt16", "de-en")
    train_data_raw = wmt16_en_de["train"]
    train_data_raw = train_data_raw.select(range(10000))
    print(f"Loaded {len(train_data_raw)} training samples.")
    print(f"Sample: {train_data_raw[0]['translation']}")
except Exception as e:
    print(f"Error loading WMT16 dataset: {e}")
    exit()

Loading WMT16 English-German dataset...
Loaded 10000 training samples.
Sample: {'de': 'Wiederaufnahme der Sitzungsperiode', 'en': 'Resumption of the session'}


In [17]:
try:
    spacy_en = spacy.load("en_core_web_sm")
    spacy_de = spacy.load("de_core_news_sm")
except OSError:
    print("SpaCy models not found. Please run: python -m spacy download en_core_web_sm de_core_news_sm")
    exit()

def tokenize_en(text):
    return [tok.text for tok in spacy_en(text)]

def tokenize_de(text):
    return [tok.text for tok in spacy_de(text)]

# Process raw data into tokenized sentence pairs
tokenized_pairs = []
for sample in tqdm(train_data_raw, desc="Tokenizing data"):
    en_tokens = tokenize_en(sample['translation']['en'])
    de_tokens = tokenize_de(sample['translation']['de'])
    tokenized_pairs.append((en_tokens, de_tokens))

print(f"Tokenized {len(tokenized_pairs)} pairs.")
print(f"Sample tokenized pair: {tokenized_pairs[0]}")

Tokenizing data: 100%|██████████| 10000/10000 [02:48<00:00, 59.25it/s]

Tokenized 10000 pairs.
Sample tokenized pair: (['Resumption', 'of', 'the', 'session'], ['Wiederaufnahme', 'der', 'Sitzungsperiode'])





In [18]:
# --- Vocabulary Building ---
PAD_TOKEN = '<PAD>'
UNK_TOKEN = '<UNK>'
SOS_TOKEN = '<SOS>'
EOS_TOKEN = '<EOS>'

def build_vocab(tokenized_sentences, max_words=10000):
    word_count = Counter()
    for sentence in tokenized_sentences:
        word_count.update(sentence)
    most_common = word_count.most_common(max_words - 4)
    vocab = {word: idx + 4 for idx, (word, _) in enumerate(most_common)}
    vocab[PAD_TOKEN] = 0
    vocab[UNK_TOKEN] = 1
    vocab[SOS_TOKEN] = 2
    vocab[EOS_TOKEN] = 3
    return vocab

src_sentences = [pair[0] for pair in tokenized_pairs]
trg_sentences = [pair[1] for pair in tokenized_pairs]

print("Building source (English) vocabulary...")
src_vocab = build_vocab(src_sentences)
src_vocab_size = len(src_vocab)
print(f"Source vocabulary size: {src_vocab_size}")

print("Building target (German) vocabulary...")
trg_vocab = build_vocab(trg_sentences)
trg_vocab_size = len(trg_vocab)
print(f"Target vocabulary size: {trg_vocab_size}")

src_idx_to_word = {idx: word for word, idx in src_vocab.items()}
trg_idx_to_word = {idx: word for word, idx in trg_vocab.items()}

Building source (English) vocabulary...
Source vocabulary size: 10000
Building target (German) vocabulary...
Target vocabulary size: 10000


In [19]:
# --- Sequence Encoding and Padding ---
MAX_SEQUENCE_LENGTH = 50

def encode_sequence(tokens, vocab, max_len, eos_token=None):
    encoded = [vocab.get(token, vocab[UNK_TOKEN]) for token in tokens[:max_len]]
    if eos_token is not None and len(encoded) < max_len:
        encoded.append(vocab[eos_token])
    if len(encoded) < max_len:
        encoded += [vocab[PAD_TOKEN]] * (max_len - len(encoded))
    encoded = encoded[:max_len]
    return encoded

encoded_pairs = []
for src_tokens, trg_tokens in tqdm(tokenized_pairs, desc="Encoding sequences"):
    src_encoded = encode_sequence(src_tokens, src_vocab, MAX_SEQUENCE_LENGTH)
    trg_encoded = encode_sequence(trg_tokens, trg_vocab, MAX_SEQUENCE_LENGTH, eos_token=EOS_TOKEN)
    encoded_pairs.append((src_encoded, trg_encoded))

src_sequences_tensor = torch.tensor([pair[0] for pair in encoded_pairs], dtype=torch.long)
trg_sequences_tensor = torch.tensor([pair[1] for pair in encoded_pairs], dtype=torch.long)

Encoding sequences: 100%|██████████| 10000/10000 [00:00<00:00, 74508.45it/s]


In [20]:
# --- Create PyTorch Dataset and DataLoaders ---
class TranslationDataset(Dataset):
    def __init__(self, src_sequences, trg_sequences):
        self.src_sequences = src_sequences
        self.trg_sequences = trg_sequences

    def __len__(self):
        return len(self.src_sequences)

    def __getitem__(self, idx):
        return self.src_sequences[idx], self.trg_sequences[idx]

train_dataset = TranslationDataset(src_sequences_tensor, trg_sequences_tensor)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [21]:
# --- Build Encoder, Decoder, and Seq2Seq Models ---
class Encoder(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers=1, dropout=0.5, bidirectional=False):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=0)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                            bidirectional=bidirectional, dropout=dropout if n_layers > 1 else 0, batch_first=True)
        self.fc_hidden = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, hidden_dim)
        self.fc_cell = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, hidden_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, (hidden, cell) = self.rnn(embedded)
        if self.bidirectional:
            hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
            cell = torch.cat((cell[-2, :, :], cell[-1, :, :]), dim=1)
        hidden = self.fc_hidden(hidden).unsqueeze(0)
        cell = self.fc_cell(cell).unsqueeze(0)
        return hidden, cell

class Decoder(nn.Module):
    def __init__(self, output_dim, embedding_dim, hidden_dim, n_layers=1, dropout=0.5):
        super().__init__()
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(output_dim, embedding_dim, padding_idx=0)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                            dropout=dropout if n_layers > 1 else 0, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_token, hidden, cell):
        input_token = input_token.unsqueeze(1)
        embedded = self.dropout(self.embedding(input_token))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.fc_out(output.squeeze(1))
        return prediction, hidden, cell

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size = trg.shape[0]
        trg_seq_len = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        outputs = torch.zeros(batch_size, trg_seq_len, trg_vocab_size).to(self.device)
        hidden, cell = self.encoder(src)
        input_token = trg[:, 0]
        for t in range(1, trg_seq_len):
            prediction, hidden, cell = self.decoder(input_token, hidden, cell)
            outputs[:, t, :] = prediction
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = prediction.argmax(1)
            input_token = trg[:, t] if teacher_force else top1
        return outputs

    def translate(self, src_sentence, src_vocab, trg_vocab, src_idx_to_word, trg_idx_to_word, max_len=MAX_SEQUENCE_LENGTH):
        self.eval()
        src_tokens = tokenize_en(src_sentence)
        src_encoded = encode_sequence(src_tokens, src_vocab, max_len)
        src_tensor = torch.tensor(src_encoded, dtype=torch.long).unsqueeze(0).to(self.device)
        with torch.no_grad():
            encoder_hidden, encoder_cell = self.encoder(src_tensor)
        input_token = torch.tensor([trg_vocab[SOS_TOKEN]], dtype=torch.long).to(self.device)
        hidden = encoder_hidden
        cell = encoder_cell
        predicted_trg_tokens = []
        for _ in range(max_len):
            with torch.no_grad():
                prediction, hidden, cell = self.decoder(input_token, hidden, cell)
            predicted_token_index = prediction.argmax(1).item()
            predicted_token = trg_idx_to_word.get(predicted_token_index, UNK_TOKEN)
            if predicted_token == EOS_TOKEN:
                break
            predicted_trg_tokens.append(predicted_token)
            input_token = torch.tensor([predicted_token_index], dtype=torch.long).to(self.device)
        return " ".join(predicted_trg_tokens)

In [22]:
# --- Model Initialization ---
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 1
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(src_vocab_size, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT, bidirectional=True).to(device)
dec = Decoder(trg_vocab_size, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT).to(device)
model = Seq2Seq(enc, dec, device).to(device)

In [23]:
# --- Loss Function and Optimizer ---
criterion = nn.CrossEntropyLoss(ignore_index=trg_vocab[PAD_TOKEN])
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [24]:
# --- Training Loop ---
def train_seq2seq(model, iterator, optimizer, criterion, clip=1.0, teacher_forcing_ratio=0.5):
    model.train()
    epoch_loss = 0
    for src, trg in tqdm(iterator, desc="Training"):
        src, trg = src.to(model.device), trg.to(model.device)
        optimizer.zero_grad()
        outputs = model(src, trg, teacher_forcing_ratio)
        output_dim = outputs.shape[-1]
        outputs = outputs[:, 1:].reshape(-1, output_dim)
        trg = trg[:, 1:].reshape(-1)
        loss = criterion(outputs, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

In [25]:
# --- Training Execution ---
N_EPOCHS = 100
print(f"\nStarting training for {N_EPOCHS} epochs...")
for epoch in range(N_EPOCHS):
    print(f"\nEpoch {epoch+1}/{N_EPOCHS}")
    print("-" * 10)
    train_loss = train_seq2seq(model, train_loader, optimizer, criterion)
    print(f"Train Loss: {train_loss:.4f}")

print("\nTraining complete!")


Starting training for 100 epochs...

Epoch 1/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.98it/s]


Train Loss: 6.5080

Epoch 2/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 6.1217

Epoch 3/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.98it/s]


Train Loss: 5.9688

Epoch 4/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 5.8163

Epoch 5/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.98it/s]


Train Loss: 5.6848

Epoch 6/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 5.5793

Epoch 7/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 5.4692

Epoch 8/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 5.4003

Epoch 9/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 5.3226

Epoch 10/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 5.2281

Epoch 11/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 5.1900

Epoch 12/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 5.1116

Epoch 13/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 5.0509

Epoch 14/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 4.9879

Epoch 15/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.9213

Epoch 16/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.8723

Epoch 17/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 4.8026

Epoch 18/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.7644

Epoch 19/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 4.6953

Epoch 20/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.6679

Epoch 21/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.6154

Epoch 22/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 4.5557

Epoch 23/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 4.5162

Epoch 24/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 4.4508

Epoch 25/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.4209

Epoch 26/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 4.3954

Epoch 27/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 4.3213

Epoch 28/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 4.2795

Epoch 29/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 4.2306

Epoch 30/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.1820

Epoch 31/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 4.1620

Epoch 32/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 4.1165

Epoch 33/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 4.0751

Epoch 34/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 4.0214

Epoch 35/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.9939

Epoch 36/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.9444

Epoch 37/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 3.9412

Epoch 38/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 3.8956

Epoch 39/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.8842

Epoch 40/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.97it/s]


Train Loss: 3.8321

Epoch 41/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 3.7915

Epoch 42/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.7302

Epoch 43/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.7090

Epoch 44/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 3.6949

Epoch 45/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.6337

Epoch 46/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.6111

Epoch 47/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.6090

Epoch 48/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.5500

Epoch 49/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.5349

Epoch 50/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 3.5131

Epoch 51/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.96it/s]


Train Loss: 3.4786

Epoch 52/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.4636

Epoch 53/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.4362

Epoch 54/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.3891

Epoch 55/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.3487

Epoch 56/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.3597

Epoch 57/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 3.3111

Epoch 58/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.2925

Epoch 59/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.2656

Epoch 60/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 3.2416

Epoch 61/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.2220

Epoch 62/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.1855

Epoch 63/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 3.1949

Epoch 64/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 3.1457

Epoch 65/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 3.1055

Epoch 66/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.95it/s]


Train Loss: 3.0884

Epoch 67/100
----------


Training: 100%|██████████| 157/157 [00:41<00:00,  3.80it/s]


Train Loss: 3.0916

Epoch 68/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.86it/s]


Train Loss: 3.0831

Epoch 69/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 3.0368

Epoch 70/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.90it/s]


Train Loss: 3.0128

Epoch 71/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.9897

Epoch 72/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.9610

Epoch 73/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.9519

Epoch 74/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.9529

Epoch 75/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.9148

Epoch 76/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 2.9064

Epoch 77/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.8649

Epoch 78/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 2.8394

Epoch 79/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.8290

Epoch 80/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.8117

Epoch 81/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.7995

Epoch 82/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.7725

Epoch 83/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.90it/s]


Train Loss: 2.7645

Epoch 84/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.7485

Epoch 85/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.7421

Epoch 86/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.7351

Epoch 87/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.89it/s]


Train Loss: 2.7309

Epoch 88/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.7014

Epoch 89/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.6711

Epoch 90/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.90it/s]


Train Loss: 2.6733

Epoch 91/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.6234

Epoch 92/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.6131

Epoch 93/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.6149

Epoch 94/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.90it/s]


Train Loss: 2.5714

Epoch 95/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 2.5663

Epoch 96/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.93it/s]


Train Loss: 2.5527

Epoch 97/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]


Train Loss: 2.5464

Epoch 98/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.92it/s]


Train Loss: 2.5389

Epoch 99/100
----------


Training: 100%|██████████| 157/157 [00:39<00:00,  3.94it/s]


Train Loss: 2.5005

Epoch 100/100
----------


Training: 100%|██████████| 157/157 [00:40<00:00,  3.91it/s]

Train Loss: 2.4927

Training complete!





In [26]:
# --- Example Translation ---
example_en_sentence = "Hello how are you today ?"
predicted_de_translation = model.translate(example_en_sentence, src_vocab, trg_vocab, src_idx_to_word, trg_idx_to_word)
print(f"\nOriginal English: {example_en_sentence}")
print(f"Predicted German: {predicted_de_translation}")


Original English: Hello how are you today ?
Predicted German: sind denn jetzt ?


In [27]:
# --- Saving the Trained Model ---
PATH = "translation_model.pth"
torch.save(model.state_dict(), PATH)
print(f"Model saved to {PATH}")

Model saved to translation_model.pth
