# **<font style="color:black">Sequence to Sequence text generation by PyTorch (seq2seq)</font>**
-------------------

>Note: Apply it to machine translation on a dataset with German to English sentences, specifically the Multi30k dataset.

## **<font style="color:blue">Installation and import libraries</font>**
-------------------

In [1]:
!pip install spacy
!pip install tokenizers
!pip install sacrebleu
!pip install tqdm

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.1.1 sacrebleu-2.5.1


In [2]:
import os
import sys
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import spacy
import random
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
from torch.utils.data import Dataset, DataLoader
from sacrebleu import corpus_bleu
from collections import Counter
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm  # Import tqdm for the progress bar

%matplotlib inline

## **<font style="color:blue">Utils support function</font>**
-------------------

In [3]:
!python -m spacy download de_core_news_sm
!python -m spacy download en_core_web_sm

Collecting de-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m59.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Installing collected packages: de-core-news-sm
Successfully installed de-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
# Load spacy models for German and English
spacy_ger = spacy.load("de_core_news_sm")
spacy_eng = spacy.load("en_core_web_sm")

In [5]:
def tokenize_ger(text):
    return [tok.text for tok in spacy_ger.tokenizer(text)]

In [6]:
def tokenize_eng(text):
    return [tok.text for tok in spacy_eng.tokenizer(text)]

In [7]:
def translate_sentence(model, sentence, german_vocab, english_vocab, device, max_length=50):
    model.eval()

    # Handle different input types
    if isinstance(sentence, str):
        # Tokenize string input
        tokens = [token.text.lower() for token in german_vocab.tokenizer(sentence)]
        tokens = [german_vocab.sos_token] + tokens + [german_vocab.eos_token]
        indices = [german_vocab[token] for token in tokens]
    elif isinstance(sentence, torch.Tensor):
        # Use tensor directly as indices
        indices = sentence.tolist()  # Convert tensor to list of indices
        indices = [german_vocab[german_vocab.sos_token]] + indices + [german_vocab[german_vocab.eos_token]]
    else:
        raise ValueError("Sentence must be a string or a torch.Tensor")

    sentence_tensor = torch.LongTensor(indices).unsqueeze(0).to(device)

    # Encode sentence
    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)

    outputs = [english_vocab[english_vocab.sos_token]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)
        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()
        outputs.append(best_guess)
        if best_guess == english_vocab[english_vocab.eos_token]:
            break

    translated_sentence = [english_vocab.lookup_token(idx) for idx in outputs]
    return translated_sentence[1:]  # Exclude <sos>

In [8]:
def bleu_score(data, model, german_vocab, english_vocab, device):
    targets = []
    outputs = []
    for i, (src, trg) in enumerate(data):
        print(f"Iteration {i}:")
        print(f"src type: {type(src)}, shape: {src.shape}")
        print(f"trg type: {type(trg)}, shape: {trg.shape}")
        try:
            prediction = translate_sentence(model, src, german_vocab, english_vocab, device)
            print(f"Prediction: {prediction}")
            prediction = prediction[:-1]  # Remove <eos> token
            print(f"Prediction after trim: {prediction}")
            target_tokens = [english_vocab.lookup_token(idx.item()) for idx in trg]
            print(f"Target tokens: {target_tokens}")
            targets.append(target_tokens)
            outputs.append(prediction)
        except Exception as e:
            print(f"Error in iteration {i}: {e}")
            break
    print(f"Outputs sample: {outputs[:2]}")
    print(f"Targets sample: {targets[:2]}")
    try:
        score = corpus_bleu(outputs, targets)
        print(f"Raw BLEU score: {score}")
        return score
    except Exception as e:
        print(f"BLEU computation error: {e}")
        return 0.0

In [9]:
def save_checkpoint(state, filename="/kaggle/working/my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)

In [10]:
def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])

In [11]:
class Vocabulary:
    def __init__(self, tokens=None):
        self.token_to_idx = {}
        self.idx_to_token = []
        self.special_tokens = ["<pad>", "<sos>", "<eos>", "<unk>"]

        # Add special tokens to the vocabulary
        for token in self.special_tokens:
            self.add_token(token)

        if tokens:
            self.build_vocab(tokens)

        # Set attributes for special tokens
        self.pad_token = "<pad>"
        self.sos_token = "<sos>"
        self.eos_token = "<eos>"
        self.unk_token = "<unk>"

    def build_vocab(self, tokens, min_freq=2, max_size=10000):
        token_counts = Counter(tokens)
        for token, count in token_counts.items():
            if count >= min_freq:
                self.add_token(token)
                if len(self.token_to_idx) >= max_size:
                    break

    def add_token(self, token):
        if token not in self.token_to_idx:
            self.token_to_idx[token] = len(self.idx_to_token)
            self.idx_to_token.append(token)

    def __len__(self):
        return len(self.idx_to_token)

    def __getitem__(self, token):
        return self.token_to_idx.get(token, self.token_to_idx[self.unk_token])

    def lookup_token(self, idx):
        return self.idx_to_token[idx]

In [12]:
class Multi30kDataset(Dataset):
    def __init__(self, src_path, trg_path, german_vocab, english_vocab):
        self.src_sentences = self.load_data(src_path)
        self.trg_sentences = self.load_data(trg_path)
        self.german_vocab = german_vocab
        self.english_vocab = english_vocab

    def load_data(self, data_path):
        with open(data_path, 'r', encoding='utf-8') as file:
            return file.readlines()

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src = self.src_sentences[idx].strip()
        trg = self.trg_sentences[idx].strip()
        src_tokens = tokenize_ger(src)
        trg_tokens = tokenize_eng(trg)
        src_indices = [self.german_vocab[token] for token in src_tokens]
        trg_indices = [self.english_vocab[token] for token in trg_tokens]
        return torch.tensor(src_indices), torch.tensor(trg_indices)


def collate_fn(batch, pad_idx):
    src_batch, trg_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, padding_value=pad_idx)
    trg_batch = pad_sequence(trg_batch, padding_value=pad_idx)
    return src_batch, trg_batch

In [13]:
def collate_fn(batch, pad_idx):
    src_batch, trg_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, padding_value=pad_idx, batch_first=True)
    trg_batch = pad_sequence(trg_batch, padding_value=pad_idx, batch_first=True)
    max_len = max(src_batch.size(1), trg_batch.size(1))
    if src_batch.size(1) < max_len:
        src_padding = torch.full((src_batch.size(0), max_len - src_batch.size(1)), pad_idx, dtype=torch.long)
        src_batch = torch.cat([src_batch, src_padding], dim=1)
    if trg_batch.size(1) < max_len:
        trg_padding = torch.full((trg_batch.size(0), max_len - trg_batch.size(1)), pad_idx, dtype=torch.long)
        trg_batch = torch.cat([trg_batch, trg_padding], dim=1)
    #print(f"src_batch type: {src_batch.dtype}, trg_batch type: {trg_batch.dtype}")
    return src_batch, trg_batch

In [14]:
# Load data
train_src_path = os.path.join('/kaggle','input','multi30k-de-en','training','train.de')
train_trg_path = os.path.join('/kaggle','input','multi30k-de-en','training','train.en')
valid_src_path = os.path.join('/kaggle','input','multi30k-de-en','validation','val.de')
valid_trg_path = os.path.join('/kaggle','input','multi30k-de-en','validation','val.en')
test_src_path = os.path.join('/kaggle','input','multi30k-de-en','mmt16_task1_test','test.de')
test_trg_path = os.path.join('/kaggle','input','multi30k-de-en','mmt16_task1_test','test.en')

In [15]:
# Build vocabularies
german_tokens_train = []
english_tokens_train = []
german_tokens_valid = []
english_tokens_valid = []
german_tokens_test = []
english_tokens_test = []

In [16]:
with open(train_src_path, 'r', encoding='utf-8') as file:
    for line in file:
        german_tokens_train.extend(tokenize_ger(line.strip()))

In [17]:
with open(train_trg_path, 'r', encoding='utf-8') as file:
    for line in file:
        english_tokens_train.extend(tokenize_eng(line.strip()))

In [18]:
with open(valid_src_path, 'r', encoding='utf-8') as file:
    for line in file:
        german_tokens_valid.extend(tokenize_ger(line.strip()))

In [19]:
with open(valid_trg_path, 'r', encoding='utf-8') as file:
    for line in file:
        english_tokens_valid.extend(tokenize_eng(line.strip()))

In [20]:
with open(test_src_path, 'r', encoding='utf-8') as file:
    for line in file:
        german_tokens_test.extend(tokenize_ger(line.strip()))

In [21]:
with open(test_trg_path, 'r', encoding='utf-8') as file:
    for line in file:
        english_tokens_test.extend(tokenize_eng(line.strip()))

In [22]:
german_vocab_train = Vocabulary()
english_vocab_train = Vocabulary()
german_vocab_valid = Vocabulary()
english_vocab_valid = Vocabulary()
german_vocab_test = Vocabulary()
english_vocab_test = Vocabulary()

In [23]:
german_vocab_train.build_vocab(german_tokens_train, min_freq=1, max_size=20000)
english_vocab_train.build_vocab(english_tokens_train, min_freq=1, max_size=20000)
german_vocab_valid.build_vocab(german_tokens_valid, min_freq=1, max_size=20000)
english_vocab_valid.build_vocab(english_tokens_valid, min_freq=1, max_size=20000)
german_vocab_test.build_vocab(german_tokens_test, min_freq=1, max_size=20000)
english_vocab_test.build_vocab(english_tokens_test, min_freq=1, max_size=20000)

In [24]:
print(f"German vocab size for train: {len(german_vocab_train)}")
print(f"English vocab size for train: {len(english_vocab_train)}")
print(f"German vocab size for valid: {len(german_vocab_valid)}")
print(f"English vocab size for valid: {len(english_vocab_valid)}")
print(f"German vocab size for test: {len(german_vocab_test)}")
print(f"English vocab size for test: {len(english_vocab_test)}")

German vocab size for train: 19214
English vocab size for train: 10837
German vocab size for valid: 2355
English vocab size for valid: 2023
German vocab size for test: 2182
English vocab size for test: 1961


In [25]:
print(german_vocab_train["zwei"], english_vocab_train["two"])
print(german_vocab_train["<unk>"], english_vocab_train["<unk>"])

293 296
3 3


In [26]:
# Training hyperparameters
num_epochs = 10
learning_rate = 0.001
batch_size = 1

In [27]:
# Model hyperparameters
load_model = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size_encoder = len(german_vocab_train)
input_size_decoder = len(english_vocab_train)
output_size = len(english_vocab_train)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

In [28]:
# Tensorboard to get nice loss plot
writer = SummaryWriter(f"runs/loss_plot")
step = 0

In [29]:
# Create data loaders
train_dataset = Multi30kDataset(train_src_path, train_trg_path, german_vocab_train, english_vocab_train)
valid_dataset = Multi30kDataset(valid_src_path, valid_trg_path, german_vocab_valid, english_vocab_valid)
test_dataset = Multi30kDataset(test_src_path, test_trg_path, german_vocab_test, english_vocab_test)

In [30]:
# Check the length of the dataset
print(f"Number of samples in train dataset: {len(train_dataset)}")
print(f"Number of samples in train dataset: {len(valid_dataset)}")
print(f"Number of samples in train dataset: {len(test_dataset)}")

Number of samples in train dataset: 29001
Number of samples in train dataset: 1015
Number of samples in train dataset: 1000


In [31]:
datasets_list = [train_dataset, valid_dataset, test_dataset]
names_list = ['train dataset', 'validation dataset', 'test dataset']

In [32]:
# Inspect a few samples
for p, dataset in enumerate(datasets_list):
    print(f'Show {names_list[p]} samples.\n')
    for i in range(min(5, len(dataset))):
        src, trg = train_dataset[i]
        print(f"Source: {src}")
        print(f"Target: {trg}")
    print(100*'-')

Show train dataset samples.

Source: tensor([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])
Target: tensor([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
Source: tensor([17,  7, 18, 19, 20, 21, 22, 16])
Target: tensor([15, 16, 17, 18, 19,  9, 20, 21, 22, 23, 24, 14])
Source: tensor([23, 24, 25, 26, 11, 21, 27, 28, 29, 16])
Target: tensor([25, 26, 27, 28, 29, 21, 30, 31, 14])
Source: tensor([23, 30, 11, 31, 32, 33, 34, 35, 36, 37, 38, 39, 21, 40, 16])
Target: tensor([25, 32, 17, 21, 33, 34, 35, 36, 37, 21, 38, 39, 21, 40, 14])
Source: tensor([ 4,  7, 41, 42, 43, 38, 44, 45, 46, 16])
Target: tensor([ 4, 16,  9, 41, 42, 43, 44, 45, 14])
----------------------------------------------------------------------------------------------------
Show validation dataset samples.

Source: tensor([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])
Target: tensor([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
Source: tensor([17,  7, 18, 19, 20, 21, 22, 16])
Target: tensor([15, 16, 17, 18, 19,  9,

In [33]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=lambda b: collate_fn(b, pad_idx=german_vocab_train["<pad>"]))
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=True, collate_fn=lambda b: collate_fn(b, pad_idx=german_vocab_valid["<pad>"]))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True, collate_fn=lambda b: collate_fn(b, pad_idx=german_vocab_test["<pad>"]))

In [34]:
for batch_idx, (src, trg) in enumerate(train_loader):
    print(f"Batch {batch_idx}: src shape={src.shape}, trg shape={trg.shape}")
    break

Batch 0: src shape=torch.Size([64, 24]), trg shape=torch.Size([64, 24])


In [35]:
for batch_idx, (src, trg) in enumerate(valid_loader):
    print(f"Batch {batch_idx}: src shape={src.shape}, trg shape={trg.shape}")
    break

Batch 0: src shape=torch.Size([64, 24]), trg shape=torch.Size([64, 24])


In [36]:
for batch_idx, (src, trg) in enumerate(test_loader):
    print(f"Batch {batch_idx}: src shape={src.shape}, trg shape={trg.shape}")
    break

Batch 0: src shape=torch.Size([64, 26]), trg shape=torch.Size([64, 26])


In [37]:
def collate_fn(batch, pad_idx):
    src_batch, trg_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, padding_value=pad_idx, batch_first=True)
    trg_batch = pad_sequence(trg_batch, padding_value=pad_idx, batch_first=True)
    
    max_len = max(src_batch.size(1), trg_batch.size(1))
    
    if src_batch.size(1) < max_len:
        src_padding = torch.full((src_batch.size(0), max_len - src_batch.size(1)), pad_idx, dtype=torch.long)
        src_batch = torch.cat([src_batch, src_padding], dim=1)
    
    if trg_batch.size(1) < max_len:
        trg_padding = torch.full((trg_batch.size(0), max_len - trg_batch.size(1)), pad_idx, dtype=torch.long)
        trg_batch = torch.cat([trg_batch, trg_padding], dim=1)
    
    #print(f"Sample src type: {src.dtype}, trg type: {trg.dtype}")
    return src_batch, trg_batch

In [38]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, dropout):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout, batch_first=True)

    def forward(self, x):
        embedding = self.dropout(self.embedding(x))
        outputs, (hidden, cell) = self.rnn(embedding)
        return hidden, cell

In [39]:
class Decoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers, dropout):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        # x: (batch_size,) e.g., (1,)
        x = x.unsqueeze(1)  # (batch_size, 1) e.g., (1, 1)
        embedding = self.dropout(self.embedding(x))  # (batch_size, 1, embedding_size) e.g., (1, 1, embedding_size)
        #print(f"embedding shape: {embedding.shape}")
        outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))  # outputs: (batch_size, 1, hidden_size)
        #print(f"outputs shape: {outputs.shape}")
        predictions = self.fc(outputs.squeeze(1))  # (batch_size, output_size)
        return predictions, hidden, cell

In [40]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[0]
        target_len = target.shape[1]
        target_vocab_size = len(english_vocab_train)  # Adjust based on your vocabulary

        outputs = torch.zeros(batch_size, target_len, target_vocab_size).to(self.device)

        hidden, cell = self.encoder(source)  # (batch_size, seq_len) -> (num_layers, batch_size, hidden_size)
        x = target[:, 0]  # (batch_size,)

        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[:, t, :] = output
            best_guess = output.argmax(1)
            x = target[:, t] if random.random() < teacher_force_ratio else best_guess

        return outputs

In [41]:
encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout).to(device)
decoder_net = Decoder(input_size_decoder, decoder_embedding_size, hidden_size, output_size, num_layers, dec_dropout).to(device)

In [42]:
model = Seq2Seq(encoder_net, decoder_net, device).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [43]:
if load_model:
    load_checkpoint(torch.load(os.path.join('/kaggle','input','seq2seq_example_model','pytorch','train_variant','3','my_checkpoint.pth.tar'), weights_only=False), model, optimizer)

=> Loading checkpoint


In [44]:
# In your model initialization
for name, param in model.named_parameters():
    if param.numel() == 0:
        print(f"Warning: Zero-element tensor detected in parameter '{name}'")

In [45]:
pad_idx = english_vocab_train["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

In [46]:
sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

In [47]:
def translate_sentence(model, sentence, german_vocab, english_vocab, device, max_length=50):
    model.eval()  # Ensure evaluation mode

    # Tokenize and convert sentence to tensor
    tokens = [token.lower() for token in sentence]
    tokens = [german_vocab.sos_token] + tokens + [german_vocab.eos_token]
    indices = [german_vocab[token] for token in tokens]
    # Correct shape to (batch_size, seq_len)
    sentence_tensor = torch.LongTensor(indices).unsqueeze(0).to(device)  # Shape: (1, seq_len)

    # Encode sentence
    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)  # Shape: (num_layers, 1, hidden_size)

    outputs = [english_vocab[english_vocab.sos_token]]

    for _ in range(max_length):
        # Correct shape to (batch_size,)
        previous_word = torch.LongTensor([outputs[-1]]).to(device)  # Shape: (1,)
        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()
        outputs.append(best_guess)
        if best_guess == english_vocab[english_vocab.eos_token]:
            break

    translated_sentence = [english_vocab.lookup_token(idx) for idx in outputs]
    return translated_sentence[1:]  # Exclude start token

In [48]:
# Example usage with a small batch
example_batch = [train_dataset[i] for i in range(2)]  # Get a small batch for testing
collated_batch = collate_fn(example_batch, pad_idx=german_vocab_train["<pad>"])
print(f"Collated source batch shape: {collated_batch[0].shape}")
print(f"Collated target batch shape: {collated_batch[1].shape}")

Collated source batch shape: torch.Size([2, 13])
Collated target batch shape: torch.Size([2, 13])


In [49]:
for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")
    checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
    save_checkpoint(checkpoint)

    model.eval()
    translated_sentence = translate_sentence(model, sentence, german_vocab_train, english_vocab_train, device)
    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()
    for batch_idx, (inp_data, target) in enumerate(tqdm(train_loader, desc=f"Training Epoch {epoch}", leave=True)):
        inp_data, target = inp_data.to(device, dtype=torch.long), target.to(device, dtype=torch.long)
        #print(f"inp_data shape: {inp_data.shape}, target shape: {target.shape}")
        output = model(inp_data, target)
        #print(f"output shape: {output.shape}")
        
        output = output[:, 1:].reshape(-1, output.shape[-1])  # Skip <sos>
        target = target[:, 1:].reshape(-1)  # Skip <sos>
        #print(f"output_flat shape: {output.shape}, target_flat shape: {target.shape}")
        
        optimizer.zero_grad()
        loss = criterion(output, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()
        
        writer.add_scalar("Training loss", loss.item(), global_step=step)
        step += 1

[Epoch 0 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['to', 'our', 'help', ',', 'number', 'no', 'of', 'a', 'small', 'white', 'pattern', 'as', 'the', 'approaches', 'of', 'the', 'director', 'as', 'all', 'all', 'dressed', 'in', 'white', 'looking', 'shirt', '.', 'the', 'other', "'s", 'dress', '.', 'shirt', 'and', 'the', 'white', 'shirt', '.', 'to', 'tag', '.', 'tag', '.', '.', '.', '.', "'s", 'shirt', '.', "'s", 'hand']


Training Epoch 0: 100%|██████████| 454/454 [02:04<00:00,  3.65it/s]


[Epoch 1 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['to', 'our', 'this', ',', 'you', 'I', 'a', 'small', 'small', 'pattern', 'on', 'a', 'very', 'on', 'a', 'player', 'who', 'dressed', 'in', 'very', 'orange', 'bent', "'s", '.', '.', 'dress', "'s", 'dressed', 'and', 'blue', 'dress', 'with', 'the', 'funky', 'looking', 'green', 'mold', 'and', 'the', 'one', 'who', "'s", 'arm', 'very', 'tag', 'looking', 'on', 'to', 'her', 'purple']


Training Epoch 1: 100%|██████████| 454/454 [02:16<00:00,  3.32it/s]


[Epoch 2 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['-', 'haired', 'female', ',', 'you', 'in', 'only', '-', 'a', 'stare', 'at', 'a', 'final', 'time', 'at', 'the', 'final', 'white', 'team', ',', 'dressed', 'in', 'very', 'white', 'dress', "'s", 'face', '.', 'a', 'gray', 'shirt', 'looking', '.', 'the', 'first', '.', '.', 'the', 'white', 'shirt', '.', '.', 'first', '.', '.', '.', "'s", '.', '.', '.']


Training Epoch 2: 100%|██████████| 454/454 [02:16<00:00,  3.32it/s]


[Epoch 3 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['can', "'s", 'head', ',', 'including', 'each', 'other', "'s", 'able', 'to', 'time', 'out', 'at', 'a', 'tree', '.', '.', 'the', 'John', '.', 'dressed', 'in', 'very', 'gray', 'in', 'a', 'gray', ',', 'a', 'gray', 'leather', 'dress', ',', 'and', 'the', 'the', 'leather', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']


Training Epoch 3: 100%|██████████| 454/454 [02:16<00:00,  3.33it/s]


[Epoch 4 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['-', 'haired', 'female', "'s", 'heart', 'in', 'stare', 'up', 'and', 'and', 'a', 'adults', 'of', 'the', 'gray', 'hard', 'to', 'a', 'member', 'of', 'the', 'help', '-', 'member', 'held', 'on', 'the', 'purple', 'and', 'red', 'team', ',', 'a', 'gray', 'looking', 'on', 'looking', 'down', ',', 'member', ',', 'looking', 'to', 'first', ',', 'as', 'the', 'umpire', ',', 'dressed']


Training Epoch 4: 100%|██████████| 454/454 [02:16<00:00,  3.33it/s]


[Epoch 5 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['-', 'haired', 'let', "'s", 'heart', 'as', 'a', 'musical', '-', 'a', 'male', ',', 'at', 'the', 'farm', ',', 'the', 'the', 'black', 'woman', 'is', 'pursued', 'out', 'the', 'the', 'ball', 'in', 'the', 'gray', 'team', 'of', 'the', 'ball', 'looking', 'team', 'to', 'her', 'team', '.', 'woman', 'member', 'of', 'the', 'white', 'and', 'red', 'team', 'looking', 'down', 'the']


Training Epoch 5: 100%|██████████| 454/454 [02:17<00:00,  3.30it/s]


[Epoch 6 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['-', 'aged', 'let', ',', 'you', 'guard', 'a', 'white', 'and', 'gray', ',', 'red', 'red', 'red', 'red', '"', '-', 'player', 'crosses', 'the', 'orange', '-', 'white', 'ball', '.', 'dressed', 'in', 'the', 'the', 'sun', '.', 'the', 'ball', '.', 'the', 'operating', 'leather', 'shirt', '.', 'the', 'the', 'leather', 'the', 'but', 'the', 'suit', '.', 'dressed', 'to', 'the']


Training Epoch 6: 100%|██████████| 454/454 [02:16<00:00,  3.32it/s]


[Epoch 7 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['-', 'aged', 'let', 'this', 'band', 'works', 'and', 'a', 'new', 'white', 'a', 'red', '-', 'lit', 'player', 'out', 'of', 'the', 'box', '.', 'a', 'member', 'will', 'held', 'by', 'the', 'basket', '.', 'the', 'ball', '.', '"', '.', 'looking', 'by', 'green', 'ribbon', '.', 'but', 'looking', 'over', 'the', ',', 'one', 'in', 'green', 'and', 'looking', 'on', '.']


Training Epoch 7: 100%|██████████| 454/454 [02:16<00:00,  3.33it/s]


[Epoch 8 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['can', 'members', 'of', 'the', 'colorful', 'colorful', 'stand', ',', 'a', 'of', 'a', 'helping', 'of', 'the', 'first', ',', 'first', 'ball', 'first', ',', 'first', 'first', 'one', 'of', 'the', 'first', '5', 'team', '.', 'green', 'funky', 'looking', 'around', 'the', ',', 'and', 'family', 'looking', 'over', 'funky', ',', 'toy', 'a', 'funky', ',', 'green', 'funky', 'funky', ',', 'and']


Training Epoch 8: 100%|██████████| 454/454 [02:17<00:00,  3.30it/s]


[Epoch 9 / 10]
=> Saving checkpoint
Translated example sentence: 
 ['of', 'our', 'white', ',', 'you', 'have', 'a', 'small', 'a', 'guitar', 'out', 'of', 'the', 'final', 'red', 'ball', 'as', 'a', 'player', 'lands', 'over', 'the', 'funky', 'box', '.', 'the', 'funky', 'looking', 'over', 'the', 'funky', 'looking', 'shirt', '.', 'funky', 'funky', 'down', 'next', 'to', 'the', 'funky', 'box', 'next', 'to', 'funky', 'looking', 'down', '.', 'family', "'s"]


Training Epoch 9: 100%|██████████| 454/454 [02:16<00:00,  3.33it/s]


In [50]:
# load actual trained model
load_checkpoint(torch.load(os.path.join('/kaggle','working','my_checkpoint.pth.tar'), weights_only=False), model, optimizer)

=> Loading checkpoint


In [51]:
sentence = "Zwei junge weiße Männer sind im Freien."
translated = translate_sentence(model, sentence, german_vocab_train, english_vocab_train, device)
print(f"Translated: {translated}")

Translated: ['-', 'haired', 'let', "'s", 'the', 'colorful', 'and', 'and', ',', 'and', 'away', 'of', 'the', 'the', 'floor', ',', 'a', 'very', 'damp', 'of', 'a', 'first', 'out', 'of', 'a', 'funky', 'looking', 'red', 'member', 'looking', 'down', '.', 'face', '-', 'family', '.', '.', '.', 'has', 'air', '.', 'the', 'the', '.', '.', '.', '.', '.', '.', '.']


In [52]:
score = bleu_score(test_dataset, model, german_vocab_train, english_vocab_train, device)
print(f"Bleu score {score * 100:.2f}")

Iteration 0:
src type: <class 'torch.Tensor'>, shape: torch.Size([11])
trg type: <class 'torch.Tensor'>, shape: torch.Size([10])
Error in iteration 0: 'Tensor' object has no attribute 'lower'
Outputs sample: []
Targets sample: []
BLEU computation error: list index out of range
Bleu score 0.00


In [53]:
def zip_folder_with_shutil(source_folder, output_path):
    '''Function for zip TensorBoard data'''
    shutil.make_archive(output_path, 'zip', source_folder)

In [54]:
zip_folder_with_shutil('/kaggle/working/runs', '/kaggle/working/runs')

## **<font style="color:blue">Preferences</font>**
-------------------

- [YOUTUBE - Pytorch Seq2Seq Tutorial for Machine Translation](https://www.youtube.com/watch?v=EoGUlvhRYpk&list=PLhhyoLH6Ijfyl_VMCsi54UqGQafGkNOQH)
- [GitHub - Machine-Learning-Collection](https://github.com/aladdinpersson/Machine-Learning-Collection)
- [kaggle - model checkpoint](https://www.kaggle.com/models/radimkzl/seq2seq_example_model)