In [None]:
! pip install transformers sentencepiece datasets
! pip install tqdm
! pip install torch
!pip install sacrebleu
!pip install evaluate



In [None]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login widget. The trainer configured later in
# this notebook sets push_to_hub=True; NOTE(review): presumably that push is
# why the login is required here - confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import torch
import numpy as np
import math
import random
import os
from datasets import load_dataset, load_from_disk
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
    EarlyStoppingCallback,
    TrainerCallback
)
import evaluate
from typing import Dict, List, Optional, Union
import logging
import time
from datetime import datetime

# Configure logging: records at INFO and above are written both to
# "training.log" (relative to the working directory) and to a stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("training.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Set up device and seed for reproducibility.
# `device` is only logged in the cells visible here; nothing below moves the
# model onto it explicitly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")
# The same seed is applied to torch, NumPy and Python's `random`; it is also
# reused for the train/validation split.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Model and configuration parameters
base_model_name = "google/mt5-small"  # pretrained base checkpoint id
model_name = "JMwagunda/ENG-GIR-MODEL"  # Hub checkpoint loaded by the cells below
repo_id = "JMwagunda/ENG-GIR-MODEL"  # Hub repository used as hub_model_id
output_dir = repo_id  # local output directory mirrors the repository id
max_length = 128  # max tokens for inputs, labels and generation
batch_size = 16  # per-device batch size for training and evaluation
learning_rate = 5e-5
weight_decay = 0.01
num_epochs = 40
source_lang = "en"
# NOTE(review): the targets are Giriama (language code "nyf" per the original
# note), but the tag used in the prompt prefix and language tokens is "sw".
# Changing it would change the text fed to the model, so it is left as is.
target_lang = "sw"
save_total_limit = 3  # passed to the training arguments as save_total_limit
gradient_accumulation_steps = 4  # batch_size * 4 samples per optimizer step per device
max_grad_norm = 1.0  # Gradient clipping
warmup_ratio = 0.1
early_stopping_patience = 3  # patience handed to EarlyStoppingCallback

# Language tokens (registered as additional special tokens in the fallback
# branch of the model-loading code)
lang_tokens = {
    'en': '<en>',
    'sw': '<sw>'
}

# repo_id = "Lingua-Connect/SWA_TrainerImproved"  # Your Hub repository ID

# Try to load the fine-tuned checkpoint from the Hub; if that fails for any
# reason, fall back to the pretrained base model.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    print("Successfully loaded model and tokenizer from Hub checkpoint")

except Exception as e:
    print(f"No checkpoint found or error loading from Hub: {e}")
    print("Loading base model instead...")

    # Fallback: load the *base* model. Re-loading `model_name` here would just
    # repeat the call that failed above.
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    special_tokens = {'additional_special_tokens': list(lang_tokens.values())}
    tokenizer.add_special_tokens(special_tokens)

    model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
    # The language tokens enlarged the vocabulary, so the embedding matrix
    # must grow to match the tokenizer.
    model.resize_token_embeddings(len(tokenizer))

# Create custom callback for monitoring and debugging
class MonitorCallback(TrainerCallback):
    """Trainer callback that tracks step time, logged loss and GPU memory.

    Loss values are collected in ``on_log`` because that is the event to which
    the Trainer passes the ``logs`` dictionary; ``on_step_end`` does not
    receive it, so timing and memory reporting live there instead.

    Attributes:
        step_times: durations (seconds) of recent optimizer steps.
        last_time: wall-clock timestamp of the previous step end.
        step_loss: recently logged training-loss values.
    """

    def __init__(self):
        self.step_times = []
        self.last_time = time.time()
        self.step_loss = []

    def on_train_begin(self, args, state, control, **kwargs):
        """Restart the step timer so setup time is not counted as a step."""
        self.last_time = time.time()

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Record the logged training loss and warn about NaN/Inf values."""
        if not logs or "loss" not in logs:
            return

        current_loss = logs["loss"]
        self.step_loss.append(current_loss)
        # Keep the history bounded
        if len(self.step_loss) > 100:
            self.step_loss = self.step_loss[-50:]

        if math.isnan(current_loss) or math.isinf(current_loss):
            logger.warning(f"WARNING: Abnormal loss detected: {current_loss}")

            # Use the model handed to the callback instead of a notebook
            # global that may not exist yet (or may be a different trainer).
            model = kwargs.get("model")
            if model is not None:
                for name, param in model.named_parameters():
                    if torch.isnan(param).any() or torch.isinf(param).any():
                        logger.warning(f"NaN or Inf found in parameter {name}")

    def on_step_end(self, args, state, control, logs=None, **kwargs):
        """Track step duration; every 50 steps report timing, loss and memory."""
        current_time = time.time()
        self.step_times.append(current_time - self.last_time)
        self.last_time = current_time

        # Report average step time and memory every 50 steps
        if state.global_step % 50 != 0:
            return

        recent_times = self.step_times[-50:]
        avg_step_time = sum(recent_times) / len(recent_times)
        # The most recently logged loss, if any has been logged so far
        loss_text = f"{self.step_loss[-1]:.4f}" if self.step_loss else "n/a"
        logger.info(f"Step {state.global_step}: Avg step time = {avg_step_time:.3f}s, Loss = {loss_text}")

        # Trim the history after reporting
        if len(self.step_times) > 100:
            self.step_times = recent_times

        # Report memory usage if on CUDA
        if torch.cuda.is_available():
            mem_allocated = torch.cuda.memory_allocated() / 1024**2
            mem_reserved = torch.cuda.memory_reserved() / 1024**2
            logger.info(f"GPU Memory: Allocated = {mem_allocated:.1f}MB, Reserved = {mem_reserved:.1f}MB")



Successfully loaded model and tokenizer from Hub checkpoint


In [None]:
# Load the raw parallel corpus, split it and tokenize it for seq2seq training
def load_or_preprocess_data():
    """Load the English-Giriama dataset, split it 90/10 and tokenize both splits.

    Despite the name, nothing is cached: the dataset is loaded and tokenized
    on every call. A tokenizer is loaded from ``model_name`` for this purpose.

    Returns:
        tuple: ``(train_dataset, validation_dataset)``. Each holds the
        tokenizer outputs for the prefixed English source plus a ``labels``
        column in which pad positions are replaced by -100.
    """
    # Load the dataset and carve a validation split out of "train"
    ds = load_dataset('Lingua-Connect/English-Giriama-Dataset')
    split_datasets = ds["train"].train_test_split(train_size=0.9, seed=seed)
    split_datasets["validation"] = split_datasets.pop("test")

    logger.info(f"Dataset loaded: {len(split_datasets['train'])} train, {len(split_datasets['validation'])} validation")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    source_prefix = f"translate {source_lang} to {target_lang}: "

    def preprocess_function(examples):
        """Tokenize one batch of sentence pairs, dropping incomplete pairs."""
        # Filter source and target TOGETHER. Filtering each column on its own
        # and truncating to the shorter list shifts every later sentence onto
        # the wrong translation as soon as one side has a missing value.
        pairs = [
            (source_prefix + str(en), str(gir))
            for en, gir in zip(examples['English Sentence'], examples['Giriama Translation'])
            if en is not None and gir is not None
        ]
        inputs = [source for source, _ in pairs]
        targets = [target for _, target in pairs]

        # Tokenize inputs
        model_inputs = tokenizer(
            inputs,
            max_length=max_length,
            truncation=True,
            padding="max_length",
            return_tensors=None
        )

        # Tokenize targets
        labels = tokenizer(
            targets,
            max_length=max_length,
            truncation=True,
            padding="max_length",
            return_tensors=None
        )

        # Replace pad token id with -100 in labels so it's ignored in loss computation
        pad_id = tokenizer.pad_token_id
        model_inputs["labels"] = [
            [-100 if token == pad_id else token for token in sequence]
            for sequence in labels["input_ids"]
        ]

        return model_inputs

    def tokenize_split(split_name, description):
        """Apply preprocess_function to one split, dropping the raw columns."""
        split = split_datasets[split_name]
        return split.map(
            preprocess_function,
            batched=True,
            batch_size=16,
            remove_columns=split.column_names,
            desc=description
        )

    # Process datasets
    logger.info("Processing datasets...")
    train_dataset = tokenize_split("train", "Preprocessing training dataset")
    validation_dataset = tokenize_split("validation", "Preprocessing validation dataset")

    return train_dataset, validation_dataset

In [None]:
# Load data
train_dataset, validation_dataset = load_or_preprocess_data()

# Load model and tokenizer. They are (re)loaded here so that re-running this
# cell always starts from the saved checkpoint rather than from an in-memory
# model that earlier training may have modified.
logger.info(f"Loading model: {model_name}")
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure pad_token_id is set correctly
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id

# The checkpoint weights are used exactly as loaded. A previous loop
# re-initialised every parameter whose name contained both "decoder" and
# "dense" with random values; on an architecture where that pattern matches it
# would silently throw away trained weights, so it has been removed.
# Device placement is left to the Trainer.

# Prepare data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding="max_length",
    max_length=max_length,
    return_tensors="pt"
)


In [None]:
# Load the sacreBLEU metric through `evaluate`; compute_metrics reads the
# "score" field of its result and reports it as "bleu".
metric = evaluate.load("sacrebleu")

In [None]:
def compute_metrics(eval_preds):
    """Decode generated ids and labels and score them with sacreBLEU.

    Args:
        eval_preds: pair ``(predictions, label_ids)`` of integer id arrays.
            If ``predictions`` is a tuple, its first element is used.

    Returns:
        dict: ``{"bleu": ..., "gen_len": ...}``, both rounded to 4 decimals.
        ``gen_len`` is the mean number of non-pad tokens per prediction.
        If anything fails, the error is printed and logged and zeros are
        returned so the run does not crash. NOTE: a 0.0 BLEU produced this
        way still feeds best-model selection and early stopping, so check the
        log when a score of exactly 0 appears.
    """
    preds, labels = eval_preds

    # In case the model returns more than the prediction logits
    if isinstance(preds, tuple):
        preds = preds[0]

    # Debug information - use print in addition to logger
    print(f"Prediction shape: {preds.shape}, Labels shape: {labels.shape}")
    logger.info(f"Prediction shape: {preds.shape}, Labels shape: {labels.shape}")

    try:
        pad_id = tokenizer.pad_token_id
        # len(tokenizer) includes added tokens; tokenizer.vocab_size does not,
        # so using it would treat valid added-token ids as out of range.
        vocab_size = len(tokenizer)
        print(f"Tokenizer vocabulary size: {vocab_size}")
        logger.info(f"Tokenizer vocabulary size: {vocab_size}")

        def sanitize(ids, what):
            """Return a copy of ``ids`` in which every undecodable id is pad."""
            # -100 marks ignored/padded positions and cannot be decoded
            ids = np.where(ids != -100, ids, pad_id)
            out_of_range = (ids >= vocab_size) | (ids < 0)
            n_invalid = int(out_of_range.sum())
            if n_invalid:
                message = f"Found {n_invalid} {what} IDs outside vocab range. Replacing with pad token."
                print(message)
                logger.warning(message)
                ids = np.where(out_of_range, pad_id, ids)
            return ids

        # np.where builds new arrays, so the arrays owned by the caller are
        # left unmodified.
        preds = sanitize(preds, "token")
        labels = sanitize(labels, "label")

        # Decode and post-process; sacreBLEU expects a list of references per
        # prediction, hence the nested list for labels.
        decoded_preds = [text.strip() for text in tokenizer.batch_decode(preds, skip_special_tokens=True)]
        decoded_labels = [[text.strip()] for text in tokenizer.batch_decode(labels, skip_special_tokens=True)]

        # Debug output - print some examples with both print and logger
        print("\n===== PREDICTION EXAMPLES =====")
        for i in range(min(3, len(decoded_preds))):
            pred_line = f"Pred[{i}]: {decoded_preds[i][:100]}..."
            label_line = f"Label[{i}]: {decoded_labels[i][0][:100]}..."
            # flush=True makes the examples appear immediately in the output
            print(pred_line, flush=True)
            print(label_line, flush=True)
            logger.info(pred_line)
            logger.info(label_line)

        # Compute BLEU score
        result = metric.compute(predictions=decoded_preds, references=decoded_labels)

        # Add generation length (non-pad tokens per prediction)
        prediction_lens = [np.count_nonzero(pred != pad_id) for pred in preds]
        result["gen_len"] = np.mean(prediction_lens)

        # Format results
        formatted_result = {
            "bleu": round(result["score"], 4),
            "gen_len": round(result["gen_len"], 4)
        }

        # Print final metrics
        print(f"\nMetrics: BLEU = {formatted_result['bleu']}, Gen Length = {formatted_result['gen_len']}")

        return formatted_result

    except Exception as e:
        # Deliberate best-effort: report in detail, then return zeros so that
        # a metric failure does not abort a long training run.
        import traceback

        error_msg = f"Error in compute_metrics: {e}"
        print(error_msg)
        logger.error(error_msg)

        tb = traceback.format_exc()
        print(f"Traceback: {tb}")
        logger.error(f"Traceback: {tb}")

        return {"bleu": 0.0, "gen_len": 0.0}

In [None]:
# Training arguments
# NOTE: `resume_from_checkpoint` is intentionally not set here. As a
# TrainingArguments field it is not consumed by the Trainer itself; resuming
# has to be requested via `trainer.train(resume_from_checkpoint=...)`.
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    eval_strategy="epoch",
    save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=weight_decay,
    save_total_limit=save_total_limit,
    num_train_epochs=num_epochs,
    predict_with_generate=True,  # compute_metrics needs generated token ids
    fp16=False,  # Disable mixed precision initially for stability
    push_to_hub=True,
    hub_model_id=repo_id,
    load_best_model_at_end=True,
    metric_for_best_model="bleu",
    greater_is_better=True,
    max_grad_norm=max_grad_norm,
    gradient_accumulation_steps=gradient_accumulation_steps,
    logging_dir="./logs",
    logging_steps=10,
    generation_max_length=max_length,
    generation_num_beams=4,
    label_smoothing_factor=0.1,
    lr_scheduler_type="polynomial",
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    report_to="tensorboard"
)

# Initialize the trainer
# `processing_class` replaces the deprecated `tokenizer` argument.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[
        EarlyStoppingCallback(early_stopping_patience=early_stopping_patience),
        MonitorCallback()
    ]
)


  trainer = Seq2SeqTrainer(


In [None]:
# Initial evaluation: score the loaded checkpoint on the validation set before
# any training step, as a baseline for the per-epoch metrics that follow.
# max_length is forwarded to evaluate() as a generation setting.
print("\nRunning initial evaluation...")
initial_eval_results = trainer.evaluate(max_length=max_length)
print(f"Initial evaluation results: {initial_eval_results}")


Running initial evaluation...




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 16126 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Jesu arihokala adzagonya kunena kare kukala nabii k'aheshimu kahi za ts'i ya kwakwe mwenye...
Label[0]: Kwani Jesu mwenye were waamba Nabii k'aishimiwa kahi za ts'i ya kwao...
Pred[1]: P'et'ero akienderera kunena akiamba Hatha simumanya kaheri kaheri...
Label[1]: P'et'ero akikanaiza kaheri kwa kuapa akiamba Mwanamulume iye simumanya kamare...
Pred[2]: P'et'ero akimudzigidzya akiamba Ndo anafundzi angine osi mandiokala manamukuluhira ela ro kuluhiro r...
Label[2]: P'et'ero akimudzigidzya akiamba Hatha kala osi mandakuricha mimi sindakuricha ng'o...

Metrics: BLEU = 9.079, Gen Length = 57.1151
Initial evaluation results: {'eval_loss': 3.278590679168701, 'eval_model_preparation_time': 0.0147, 'eval_bleu': 9.079, 'eval_gen_len': 57.1151, 'eval_runtime': 70.6712, 'eval_samples_per_second': 11.06

In [None]:
# Start training. No checkpoint path is passed to train() here.
# NOTE(review): if resuming from a saved checkpoint is intended, it has to be
# requested through train(resume_from_checkpoint=...) - confirm the intent.
trainer.train()

Epoch,Training Loss,Validation Loss,Model Preparation Time,Bleu,Gen Len
1,2.4017,3.266871,0.0147,8.839,55.7289
2,2.3843,3.283203,0.0147,8.9132,55.7967
3,2.3685,3.278569,0.0147,8.8844,55.5806
4,2.3123,3.299557,0.0147,8.7647,56.4514
5,2.3043,3.32051,0.0147,8.8872,55.9028
6,2.2423,3.342463,0.0147,8.911,55.9974
7,2.2314,3.351343,0.0147,9.1416,56.8223
8,2.2882,3.364541,0.0147,8.8545,55.7826
9,2.2807,3.338727,0.0147,9.0952,55.8312
10,2.2436,3.3336,0.0147,9.1919,56.4847




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 18100 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Jesu arihokala adzagonya kunena kare kukala nabii k'aheshimu kahi za ts'i ya kwakwe mwenye...
Label[0]: Kwani Jesu mwenye were waamba Nabii k'aishimiwa kahi za ts'i ya kwao...
Pred[1]: K'uzhona zho Mwanawe akikala haho kabila k'adzaumbwa kit'u chochosi...
Label[1]: Iye Masihi wakalako kare hatha kabila vit'u zhosi kuumbwa naye ndiye ariye dzulu za kila kit'u...
Pred[2]: Uvoro uu ndo fuhubiri kila mut'u ariyelagwa Masihi kwa ut'u wa wo musalabani Uwo ni ut'u wa Ayahudi ...
Label[2]: ela sino funamuhubiri Masihi ariyesulubiwa musalabani Uvoro uu unaatsukiza Ayahudi na kwa Ayunani ni...

Metrics: BLEU = 8.839, Gen Length = 55.7289




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19658 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Mimi ninamuhuma auye kwenu ela dzulu za yo mimi mwenye mwenye...
Label[0]: Bai namudzya kwako kaheri kwa vizho muhokere kwani iye be a moyoni mwangu...
Pred[1]: P'et'ero na Johana makimulola yuyahu mut'u ariyekala adzaamba...
Label[1]: Vikara P'et'ero na Johana marihoona vizho makimuthema dzitso ye mut'u gonya P'et'ero akimwamba Hulol...
Pred[2]: Mwenye mut'u akikala ana kit'u kinyume cha mut'u mungine kahi zenu kwanoni mbona munaenderera makosa...
Label[2]: Vidze k'amumanya kukala sino at'u a Mulungu fundahukumu hatha malaika H'aya be kala vizho funazhadim...

Metrics: BLEU = 8.9132, Gen Length = 55.7967




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 18224 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Mut'u yuyu wafuhendya fukakale ahumiki a kilagane kisha kwa at'u kwa kukala si kilagane cha sheria z...
Label[0]: Iye nde ariyefwadimira hukale ahumiki a kwakwe kahi za kilagane kisha Kilagane kiki si cha Sheria za...
Pred[1]: Vikara wakathi wa P'asaka urihofika Jesu wakwenda Jerusalemu...
Label[1]: Gonya wakathi wa sikuk'uu ya Kiyahudi ya P'asaka urihofika hehi Jesu wambuka kwenda Jerusalemu...
Pred[2]: Vikara Jesu na anafundzie makwenda Beth'isaida na at'u angine makimureha kipofu mut'u yuyahu mamugut...
Label[2]: Bai machenda hatha makifika Beth'isaida na kuko at'u makimurehera Jesu mut'u ariyekala ni kipofu na ...

Metrics: BLEU = 8.8844, Gen Length = 55.5806




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 18756 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Bai aryahu at'u abomu mariokala madzatoa kwa kuluhiro hatha makafwa K'amavipata viryahu Mulungu ariz...
Label[0]: At'u aa osi mafwa manakuluhira vingahokala zho marizholagwa ni Mulungu k'amavipatire Mahenda kuzhona...
Pred[1]: Ela kumbukira kwa yo zawadi uriyomup'a kukirira na unabii uriwo unawogerwa ni o azhere azhere kuikir...
Label[1]: Usikiriche bule cho kigerwacho cha kiroho kiricho ndani mwako Kigerwa kicho wakigerwa wakathi hariho...
Pred[2]: Uthawali uriwo unapigana na ye mwenye k'undaindatoa...
Label[2]: Ts'i at'ue makidzigazha makundi-makundi gahehago ts'i iyo k'aindatoa...

Metrics: BLEU = 8.7647, Gen Length = 56.4514




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19646 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Ela Elija ni kwa kukala were k'ahana mut'u kahi za o ache magungu a Iziraeli h'akeye nde ariyehumwa ...
Label[0]: Ela ko Elija k'ahumwirwe kwa yoyosi kahi za ao hat'u ha vizho akihumwa kwa mwanamuche gungu kuko Sar...
Pred[1]: Kwa vizho manahenda vivi kwa mahendzo na kumanya kukala Mulungu adzanip'a kazi ya kuhubiri wo Uvoro ...
Label[1]: Aa mahubirio kwa nia mbidzo manavihenda kwa ut'u wa mahendzo Manamanya kukala Mulungu adzanika haha ...
Pred[2]: Ye mkurima ahendaye kazi kwa bidii anafaa kula kwa kwandza kwa mavuno...
Label[2]: H'aya fukihala mufano wa muk'urima ni yuyahu ariye ana bidhii na kurima nde ariye anavirya akale wa ...

Metrics: BLEU = 8.8872, Gen Length = 55.9028




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19492 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Ni here zhomu zha kumala mwiri wa kifwa na undaona hat'u ha ts'i za mbazi haha dzulu...
Label[0]: Kwani Hadzihogwa kimba ndo nderi mathungananaho...
Pred[1]: Na vizho ndo virizho kahi za kuluhiro ni karakara kidza k'akuna kit'u chochochosi kala k'akihenda ki...
Label[1]: Kwa vizho mut'u adziambaye anamukuluhira Mulungu ela ko k'ana rorosi ridzo ahenderaro mut'u kuluhiro...
Pred[2]: Vidze aryahu ahumikio madzo kidza mahendzo ga hendani mukumbukira kukala una Bwana ko mulunguni...
Label[2]: Vikara ninwi murio muna atumwa hakikishani kukala munaahalirat'o na kwa hachi Musisahau kukala ninwi...

Metrics: BLEU = 8.911, Gen Length = 55.9974




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 16222 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: O at'u makimudzigidzya makiamba K'ahana p'ep'o adziyekuhendya amwalage...
Label[0]: O at'u makimwamba Una p'ep'o Ni h'ani amalaye kukwalaga bewe...
Pred[1]: Mut'u yuyu ni mut'u kahi za ziya zhosi zha mwiriwe Kidza mwiriwe unagwirira mwiriwe wowosi na uumbe ...
Label[1]: Bai lo ludhimi nalo ni here ts'ets'e ya moho lu thele maut'u manji mai na nikumuhendya mut'u akanong...
Pred[2]: Munamanya kukala at'u mario ni ai na enye dambi zidzazho munamanya kukala mahendao dambi zao ni kuhe...
Label[2]: Kwani mut'u dza yuyu unamanya kukala ana dambi na adzanongeka kidza anadzipiga ulongo mwenye...

Metrics: BLEU = 9.1416, Gen Length = 56.8223




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 18738 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Mukiona zo thabu nirizodzadzipata kwa uso na kidza mukisikira kukala nidzapata thabu nyinji zidzazo ...
Label[0]: Vikara nanwi mundadima kuhala muza hamwenga nami kahi za yo k'ondo K'ondo ii ni iyo muriyonona nikii...
Pred[1]: Ndo makipiga k'ululu na kuamba P'et'ero ye muthawali wa Ayahudi...
Label[1]: Gonya makikwatya kumuvudhya na kumulamusa makiamba K'una ut'u muheshimiwa muthawali wa Ayahudi...
Pred[2]: Jesu akienderera kunena akiamba Ndzoni ndani zangu nyosi mut'u adziyechoka kula kahi za wo muzigowe ...
Label[2]: Ndzoni kwangu ninwi nyosi musirimao na murio mudzaremererwa ni mizigo nami nindamuoyeza...

Metrics: BLEU = 8.8545, Gen Length = 55.7826




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19300 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Na vizho ndizho virizhokala virizhokala vindakala dza vizho ao at'u mafufulwao na mwiri wa mbazi nik...
Label[0]: Na vizho ndo virizho hatha kahi za kufufuka Wo mwiri uzikwao ni mwiri wa kufwa na kuola ela uryahu u...
Pred[1]: Kwa vizho hunakwenda ko ndze ya yo k'anda ili fupate aibu ii ariyokala nayo...
Label[1]: Kwa vizho bai nafumuthuwe Jesu kuko ndze ya k'ambi fukapate muthalo wehu kahi za riro hukanwa ariroh...
Pred[2]: Na pia kuna miri ya mulunguni na miri ya haha dhuniani Ela udzo wa miri ya mulunguni ni miri mwenga ...
Label[2]: Kuna miri ya mulunguni na miri ya dhuniani Udzo wa miri ya mulunguni u vingine na udzo wa miri ya dh...

Metrics: BLEU = 9.0952, Gen Length = 55.8312




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 18044 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Na kwa kukala Mulungu anadima kumup'a baraka nyinji kukira zhosi nanwi mundakala na vinji zhosi zhen...
Label[0]: Na Mulungu ana wadimi wa kumup'a ninwi zaidhi ya zho mumalazho kwamba siku zosi mukale na vit'u zha ...
Pred[1]: Ye mubomu wa shikari akimudzigidzya akiamba Bwana muhumiki wangu ni mukongo sana kitandani k'adima k...
Label[1]: Bwana muhumiki wangu ni mukongo sana Vilungozhe zhaholoza na analumwa k'azhadimikika...
Pred[2]: Bai ninwi mwasikira zho zhaambwa kukala Musikale na ut'u uriwo unaaaika ela hat'u ha vizho usitsuhe ...
Label[2]: Mwasikira kukala haho kapindi at'u maambwa Ukiapa mbere za Bwana usilahe ulongo bule thimiza kila ki...

Metrics: BLEU = 9.1919, Gen Length = 56.4847




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 21158 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Daudi mwenye anamwiha Masihi Bwana vino be anadimadze kukala mwana wa Daudi na at'u anji makimusirik...
Label[0]: Vikikala Daudi mwenye amwiha Bwana anadimadze kukala mwanawe Bai wo muthunganano mubomu ukimusirikiz...
Pred[1]: K'uzhona zho nikikala mut'u wa Ayahudi ili kwamba ao mario were k'amathawaliwa ni Sheria nakala here...
Label[1]: Kwa Ayahudi nakala here Muyahudi ili niavuhe Ayahudi Na ingahokala mimi si ts'ini za sheria yao naka...
Pred[2]: Kwa vizho bai mut'u yoyosi ariye k'amundathubu akidzigidzya haraka nindakudzirana na aa nao kwa mush...
Label[2]: H'aya thubuni kwani kala k'amundathubu nadza kare niapige ao mathuwao mafundisho mai na wo mushu umb...

Metrics: BLEU = 9.2663, Gen Length = 55.0818




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 21518 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Vikara Jesu arihokala akwendani ko Murima wa Mizeituni achenda hatha achangira kahi za wo murima wa ...
Label[0]: Ela Jesu ye achenda kahi za Murima wa Mizeituni...
Pred[1]: Bai aryahu mabaharia makigoha kwamba hupige go mawe makigatsuha nanga ne za madzi na makivoya makivo...
Label[1]: Bai kwa ut'u wa kuogoha p'ore makagwizanywa na ho mbararani mahala zo nanga ne na makizitharamusha m...
Pred[2]: Vikara kwereko kuko kundi bomu ra at'u mariokala mananena na Jesu kisiri-siri-siri-dzakala makiamba ...
Label[2]: Kidza kahi za ro kundi mwere muna masumuriro ga kinjama-njama dzuluze Angine were manaamba Ni mut'u ...

Metrics: BLEU = 9.4554, Gen Length = 55.4054




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 17196 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Ye muhumiki wa hiri akidzigidzya akiamba Bwana na wo mfumowe mwenga wa dhahabu nidzazhala ts'ano...
Label[0]: Gonya ye wa hiri akidza akiamba Bwana iryahu shilingi yo ya dhahabu yareha faidha ya shilingi nyingi...
Pred[1]: Nami namwandhikira kahi za baruwa yangu kwamba ushirikaane na at'u ahendzao...
Label[1]: Ela vikara ut'u niriwokala ninamwambira ni uu kukala musigwirane na mut'u ariye ana dzina ra ndugu g...
Pred[2]: Kwani Maoro ganaamba kukala Burahemu ana ana airi mameye wa mulume mumwenga were ni mwanamuche mutum...
Label[2]: Iyo inaamba kukala Burahemu were ana ana airi a kilume Mumwenga wamuzhala na mwanamuche mutumwa na m...

Metrics: BLEU = 9.4334, Gen Length = 55.9297




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19206 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Kwa vizho bai o at'u k'amakubalianire dzulu za Jesu...
Label[0]: Kwa vizho at'u makigazhikana kwa ut'u wa Jesu...
Pred[1]: Ela ninwi nanwi munafundisha kukala mut'u anadima kumwamba babaye na mameye vit'u nindazhala ili nid...
Label[1]: Ela nwi munafundisha kukala mut'u akihala kit'u adimacho kumup'a babaye hedu mameye na akakala adzam...
Pred[2]: Iye nde Mulungu ahuthizhaye ili kwamba kukirira kwa Jesu Masihi apate nguvu za nguma na wadimi wosi ...
Label[2]: Kwa iye Mulungu h'akeye ariye nde mwokoli wehu nguma na ubomu na wadimi navikale kwakwe kukirira kwa...

Metrics: BLEU = 9.2443, Gen Length = 55.7916




Prediction shape: (782, 128), Labels shape: (782, 128)
Tokenizer vocabulary size: 58950
Found 19510 token IDs outside vocab range. Replacing with pad token.

===== PREDICTION EXAMPLES =====
Pred[0]: Ela Elija ninaamba kukala k'ahana mut'u kahi za o ache magungu a Iziraeli h'akeye nde ariyehumwa kwa...
Label[0]: Ela ko Elija k'ahumwirwe kwa yoyosi kahi za ao hat'u ha vizho akihumwa kwa mwanamuche gungu kuko Sar...
Pred[1]: Kwa vizho bai mukirya mukahe hedu kunwa kikombe cha Bwana kwa ngira ambayo ye Bwana k'ana makosa ga ...
Label[1]: Kwa ut'u uwo wenye bai mut'u yoyosi andiyerya mukahe hedu kunwa kikombe cha Bwana kahi za ngira isiy...
Pred[2]: Bai aryahu mabaharia makigoha kwamba hupige go mawe makigatsuha nanga ne za madzi na makivoya makiuy...
Label[2]: Bai kwa ut'u wa kuogoha p'ore makagwizanywa na ho mbararani mahala zo nanga ne na makizitharamusha m...

Metrics: BLEU = 9.8268, Gen Length = 55.5908
