In [1]:
import torch
from transformers import MT5ForConditionalGeneration, MT5Tokenizer
from sacrebleu import corpus_bleu
from tqdm import tqdm

# === Load FLORES-200 test files ===
flores_path = "./data/flores200_dataset/devtest/"  # update if different


def _read_lines(path):
    """Return every line of the UTF-8 text file at *path*, whitespace-stripped."""
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f]


# Nepali source sentences and their English reference translations.
sources = _read_lines(f"{flores_path}/npi_Deva.devtest")
references = _read_lines(f"{flores_path}/eng_Latn.devtest")

# Sources and references are paired by position when scoring, so a length
# mismatch means the files are misaligned. Fail here, before spending time
# on inference, rather than at scoring time.
if len(sources) != len(references):
    raise ValueError(
        f"Source/reference line counts differ: "
        f"{len(sources)} sources vs {len(references)} references"
    )

# === Load model and tokenizer ===
model_path = "./mt5-npi-en"

# Pick the best available device instead of assuming Apple's MPS backend:
# MPS first (the original target), then CUDA, then CPU as a universal fallback.
# getattr guards against torch builds that do not ship the MPS backend module.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = MT5Tokenizer.from_pretrained(model_path)
model = MT5ForConditionalGeneration.from_pretrained(model_path).to(device)
model.eval()  # evaluation mode; this script only runs inference

# === Translation function ===
def translate_nepali_to_english(texts, *, max_input_length=300,
                                max_output_length=300, num_beams=4):
    """Translate Nepali text to English using the module-level model.

    Each text is prefixed with the task prompt
    ``"translate Nepali to English: "``, tokenized as one padded batch,
    moved to ``device`` and decoded with beam search.

    Args:
        texts: A single string, or an iterable of strings, to translate.
        max_input_length: Token limit used when encoding; longer inputs are
            truncated.
        max_output_length: Value passed as ``max_length`` to
            ``model.generate``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        The list of strings produced by ``tokenizer.batch_decode`` with
        special tokens skipped. A single string input still yields a list.
        Empty input yields an empty list.
    """
    if isinstance(texts, str):
        texts = [texts]

    inputs = ["translate Nepali to English: " + t for t in texts]
    if not inputs:
        # Nothing to translate: skip the tokenizer and the model entirely.
        return []

    encoding = tokenizer(
        inputs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_input_length,
    ).to(device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=encoding["input_ids"],
            attention_mask=encoding["attention_mask"],
            max_length=max_output_length,
            num_beams=num_beams,
        )

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

# === Batch inference ===
# Translate the sources in fixed-size slices, collecting outputs in order.
batch_size = 4
predictions = []

batch_starts = range(0, len(sources), batch_size)
for start in tqdm(batch_starts):
    chunk = sources[start:start + batch_size]
    predictions.extend(translate_nepali_to_english(chunk))

# === Evaluate BLEU ===
# corpus_bleu receives the references wrapped in an outer list; here that
# list holds the single English reference set.
reference_streams = [references]
bleu = corpus_bleu(predictions, reference_streams)
print(f"\n✅ Test BLEU score on FLORES Nepali → English: {bleu.score:.2f}")


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 253/253 [17:16<00:00,  4.09s/it]


✅ Test BLEU score on FLORES Nepali → English: 9.44



