In [None]:

import os, sys, json
from pathlib import Path
import torch

# Process-wide settings, exported before the device is queried below.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

# Project root: ~/Desktop/HindiToEnglishMT when that directory exists,
# otherwise the parent of the current working directory.
home = Path.home()
desktop = home / "Desktop"
base_dir = desktop / "HindiToEnglishMT"
if not base_dir.exists():
    base_dir = Path.cwd().resolve().parent
print("Base dir:", base_dir)

# Make <base_dir>/utils importable. The membership check keeps the cell
# idempotent: re-running it no longer piles duplicate entries onto sys.path.
utils_dir = str(base_dir / "utils")
if utils_dir not in sys.path:
    sys.path.append(utils_dir)
import sentencepiece as spm
from model_utils import Seq2SeqTransformer
from evaluation_utils import compute_metrics, token_prf1


In [None]:

# Load test data and best model
# The tokenized test split is read from <base_dir>/data/processed.
processed_dir = base_dir.joinpath("data", "processed")
test_path = processed_dir.joinpath("test_tokenized.jsonl")

def load_jsonl(path):
    """Read a JSON Lines file into a list of decoded records.

    Args:
        path: Location of a UTF-8 encoded file holding one JSON document
            per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a stray empty line at the end of the file) carry
        # no record; passing them to json.loads would raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]

# Read the tokenized test examples.
test_data = load_jsonl(test_path)
print("Test examples:", len(test_data))

# The SentencePiece model provides the special-token ids and vocabulary size.
sp = spm.SentencePieceProcessor(
    model_file=str(base_dir / "models" / "vocab" / "hi_en_unigram.model")
)
pad_id, bos_id, eos_id = sp.pad_id(), sp.bos_id(), sp.eos_id()
vocab_size = sp.vocab_size()

# Load the saved checkpoint onto `device`, build the model, apply the
# checkpoint as its state dict, and switch the model to eval mode.
ckpt = torch.load(
    base_dir / "models" / "from_scratch" / "best_model.pt",
    map_location=device,
)
model = Seq2SeqTransformer(vocab_size=vocab_size, pad_id=pad_id).to(device)
model.load_state_dict(ckpt)
model.eval()


In [None]:

# Decode test set (beam search for better quality)
from tqdm import tqdm

# Decoding settings. MAX_EXAMPLES caps how many test examples are decoded;
# set it to None to decode the full test set (may be slow).
MAX_EXAMPLES = 2000
MAX_DECODE_LEN = 128
BEAM_SIZE = 4
LENGTH_PENALTY = 0.6


def _strip_special(ids):
    """Return `ids` without a leading BOS and without the first EOS and anything after it.

    The first token is removed only when it equals `bos_id`, so a sequence
    stored without BOS does not lose its first real token.
    """
    if ids and ids[0] == bos_id:
        ids = ids[1:]
    if eos_id in ids:
        ids = ids[:ids.index(eos_id)]
    return ids


hyps_txt = []
refs_txt = []
hyp_ids_all = []
ref_ids_all = []

# Decoding never needs gradients; no_grad stops autograd from recording the
# operations performed during the search.
with torch.no_grad():
    for ex in tqdm(test_data[:MAX_EXAMPLES], desc="Decoding (subset for speed)"):
        src = torch.tensor([ex["src"]], dtype=torch.long, device=device)
        out_ids = model.beam_search(
            src,
            max_len=MAX_DECODE_LEN,
            bos_id=bos_id,
            eos_id=eos_id,
            beam_size=BEAM_SIZE,
            length_penalty=LENGTH_PENALTY,
        )
        # Hypothesis and reference get the same BOS/EOS trimming so the
        # text and token-id lists collected below are directly comparable.
        hyp_ids = _strip_special(out_ids[0].tolist())
        ref_ids = _strip_special(list(ex["tgt"]))

        hyps_txt.append(sp.decode(hyp_ids))
        refs_txt.append(sp.decode(ref_ids))
        hyp_ids_all.append(hyp_ids)
        ref_ids_all.append(ref_ids)

# Show one decoded pair as a sanity check; skipped when nothing was decoded
# so an empty test set does not raise IndexError here.
if hyps_txt:
    print("Sample hypothesis:", hyps_txt[0])
    print("Sample reference: ", refs_txt[0])
else:
    print("No test examples were decoded.")


In [None]:

# Text-level metrics from the project's compute_metrics helper.
metrics = compute_metrics(hyps_txt, refs_txt)
print("Text metrics:", metrics)

# Token-level P/R/F1 over the collected id sequences.
prf1 = token_prf1(hyp_ids_all, ref_ids_all, pad_id=pad_id)
print("Token P/R/F1:", prf1)

# Write both metric groups to <base_dir>/results/test_metrics.json.
import json
results_dir = base_dir / "results"
results_dir.mkdir(parents=True, exist_ok=True)
metrics_path = results_dir / "test_metrics.json"
payload = {"text_metrics": metrics, "token_prf1": prf1}
with metrics_path.open("w", encoding="utf-8") as out_file:
    json.dump(payload, out_file, indent=2)
print("Saved metrics to", metrics_path)
