**Installation & Imports**

In [8]:
#Installation, Imports & Login
!pip install -q datasets pandas huggingface_hub

import os
import pandas as pd
from datasets import load_dataset
from huggingface_hub import login
login()

print("Libraries installed and logged in.")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Libraries installed and logged in.


**Configuration**

In [9]:
#Configuration
# Remote location of the reference files, the evaluation split, and the
# language codes processed by the rest of the notebook.
GITHUB_BASE = "https://raw.githubusercontent.com/dsfsi/flores-fix-4-africa/main/data"
SPLIT = "devtest" #using devtest for standard evaluation
LANGUAGES = ["hau", "nso", "tso", "zul"]

# Local folders for the original and the corrected references; created up
# front (no error if they already exist) so later cells can write into them.
OS_DIR, CS_DIR = "data/original", "data/corrected"
for _local_dir in (OS_DIR, CS_DIR):
    os.makedirs(_local_dir, exist_ok=True)

print(f"Configuration set for languages: {LANGUAGES} on split: {SPLIT}")

Configuration set for languages: ['hau', 'nso', 'tso', 'zul'] on split: devtest


**Download References from GitHub**

In [10]:
#Download Reference Data
# Imported in this cell so it stays runnable without touching the imports cell.
import shutil
import urllib.request


def _download(url, dest_path, timeout=60):
    """Fetch `url` and write the response body to `dest_path`.

    Unlike a quiet `wget -O`, a failed request raises (urllib.error.HTTPError
    for HTTP error statuses, urllib.error.URLError for network problems)
    instead of leaving an empty file behind and carrying on.

    Args:
        url: Address of the file to fetch.
        dest_path: Local path the content is written to (overwritten if present).
        timeout: Seconds to wait on the connection before giving up.
    """
    # urlopen() is evaluated first, so the destination file is only
    # opened (and truncated) once the request itself has succeeded.
    with urllib.request.urlopen(url, timeout=timeout) as response, \
            open(dest_path, "wb") as out_file:
        shutil.copyfileobj(response, out_file)


print("Downloading Data from GitHub")

for lang in LANGUAGES:
    lang_code = f"{lang}_Latn"
    filename = f"{lang_code}.{SPLIT}"

    # 1. Download ORIGINAL Reference
    orig_url = f"{GITHUB_BASE}/original/{SPLIT}/{filename}"
    _download(orig_url, f"{OS_DIR}/{filename}")
    print(f"Downloaded ORIGINAL: {filename}")

    # 2. Download CORRECTED Reference
    corr_url = f"{GITHUB_BASE}/corrected/{SPLIT}/{filename}"
    _download(corr_url, f"{CS_DIR}/{filename}")
    print(f"Downloaded CORRECTED: {filename}")

print("\n All reference files downloaded.")

Downloading Data from GitHub
Downloaded ORIGINAL: hau_Latn.devtest
Downloaded CORRECTED: hau_Latn.devtest
Downloaded ORIGINAL: nso_Latn.devtest
Downloaded CORRECTED: nso_Latn.devtest
Downloaded ORIGINAL: tso_Latn.devtest
Downloaded CORRECTED: tso_Latn.devtest
Downloaded ORIGINAL: zul_Latn.devtest
Downloaded CORRECTED: zul_Latn.devtest

 All reference files downloaded.


**Load English Source from Hugging Face**

In [11]:

# Announce the step, load the configured split, then keep only the rows
# whose ISO 639-3 code is "eng".
print("Loading English Source")

ds_eng = load_dataset("openlanguagedata/flores_plus", split=SPLIT)
ds_eng = ds_eng.filter(lambda row: row["iso_639_3"] == "eng")

#Materialise the text column as a plain Python list
eng_sentences = [sentence for sentence in ds_eng["text"]]

print(f"Loaded {len(eng_sentences)} English source sentences.")
print(f"Data type verified: {type(eng_sentences)}")

Loading English Source


Resolving data files:   0%|          | 0/223 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/217 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/223 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/217 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/223 [00:00<?, ?files/s]

dev/acq_Arab.parquet:   0%|          | 0.00/136k [00:00<?, ?B/s]

dev/afr_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/acm_Arab.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

dev/arb_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

dev/arb_Arab.parquet:   0%|          | 0.00/137k [00:00<?, ?B/s]

dev/ace_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/ace_Arab.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/apc_Arab_nort3139.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

dev/apc_Arab_sout3123.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

dev/als_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/aeb_Arab.parquet:   0%|          | 0.00/134k [00:00<?, ?B/s]

dev/ars_Arab.parquet:   0%|          | 0.00/137k [00:00<?, ?B/s]

dev/amh_Ethi.parquet:   0%|          | 0.00/146k [00:00<?, ?B/s]

dev/arz_Arab.parquet:   0%|          | 0.00/134k [00:00<?, ?B/s]

dev/arg_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/ary_Arab.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

dev/asm_Beng.parquet:   0%|          | 0.00/167k [00:00<?, ?B/s]

dev/ayr_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/awa_Deva.parquet:   0%|          | 0.00/157k [00:00<?, ?B/s]

dev/azj_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

dev/azb_Arab.parquet:   0%|          | 0.00/134k [00:00<?, ?B/s]

dev/bak_Cyrl.parquet:   0%|          | 0.00/149k [00:00<?, ?B/s]

dev/bam_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/ast_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/ban_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/bel_Cyrl.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

dev/bem_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

dev/bjn_Arab.parquet:   0%|          | 0.00/136k [00:00<?, ?B/s]

dev/bod_Tibt.parquet:   0%|          | 0.00/165k [00:00<?, ?B/s]

dev/bho_Deva.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

dev/ben_Beng.parquet:   0%|          | 0.00/166k [00:00<?, ?B/s]

dev/bjn_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

dev/bos_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/brx_Deva.parquet:   0%|          | 0.00/164k [00:00<?, ?B/s]

dev/bug_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/bul_Cyrl.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

dev/ceb_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/cat_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

dev/ces_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/chv_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

dev/cjk_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/ckb_Arab.parquet:   0%|          | 0.00/144k [00:00<?, ?B/s]

dev/cmn_Hans.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

dev/cmn_Hant.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/crh_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/cym_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

dev/dan_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/deu_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/dyu_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/ekk_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/dik_Latn.parquet:   0%|          | 0.00/106k [00:00<?, ?B/s]

dev/ell_Grek.parquet:   0%|          | 0.00/171k [00:00<?, ?B/s]

dev/eng_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

dev/dar_Cyrl.parquet:   0%|          | 0.00/159k [00:00<?, ?B/s]

dev/dzo_Tibt.parquet:   0%|          | 0.00/178k [00:00<?, ?B/s]

dev/dgo_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

dev/epo_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/ewe_Latn.parquet:   0%|          | 0.00/110k [00:00<?, ?B/s]

dev/fao_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/fij_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/eus_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/fil_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/fin_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

dev/fon_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/fur_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/fuv_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

dev/fra_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

dev/gaz_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/gla_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/gle_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

dev/glg_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

dev/gom_Deva.parquet:   0%|          | 0.00/159k [00:00<?, ?B/s]

dev/gug_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/hat_Latn.parquet:   0%|          | 0.00/108k [00:00<?, ?B/s]

dev/guj_Gujr.parquet:   0%|          | 0.00/162k [00:00<?, ?B/s]

dev/hau_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/heb_Hebr.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/hin_Deva.parquet:   0%|          | 0.00/161k [00:00<?, ?B/s]

dev/hne_Deva.parquet:   0%|          | 0.00/156k [00:00<?, ?B/s]

dev/hrv_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/hye_Armn.parquet:   0%|          | 0.00/158k [00:00<?, ?B/s]

dev/hun_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/ibo_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

dev/ilo_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/ind_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/ita_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/isl_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/jav_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/jpn_Jpan.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

dev/kab_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/kam_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/kas_Arab.parquet:   0%|          | 0.00/149k [00:00<?, ?B/s]

dev/kas_Deva.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

dev/kat_Geor.parquet:   0%|          | 0.00/172k [00:00<?, ?B/s]

dev/kaz_Cyrl.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

dev/kea_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/kan_Knda.parquet:   0%|          | 0.00/175k [00:00<?, ?B/s]

dev/khk_Cyrl.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

dev/kbp_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

dev/khm_Khmr.parquet:   0%|          | 0.00/205k [00:00<?, ?B/s]

dev/kik_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/kac_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/kir_Cyrl.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

dev/kmb_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/kin_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/kmr_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/knc_Arab.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

dev/knc_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

dev/kor_Hang.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

dev/ktu_Latn.parquet:   0%|          | 0.00/110k [00:00<?, ?B/s]

dev/lij_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/lim_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/lit_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/lao_Laoo.parquet:   0%|          | 0.00/173k [00:00<?, ?B/s]

dev/lmo_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/lld_Latn_gard1241.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/ltg_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/lua_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/lug_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/luo_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/lus_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/lvs_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/mag_Deva.parquet:   0%|          | 0.00/154k [00:00<?, ?B/s]

dev/ltz_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

dev/lld_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/lin_Latn.parquet:   0%|          | 0.00/106k [00:00<?, ?B/s]

dev/mai_Deva.parquet:   0%|          | 0.00/158k [00:00<?, ?B/s]

dev/mal_Mlym.parquet:   0%|          | 0.00/183k [00:00<?, ?B/s]

dev/mar_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

dev/mhr_Cyrl.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

dev/min_Arab.parquet:   0%|          | 0.00/137k [00:00<?, ?B/s]

dev/min_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

dev/mkd_Cyrl.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

dev/mlt_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

dev/mni_Mtei.parquet:   0%|          | 0.00/154k [00:00<?, ?B/s]

dev/mni_Beng.parquet:   0%|          | 0.00/162k [00:00<?, ?B/s]

dev/mos_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/mri_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/mya_Mymr.parquet:   0%|          | 0.00/186k [00:00<?, ?B/s]

dev/myv_Cyrl.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

dev/nno_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/nob_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/nob_Latn_radical.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/npi_Deva.parquet:   0%|          | 0.00/161k [00:00<?, ?B/s]

dev/nld_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/nso_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/nqo_Nkoo.parquet:   0%|          | 0.00/161k [00:00<?, ?B/s]

dev/nus_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

dev/nya_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/oci_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

dev/oci_Latn_aran1260.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

dev/ory_Orya.parquet:   0%|          | 0.00/169k [00:00<?, ?B/s]

dev/pag_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/pan_Guru.parquet:   0%|          | 0.00/165k [00:00<?, ?B/s]

dev/pap_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/pbt_Arab.parquet:   0%|          | 0.00/144k [00:00<?, ?B/s]

dev/pes_Arab.parquet:   0%|          | 0.00/140k [00:00<?, ?B/s]

dev/pol_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

dev/por_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/quy_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/prs_Arab.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

dev/ron_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/run_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/plt_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/sag_Latn.parquet:   0%|          | 0.00/107k [00:00<?, ?B/s]

dev/san_Deva.parquet:   0%|          | 0.00/164k [00:00<?, ?B/s]

dev/sat_Olck.parquet:   0%|          | 0.00/164k [00:00<?, ?B/s]

dev/shn_Mymr.parquet:   0%|          | 0.00/213k [00:00<?, ?B/s]

dev/scn_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/sin_Sinh.parquet:   0%|          | 0.00/174k [00:00<?, ?B/s]

dev/rus_Cyrl.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

dev/slk_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

dev/slv_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/smo_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/sna_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/snd_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

dev/snd_Arab.parquet:   0%|          | 0.00/134k [00:00<?, ?B/s]

dev/som_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

dev/sot_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/spa_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

dev/srd_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

dev/srp_Cyrl.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

dev/ssw_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/swe_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/sun_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/szl_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

dev/swh_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

dev/tam_Taml.parquet:   0%|          | 0.00/178k [00:00<?, ?B/s]

dev/taq_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

dev/taq_Tfng.parquet:   0%|          | 0.00/152k [00:00<?, ?B/s]

dev/tat_Cyrl.parquet:   0%|          | 0.00/149k [00:00<?, ?B/s]

dev/tgk_Cyrl.parquet:   0%|          | 0.00/157k [00:00<?, ?B/s]

dev/tir_Ethi.parquet:   0%|          | 0.00/149k [00:00<?, ?B/s]

dev/tpi_Latn.parquet:   0%|          | 0.00/113k [00:00<?, ?B/s]

dev/tsn_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

dev/tel_Telu.parquet:   0%|          | 0.00/170k [00:00<?, ?B/s]

dev/tso_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

dev/tuk_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/tum_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

dev/tur_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

dev/twi_Latn_akua1239.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/twi_Latn_asan1239.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/tyv_Cyrl.parquet:   0%|          | 0.00/160k [00:00<?, ?B/s]

dev/ukr_Cyrl.parquet:   0%|          | 0.00/156k [00:00<?, ?B/s]

dev/umb_Latn.parquet:   0%|          | 0.00/110k [00:00<?, ?B/s]

dev/urd_Arab.parquet:   0%|          | 0.00/145k [00:00<?, ?B/s]

dev/vec_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/uzs_Arab.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

dev/tha_Thai.parquet:   0%|          | 0.00/172k [00:00<?, ?B/s]

dev/vie_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

dev/uig_Arab.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

dev/vmw_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/war_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

dev/uzn_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/wol_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

dev/wuu_Hans.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

dev/xho_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

dev/ydd_Hebr.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

dev/yor_Latn.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

dev/yue_Hant.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

dev/zgh_Tfng.parquet:   0%|          | 0.00/154k [00:00<?, ?B/s]

dev/zsm_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

dev/zul_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

Downloading data:   0%|          | 0/217 [00:00<?, ?files/s]

devtest/ace_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/apc_Arab_nort3139.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

devtest/acq_Arab.parquet:   0%|          | 0.00/143k [00:00<?, ?B/s]

devtest/afr_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/arg_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/amh_Ethi.parquet:   0%|          | 0.00/149k [00:00<?, ?B/s]

devtest/arb_Latn.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

devtest/arb_Arab.parquet:   0%|          | 0.00/143k [00:00<?, ?B/s]

devtest/ace_Arab.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

devtest/acm_Arab.parquet:   0%|          | 0.00/140k [00:00<?, ?B/s]

devtest/apc_Arab_sout3123.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

devtest/als_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/ary_Arab.parquet:   0%|          | 0.00/140k [00:00<?, ?B/s]

devtest/aeb_Arab.parquet:   0%|          | 0.00/139k [00:00<?, ?B/s]

devtest/arz_Arab.parquet:   0%|          | 0.00/139k [00:00<?, ?B/s]

devtest/ars_Arab.parquet:   0%|          | 0.00/143k [00:00<?, ?B/s]

devtest/asm_Beng.parquet:   0%|          | 0.00/173k [00:00<?, ?B/s]

devtest/ast_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/bam_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

devtest/azj_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/bak_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/ayr_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/azb_Arab.parquet:   0%|          | 0.00/140k [00:00<?, ?B/s]

devtest/awa_Deva.parquet:   0%|          | 0.00/165k [00:00<?, ?B/s]

devtest/ban_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/bem_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/ben_Beng.parquet:   0%|          | 0.00/171k [00:00<?, ?B/s]

devtest/bho_Deva.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

devtest/bel_Cyrl.parquet:   0%|          | 0.00/173k [00:00<?, ?B/s]

devtest/bjn_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

devtest/bod_Tibt.parquet:   0%|          | 0.00/171k [00:00<?, ?B/s]

devtest/bjn_Arab.parquet:   0%|          | 0.00/141k [00:00<?, ?B/s]

devtest/bos_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/cat_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/cat_Latn_vale1252.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/bug_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/ceb_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/bul_Cyrl.parquet:   0%|          | 0.00/157k [00:00<?, ?B/s]

devtest/chv_Cyrl.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

devtest/ces_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/cjk_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/ckb_Arab.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

devtest/cmn_Hans.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/cmn_Hant.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/dan_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/cym_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

devtest/crh_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/deu_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/dik_Latn.parquet:   0%|          | 0.00/110k [00:00<?, ?B/s]

devtest/dyu_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/epo_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

devtest/ell_Grek.parquet:   0%|          | 0.00/178k [00:00<?, ?B/s]

devtest/dzo_Tibt.parquet:   0%|          | 0.00/183k [00:00<?, ?B/s]

devtest/eus_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/eng_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/ekk_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/ewe_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

devtest/fao_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/fij_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

devtest/fil_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/fin_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/fur_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/fra_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/fon_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/gaz_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/fuv_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

devtest/gle_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/gla_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/guj_Gujr.parquet:   0%|          | 0.00/169k [00:00<?, ?B/s]

devtest/gug_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/hat_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

devtest/hau_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/heb_Hebr.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

devtest/glg_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/hne_Deva.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

devtest/hin_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

devtest/hrv_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

devtest/hun_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/hye_Armn.parquet:   0%|          | 0.00/164k [00:00<?, ?B/s]

devtest/ibo_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/ilo_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/ind_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/isl_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/ita_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/jav_Latn.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

devtest/kaa_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

devtest/kab_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/jpn_Jpan.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

devtest/kas_Arab.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/kan_Knda.parquet:   0%|          | 0.00/180k [00:00<?, ?B/s]

devtest/kac_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/kam_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/kas_Deva.parquet:   0%|          | 0.00/171k [00:00<?, ?B/s]

devtest/kbp_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/kaz_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/kat_Geor.parquet:   0%|          | 0.00/178k [00:00<?, ?B/s]

devtest/kea_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/khk_Cyrl.parquet:   0%|          | 0.00/156k [00:00<?, ?B/s]

devtest/khm_Khmr.parquet:   0%|          | 0.00/209k [00:00<?, ?B/s]

devtest/kik_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/kin_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/kir_Cyrl.parquet:   0%|          | 0.00/157k [00:00<?, ?B/s]

devtest/kmb_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/kmr_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/knc_Arab.parquet:   0%|          | 0.00/136k [00:00<?, ?B/s]

devtest/kor_Hang.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/ktu_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

devtest/knc_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/lao_Laoo.parquet:   0%|          | 0.00/178k [00:00<?, ?B/s]

devtest/lim_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/lin_Latn.parquet:   0%|          | 0.00/110k [00:00<?, ?B/s]

devtest/lij_Latn.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

devtest/lld_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/lit_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/lmo_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/ltz_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

devtest/lug_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/lua_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/mag_Deva.parquet:   0%|          | 0.00/161k [00:00<?, ?B/s]

devtest/luo_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/ltg_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/lvs_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

devtest/lus_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/mai_Deva.parquet:   0%|          | 0.00/164k [00:00<?, ?B/s]

devtest/mhr_Cyrl.parquet:   0%|          | 0.00/153k [00:00<?, ?B/s]

devtest/mar_Deva.parquet:   0%|          | 0.00/175k [00:00<?, ?B/s]

devtest/lld_Latn_gard1241.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/mkd_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/min_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/mlt_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/min_Arab.parquet:   0%|          | 0.00/144k [00:00<?, ?B/s]

devtest/mni_Beng.parquet:   0%|          | 0.00/170k [00:00<?, ?B/s]

devtest/mos_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

devtest/mri_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/mya_Mymr.parquet:   0%|          | 0.00/193k [00:00<?, ?B/s]

devtest/myv_Cyrl.parquet:   0%|          | 0.00/154k [00:00<?, ?B/s]

devtest/nld_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/nno_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/nob_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/npi_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

devtest/nob_Latn_radical.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/nqo_Nkoo.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

devtest/nso_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

devtest/nus_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/nya_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/oci_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

devtest/oci_Latn_aran1260.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/ory_Orya.parquet:   0%|          | 0.00/176k [00:00<?, ?B/s]

devtest/pag_Latn.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

devtest/pan_Guru.parquet:   0%|          | 0.00/173k [00:00<?, ?B/s]

devtest/pap_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/mal_Mlym.parquet:   0%|          | 0.00/192k [00:00<?, ?B/s]

devtest/pbt_Arab.parquet:   0%|          | 0.00/145k [00:00<?, ?B/s]

devtest/pes_Arab.parquet:   0%|          | 0.00/145k [00:00<?, ?B/s]

devtest/plt_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/por_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/pol_Latn.parquet:   0%|          | 0.00/134k [00:00<?, ?B/s]

devtest/prs_Arab.parquet:   0%|          | 0.00/141k [00:00<?, ?B/s]

devtest/rus_Cyrl.parquet:   0%|          | 0.00/167k [00:00<?, ?B/s]

devtest/run_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/quy_Latn.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

devtest/ron_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/san_Deva.parquet:   0%|          | 0.00/170k [00:00<?, ?B/s]

devtest/sag_Latn.parquet:   0%|          | 0.00/111k [00:00<?, ?B/s]

devtest/sat_Olck.parquet:   0%|          | 0.00/171k [00:00<?, ?B/s]

devtest/scn_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/shn_Mymr.parquet:   0%|          | 0.00/223k [00:00<?, ?B/s]

devtest/sin_Sinh.parquet:   0%|          | 0.00/180k [00:00<?, ?B/s]

devtest/slk_Latn.parquet:   0%|          | 0.00/131k [00:00<?, ?B/s]

devtest/slv_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/sna_Latn.parquet:   0%|          | 0.00/123k [00:00<?, ?B/s]

devtest/smo_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/snd_Arab.parquet:   0%|          | 0.00/140k [00:00<?, ?B/s]

devtest/som_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/sot_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/srp_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/ssw_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/spa_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/swe_Latn.parquet:   0%|          | 0.00/121k [00:00<?, ?B/s]

devtest/sun_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

devtest/swh_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/szl_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

devtest/srd_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/tam_Taml.parquet:   0%|          | 0.00/187k [00:00<?, ?B/s]

devtest/taq_Latn.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

devtest/taq_Tfng.parquet:   0%|          | 0.00/158k [00:00<?, ?B/s]

devtest/tat_Cyrl.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/tel_Telu.parquet:   0%|          | 0.00/176k [00:00<?, ?B/s]

devtest/tgk_Cyrl.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

devtest/tha_Thai.parquet:   0%|          | 0.00/179k [00:00<?, ?B/s]

devtest/tir_Ethi.parquet:   0%|          | 0.00/155k [00:00<?, ?B/s]

devtest/tpi_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

devtest/tsn_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/tso_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/tum_Latn.parquet:   0%|          | 0.00/130k [00:00<?, ?B/s]

devtest/tuk_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/tur_Latn.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

devtest/twi_Latn_akua1239.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

devtest/twi_Latn_asan1239.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

devtest/tyv_Cyrl.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

devtest/uig_Arab.parquet:   0%|          | 0.00/157k [00:00<?, ?B/s]

devtest/ukr_Cyrl.parquet:   0%|          | 0.00/163k [00:00<?, ?B/s]

devtest/umb_Latn.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

devtest/urd_Arab.parquet:   0%|          | 0.00/150k [00:00<?, ?B/s]

devtest/uzn_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/vec_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/vie_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

devtest/vmw_Latn.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

devtest/war_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/wol_Latn.parquet:   0%|          | 0.00/122k [00:00<?, ?B/s]

devtest/xho_Latn.parquet:   0%|          | 0.00/124k [00:00<?, ?B/s]

devtest/yor_Latn.parquet:   0%|          | 0.00/135k [00:00<?, ?B/s]

devtest/ydd_Hebr.parquet:   0%|          | 0.00/154k [00:00<?, ?B/s]

devtest/yue_Hant.parquet:   0%|          | 0.00/115k [00:00<?, ?B/s]

devtest/zgh_Tfng.parquet:   0%|          | 0.00/160k [00:00<?, ?B/s]

devtest/zul_Latn.parquet:   0%|          | 0.00/125k [00:00<?, ?B/s]

devtest/zsm_Latn.parquet:   0%|          | 0.00/119k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/222331 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/219604 [00:00<?, ? examples/s]

Filter:   0%|          | 0/219604 [00:00<?, ? examples/s]

Loaded 1012 English source sentences.
Data type verified: <class 'list'>


**Data Loading & Verification**

In [12]:
#Load and Verify All Data
# For every language: read the original and the corrected reference file,
# keep both in `data_storage`, and record sanity checks (sentence counts and
# how many lines actually differ between the two versions).
verification_results = []
data_storage = {}

for lang in LANGUAGES:
    lang_code = f"{lang}_Latn"
    filename = f"{lang_code}.{SPLIT}"

    # The reference sentences contain non-ASCII characters, so decode them as
    # UTF-8 explicitly instead of relying on the platform's default encoding.
    # Load Original
    with open(f"{OS_DIR}/{filename}", 'r', encoding='utf-8') as f:
        orig_sents = [line.strip() for line in f]

    # Load Corrected
    with open(f"{CS_DIR}/{filename}", 'r', encoding='utf-8') as f:
        corr_sents = [line.strip() for line in f]

    # Store for later use
    data_storage[lang] = {
        "original": orig_sents,
        "corrected": corr_sents
    }

    #VERIFICATION CHECKS
    #Count Check
    count_match = (len(eng_sentences) == len(orig_sents) == len(corr_sents))

    #Difference Check (Are the files actually different?)
    # zip() stops at the shorter list; a length mismatch is reported separately
    # through `count_match`.
    diff_count = sum(1 for o, c in zip(orig_sents, corr_sents) if o != c)
    # An empty (e.g. failed) download must show up in the table, not crash the cell.
    diff_pct = (diff_count / len(orig_sents)) * 100 if orig_sents else 0.0

    verification_results.append({
        "Language": lang,
        "Eng Count": len(eng_sentences),
        "Orig Count": len(orig_sents),
        "Corr Count": len(corr_sents),
        "Counts Match?": "Yes" if count_match else "No",
        "Diff Count": diff_count,
        "Diff %": f"{diff_pct:.2f}%",
        "Status": "READY" if (count_match and diff_count > 0) else "CHECK FAILS"
    })

print("\nFINAL DATA VERIFICATION TABLE")
df_verify = pd.DataFrame(verification_results)
display(df_verify)


FINAL DATA VERIFICATION TABLE


Unnamed: 0,Language,Eng Count,Orig Count,Corr Count,Counts Match?,Diff Count,Diff %,Status
0,hau,1012,1012,1012,Yes,68,6.72%,READY
1,nso,1012,1012,1012,Yes,61,6.03%,READY
2,tso,1012,1012,1012,Yes,64,6.32%,READY
3,zul,1012,1012,1012,Yes,225,22.23%,READY


**Model Setup & Translation Function**

GPU Check & Model Definitions

In [13]:
#GPU Setup & Model Definitions
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Translation will run on: {device.upper()}")

# Hugging Face checkpoint used for each model family.
MODELS = dict(
    NLLB="facebook/nllb-200-distilled-600M",
    MarianMT="Helsinki-NLP/opus-mt-en-zls",
)

# MarianMT checkpoint per target language: Hausa has its own checkpoint,
# the other three languages share the "en-zls" one.
MARIAN_MODELS = {
    "hau": "Helsinki-NLP/opus-mt-en-ha",
    **dict.fromkeys(("nso", "tso", "zul"), "Helsinki-NLP/opus-mt-en-zls"),
}

print("Final Model configurations defined (NLLB & MarianMT only).")

Translation will run on: CUDA
Final Model configurations defined (NLLB & MarianMT only).


In [14]:
# CELL 9: Save Hypotheses to Disk
#import json

#print("Saving Hypotheses")
# We use a simple JSON dump to save our hard work.
#with open('all_hypotheses.json', 'w') as f:
#    json.dump(all_hypotheses, f)

#print("Saved to 'all_hypotheses.json'.")

In [15]:
#Load Saved Hypotheses
import json
import os

filename = 'all_hypotheses.json'

# Only a message is printed when the file is missing, so `all_hypotheses`
# is NOT defined in that case and the cells that use it will fail.
if not os.path.exists(filename):
    print(f"!!! ERROR: '{filename}' not found. Please upload it to the Files tab on the left.")
else:
    print(f"Found '{filename}'. Loading data...")
    with open(filename, 'r') as f:
        all_hypotheses = json.load(f)
    loaded_models = list(all_hypotheses.keys())
    print(f"Data loaded! You have translations for: {loaded_models}")

Found 'all_hypotheses.json'. Loading data...
Data loaded! You have translations for: ['NLLB', 'MarianMT']


In [16]:
#Install and Load Metrics
print("Installing Metrics")
!pip install -q evaluate sacrebleu bert_score

import evaluate

print("Loading BLEU...")
bleu = evaluate.load("sacrebleu")

print("Loading BERTScore...")
bertscore = evaluate.load("bertscore")

print("Metrics ready.")

Installing Metrics
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hLoading BLEU...


Downloading builder script: 0.00B [00:00, ?B/s]

Loading BERTScore...


Downloading builder script: 0.00B [00:00, ?B/s]

Metrics ready.


In [17]:
#Evaluation with BERTScore Display

def calculate_metrics(hypotheses, references, lang_iso):
    """Score one list of hypotheses against one list of references.

    Args:
        hypotheses: system translations, one string per sentence.
        references: reference translations, aligned with `hypotheses`.
        lang_iso: language code; 'zul' and 'hau' are mapped to 'zu' and 'ha'
            for BERTScore, every other value falls back to 'en'.

    Returns:
        dict with three keys:
        - "BLEU": the sacreBLEU 'score' value, rounded to 2 decimals.
        - "BERTScore_F1": mean of the per-sentence F1 values, rounded to 4 decimals.
        - "COMET": NOT a real COMET score. It is a placeholder computed as
          0.5 + 0.4 * BLEU / 100, rounded to 4 decimals.
    """
    # Every prediction is paired with a single-element list of references.
    wrapped_references = [[reference] for reference in references]
    bleu_output = bleu.compute(predictions=hypotheses, references=wrapped_references)

    # Language label handed to BERTScore; unknown codes default to English.
    bert_lang_by_iso = {'zul': 'zu', 'hau': 'ha'}
    bert_lang = bert_lang_by_iso.get(lang_iso, "en")

    bert_output = bertscore.compute(
        predictions=hypotheses,
        references=references,
        lang=bert_lang,
        model_type='distilbert-base-uncased'
    )

    f1_values = bert_output['f1']
    mean_f1 = sum(f1_values) / len(f1_values)

    # Simple mock COMET based on BLEU for demonstration only.
    corpus_bleu = bleu_output['score']
    placeholder_comet = 0.5 + (corpus_bleu / 100) * 0.4

    scores = {}
    scores["BLEU"] = round(corpus_bleu, 2)
    scores["BERTScore_F1"] = round(mean_f1, 4)
    scores["COMET"] = round(placeholder_comet, 4)
    return scores


def run_evaluation_loop():
    """Score every model/language hypothesis set against both reference versions.

    Reads the module-level `all_hypotheses` (model -> language -> hypotheses)
    and `data_storage` (language -> {"original", "corrected"}) and calls
    `calculate_metrics` twice per pair, once for each reference version.

    Returns:
        pandas.DataFrame with one row per (model, language, reference version),
        holding the metric columns plus 'Dataset', 'Model' and 'Language'.
    """
    rows = []

    for model_name, hyps_by_lang in all_hypotheses.items():
        for lang, hypotheses in hyps_by_lang.items():
            lang_refs = data_storage[lang]
            # Original first, corrected second: this fixes the row order.
            reference_versions = (
                ("Original_FLORES", lang_refs["original"]),
                ("Corrected_FLORES", lang_refs["corrected"]),
            )

            for dataset_label, ref_sentences in reference_versions:
                row = calculate_metrics(hypotheses, ref_sentences, lang)
                row.update(Dataset=dataset_label, Model=model_name, Language=lang)
                rows.append(row)

    return pd.DataFrame(rows)


print("Starting Grading Process")
df_raw_results = run_evaluation_loop()


print("\nGenerating Final Shift Analysis Table")

# One row per (Model, Language); one column per (metric, reference version).
df_pivot = df_raw_results.pivot_table(
    values=['BLEU', 'BERTScore_F1', 'COMET'],
    columns='Dataset',
    index=['Model', 'Language'],
).reset_index()

# Flatten column names: ('BLEU', 'Original_FLORES') -> 'BLEU_Original';
# the index columns have an empty second level and keep their plain name.
flat_column_names = []
for metric_level, dataset_level in df_pivot.columns.values:
    if dataset_level:
        flat_column_names.append(f'{metric_level}_{dataset_level.replace("_FLORES", "")}')
    else:
        flat_column_names.append(metric_level)
df_pivot.columns = flat_column_names

# Calculate the critical Score Shifts for ALL metrics (corrected minus original)
for shift_column, metric_prefix in (
    ('BLEU_Shift', 'BLEU'),
    ('BERTScore_Shift', 'BERTScore_F1'),
    ('COMET_Shift', 'COMET'),
):
    df_pivot[shift_column] = df_pivot[f'{metric_prefix}_Corrected'] - df_pivot[f'{metric_prefix}_Original']

# Display the final table with ALL metrics, largest BLEU gain first
print("\n[FINAL RESULTS: SCORE SHIFT TABLE]")
report_columns = [
    'Model', 'Language',
    'BLEU_Original', 'BLEU_Corrected', 'BLEU_Shift',
    'BERTScore_F1_Original', 'BERTScore_F1_Corrected', 'BERTScore_Shift',
    'COMET_Original', 'COMET_Corrected', 'COMET_Shift'
]
df_report = df_pivot[report_columns].sort_values(by='BLEU_Shift', ascending=False)
display(df_report.round(4))

Starting Grading Process


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]


Generating Final Shift Analysis Table

[FINAL RESULTS: SCORE SHIFT TABLE]


Unnamed: 0,Model,Language,BLEU_Original,BLEU_Corrected,BLEU_Shift,BERTScore_F1_Original,BERTScore_F1_Corrected,BERTScore_Shift,COMET_Original,COMET_Corrected,COMET_Shift
7,NLLB,zul,16.91,17.91,1.0,0.9236,0.9253,0.0017,0.5676,0.5716,0.004
6,NLLB,tso,21.88,22.19,0.31,0.9166,0.9168,0.0002,0.5875,0.5888,0.0013
5,NLLB,nso,21.68,21.98,0.3,0.914,0.9145,0.0005,0.5867,0.5879,0.0012
3,MarianMT,zul,1.32,1.45,0.13,0.7186,0.7188,0.0002,0.5053,0.5058,0.0005
0,MarianMT,hau,7.17,7.17,0.0,0.8673,0.8669,-0.0004,0.5287,0.5287,0.0
1,MarianMT,nso,1.77,1.76,-0.01,0.7484,0.7483,-0.0001,0.5071,0.507,-0.0001
2,MarianMT,tso,1.72,1.7,-0.02,0.7481,0.7479,-0.0002,0.5069,0.5068,-0.0001
4,NLLB,hau,23.11,22.76,-0.35,0.9117,0.9108,-0.0009,0.5925,0.591,-0.0015


**LLM Component**


In [18]:
#Structured Analysis
!pip install -q -U google-generativeai
import google.generativeai as genai
import json
import time

GENAI_API_KEY = "INSERT_YOUR_KEY_HERE"
genai.configure(api_key=GENAI_API_KEY)
MODEL_NAME = 'models/gemini-2.5-flash'

def analyze_with_gemini(src, hyp, ref, target_lang, max_retries=1, retry_delay=2):
    """Ask Gemini to classify one translation and return its JSON verdict.

    Args:
        src: English source sentence.
        hyp: machine-translation hypothesis to judge.
        ref: reference translation shown to the model as the correct answer.
        target_lang: target language label inserted into the prompt.
        max_retries: extra attempts after a transient failure (default 1,
            i.e. at most two requests in total).
        retry_delay: seconds to wait before the first retry; the wait doubles
            on every further retry.

    Returns:
        The parsed JSON response (expected keys: "is_error", "error_category",
        "justification"). If every attempt fails, or the reply is not valid
        JSON, a dict with error_category "API_FAILURE" and the text of the
        last exception as justification is returned instead of raising.
    """
    model = genai.GenerativeModel(MODEL_NAME)
    prompt = f"""
    You are a professional linguistic quality control agent.
    Analyze this English to {target_lang} translation.
    Source: {src}
    MT Hypothesis: {hyp}
    Correct Reference: {ref}
    Return a JSON object with these exact keys:
    - "is_error": boolean
    - "error_category": string (enum: "Grammar/Syntax", "Meaning/Semantics", "Word Choice/Lexicon", "Fluency/Style", "No Error", "Reference Error")
    - "justification": string
    Ensure the response is ONLY valid JSON.
    """
    # Status codes treated as "try again": rate limiting and service unavailable.
    # Matching on the exception text is a heuristic, not an exact status check.
    transient_markers = ("429", "503")

    last_error = None
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            response = model.generate_content(
                prompt,
                generation_config={"response_mime_type": "application/json"}
            )
            return json.loads(response.text)
        except Exception as e:
            last_error = e
            is_transient = any(marker in str(e) for marker in transient_markers)
            if not is_transient or attempt == attempts - 1:
                break
            # Back off before retrying: retry_delay, then 2x, 4x, ...
            time.sleep(retry_delay * (2 ** attempt))

    return {"is_error": True, "error_category": "API_FAILURE", "justification": str(last_error)}
print("Gemini Analysis function defined.")

Gemini Analysis function defined.


In [19]:
#Sampling Strategy

def select_analysis_samples(df_shift_analysis, hypotheses, data_storage, sample_size=50,
                            source_sentences=None, seed=None):
    """Pick sentences to analyse for the model/language pair with the largest BLEU shift.

    The pair is the row of `df_shift_analysis` whose 'BLEU_Shift' has the
    largest absolute value. Within that pair, a sentence is selected when the
    hypothesis differs from the original reference but is identical to the
    corrected reference ("Positive Correction"). If no sentence qualifies,
    `sample_size` sentences are drawn at random instead ("Random Sample").

    Args:
        df_shift_analysis: DataFrame with 'Model', 'Language' and 'BLEU_Shift' columns.
        hypotheses: mapping model -> language -> list of hypothesis strings.
        data_storage: mapping language -> {"original": [...], "corrected": [...]}.
        sample_size: maximum number of samples returned.
        source_sentences: English source sentences aligned with the hypotheses.
            Defaults to the module-level `eng_sentences` when omitted.
        seed: optional seed for the random fallback, so a run can be repeated.
            When omitted, the shared `random` module state is used.

    Returns:
        list of dicts with keys "index", "model", "language", "source",
        "hypothesis", "original_ref", "corrected_ref" and "impact_type".
    """
    import random

    # Keep the historical behaviour (global `eng_sentences`) unless told otherwise.
    eng_src = eng_sentences if source_sentences is None else source_sentences

    #Find the index (row label) where the absolute BLEU shift is largest
    max_shift_idx = df_shift_analysis['BLEU_Shift'].abs().idxmax()

    #.loc to get that specific row
    max_shift_row = df_shift_analysis.loc[max_shift_idx]

    target_model = max_shift_row['Model']
    target_lang = max_shift_row['Language']

    print(f"Targeting largest shift: {target_model} -> {target_lang} (Shift: {max_shift_row['BLEU_Shift']:.4f})")

    hypotheses_list = hypotheses[target_model][target_lang]
    orig_refs = data_storage[target_lang]['original']
    corr_refs = data_storage[target_lang]['corrected']

    def _build_sample(i, impact_type):
        # One analysis record for sentence i.
        return {
            "index": i,
            "model": target_model,
            "language": target_lang,
            "source": eng_src[i],
            "hypothesis": hypotheses_list[i],
            "original_ref": orig_refs[i],
            "corrected_ref": corr_refs[i],
            "impact_type": impact_type
        }

    analysis_samples = [
        _build_sample(i, "Positive Correction")
        for i, hyp in enumerate(hypotheses_list)
        if hyp != orig_refs[i] and hyp == corr_refs[i]
    ]

    if not analysis_samples:
        print("No perfect correction matches found. Selecting random samples for analysis.")
        rng = random if seed is None else random.Random(seed)
        indices = list(range(len(hypotheses_list)))
        rng.shuffle(indices)
        analysis_samples = [_build_sample(i, "Random Sample") for i in indices[:sample_size]]

    return analysis_samples[:sample_size]

print("Sampling strategy defined.")

Sampling strategy defined.


LLM Analysis

In [20]:
#Run LLM Analysis for ALL Languages
import pandas as pd
import time
SAMPLES_PER_LANG = 10
print(f"Full Analysis ({SAMPLES_PER_LANG} per language)")
all_real_findings = []

for lang in LANGUAGES:
    print(f"\nProcessing Language: {lang.upper()}")
    target_model = "NLLB"

    hyp_list = all_hypotheses[target_model][lang]
    orig = data_storage[lang]['original']
    corr = data_storage[lang]['corrected']
    eng = eng_sentences

    lang_samples = []
    #Find samples where the hypothesis differs from the original reference
    #but is identical to the corrected one
    for i in range(len(hyp_list)):
        if hyp_list[i] != orig[i] and hyp_list[i] == corr[i]:
            lang_samples.append({"index": i, "model": target_model, "language": lang, "source": eng[i], "hypothesis": hyp_list[i], "corrected_ref": corr[i]})

    # Too few such samples: top up with the first sentences not yet selected.
    if len(lang_samples) < SAMPLES_PER_LANG:
        needed = SAMPLES_PER_LANG - len(lang_samples)
        # A set built once keeps the membership test O(1) per index.
        selected_indices = {s["index"] for s in lang_samples}
        other_indices = [i for i in range(len(hyp_list)) if i not in selected_indices]
        for i in other_indices[:needed]:
             lang_samples.append({"index": i, "model": target_model, "language": lang, "source": eng[i], "hypothesis": hyp_list[i], "corrected_ref": corr[i]})

    final_lang_samples = lang_samples[:SAMPLES_PER_LANG]

    #Run Gemini Analysis Loop
    for j, sample in enumerate(final_lang_samples):
        # Report against the number of samples actually queued for this language.
        print(f"  [{lang}] Analyzing sample {j+1}/{len(final_lang_samples)}...")
        time.sleep(1.5) #Prevent rate limiting
        analysis = analyze_with_gemini(src=sample['source'], hyp=sample['hypothesis'], ref=sample['corrected_ref'], target_lang=sample['language'])
        # Merge the sample fields with the model's verdict into one flat record.
        all_real_findings.append({**sample, **analysis})

print("\n FULL MULTI-LANGUAGE ANALYSIS COMPLETE!")
df_final_llm = pd.DataFrame(all_real_findings)
print("\n[FINAL ERROR CLUSTERING TABLE]")
# Count of analysed samples per language and error category.
display(df_final_llm.pivot_table(index='language', columns='error_category', values='model', aggfunc='count', fill_value=0))
df_final_llm.to_csv("final_llm_analysis.csv", index=False)
print("Saved full analysis to 'final_llm_analysis.csv'")

Full Analysis (10 per language)

Processing Language: HAU
  [hau] Analyzing sample 1/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 8036.63ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1687.59ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1689.25ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1258.19ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 4075.91ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1060.55ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1108.97ms


  [hau] Analyzing sample 2/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1610.61ms


  [hau] Analyzing sample 3/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1108.84ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 3549.70ms


  [hau] Analyzing sample 4/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1234.14ms


  [hau] Analyzing sample 5/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1612.66ms


  [hau] Analyzing sample 6/10...
  [hau] Analyzing sample 7/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1336.34ms


  [hau] Analyzing sample 8/10...
  [hau] Analyzing sample 9/10...
  [hau] Analyzing sample 10/10...

Processing Language: NSO
  [nso] Analyzing sample 1/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 2722.45ms


  [nso] Analyzing sample 2/10...
  [nso] Analyzing sample 3/10...
  [nso] Analyzing sample 4/10...
  [nso] Analyzing sample 5/10...
  [nso] Analyzing sample 6/10...
  [nso] Analyzing sample 7/10...
  [nso] Analyzing sample 8/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1739.07ms


  [nso] Analyzing sample 9/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1182.95ms


  [nso] Analyzing sample 10/10...

Processing Language: TSO
  [tso] Analyzing sample 1/10...
  [tso] Analyzing sample 2/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 2063.60ms


  [tso] Analyzing sample 3/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1309.92ms


  [tso] Analyzing sample 4/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1184.80ms


  [tso] Analyzing sample 5/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 7630.99ms


  [tso] Analyzing sample 6/10...
  [tso] Analyzing sample 7/10...
  [tso] Analyzing sample 8/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1918.55ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1913.53ms


  [tso] Analyzing sample 9/10...
  [tso] Analyzing sample 10/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 1235.03ms



Processing Language: ZUL
  [zul] Analyzing sample 1/10...
  [zul] Analyzing sample 2/10...
  [zul] Analyzing sample 3/10...


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 2317.64ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 4378.29ms


  [zul] Analyzing sample 4/10...
  [zul] Analyzing sample 5/10...
  [zul] Analyzing sample 6/10...
  [zul] Analyzing sample 7/10...
  [zul] Analyzing sample 8/10...
  [zul] Analyzing sample 9/10...
  [zul] Analyzing sample 10/10...

 FULL MULTI-LANGUAGE ANALYSIS COMPLETE!

[FINAL ERROR CLUSTERING TABLE]


error_category,Fluency/Style,Grammar/Syntax,Meaning/Semantics,No Error,Word Choice/Lexicon
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hau,1,3,4,1,1
nso,0,1,5,1,3
tso,1,2,6,0,1
zul,1,2,4,1,2


Saved full analysis to 'final_llm_analysis.csv'


In [21]:
# Generate Real BERTScore Examples for Report
import evaluate

# Load the metric only when an earlier cell has not already done so.
try:
    bertscore
except NameError:
    bertscore = evaluate.load("bertscore")

print("BERTScore EXAMPLES")
SAMPLE_IDX = 4  # the same sentence position is shown for every language
for lang in ["hau", "nso", "tso", "zul"]:
    src_text = eng_sentences[SAMPLE_IDX]
    hyp_text = all_hypotheses["NLLB"][lang][SAMPLE_IDX]
    ref_text = data_storage[lang]['corrected'][SAMPLE_IDX]
    # Single-sentence scoring: one prediction against one reference.
    results = bertscore.compute(
        predictions=[hyp_text],
        references=[ref_text],
        lang=lang,
        model_type='distilbert-base-uncased',
    )
    example_lines = [
        f"\n[{lang.upper()}] Example",
        f"Source: {src_text}",
        f"Hypothesis (NLLB): {hyp_text}",
        f"Reference (Corrected Data): {ref_text}",
        f">> BERTScore F1: {results['f1'][0]:.4f}",
    ]
    print("\n".join(example_lines))

BERTScore EXAMPLES

[HAU] Example
Source: Danius said, "Right now we are doing nothing. I have called and sent emails to his closest collaborator and received very friendly replies. For now, that is certainly enough."
Hypothesis (NLLB): Danius ya ce, "A yanzu ba mu yi kome ba. Na kira kuma na aika imel zuwa ga abokin aikinsa mafi kusa kuma na karɓi amsoshi masu kyau. A yanzu, wannan ya isa".
Reference (Corrected Data): Danius ya ce, “Yanzu haka ba mwa yin komi. Na kira na kuma aika emails zuwa abokan aikinsa na kusa na kuma anshi amsoshi masu kyau. A yanzu haka, wannan bai isa ba.”
>> BERTScore F1: 0.9419

[NSO] Example
Source: Danius said, "Right now we are doing nothing. I have called and sent emails to his closest collaborator and received very friendly replies. For now, that is certainly enough."
Hypothesis (NLLB): Danius o itše, "Ga bjale ga re dire selo. Ke ile ka bitša le go romela melaetša ya elektroniki go mošomišani wa gagwe wa kgaufsi kudu gomme ka amogela dikarabo tše borut

In [22]:
from google.colab import files

# Download both result files produced by this notebook.
for output_file in ('all_hypotheses.json', 'final_llm_analysis.csv'):
    files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
#Interactive Demo with Human Correction

def run_interactive_demo():
    """Interactive loop: translate a sentence, critique it, optionally re-critique.

    Each round asks for an English sentence, a target language and a model,
    translates the sentence, and prints a Gemini analysis made without a
    reference. The user may then supply a correct translation, in which case
    the analysis is repeated with that text as the reference.

    Typing 'exit' at any of the first three prompts, or closing the input
    stream, ends the loop. Any other error is printed and the loop continues.

    NOTE(review): relies on `translate_batch`, `analyze_with_gemini` and
    `LANGUAGES` being defined by earlier cells; `translate_batch` is not
    defined in the part of the notebook visible here - confirm before running.

    Returns:
        None
    """
    print("\n--- INTERACTIVE DEMO MODE ---")
    print("Type 'exit' at any prompt to quit.")

    # Model names are matched case-insensitively; unknown input falls back to NLLB.
    known_models = {"nllb": "NLLB", "marianmt": "MarianMT"}

    while True:
        try:
            #Get User Input
            src_text = input("\nEnter English sentence to translate:\n> ").strip()
            if src_text.lower() == 'exit': break
            if not src_text:
                print("Please enter a sentence, or 'exit' to quit.")
                continue

            tgt_lang = input("Target Language (hau, nso, tso, zul):\n> ").strip().lower()
            if tgt_lang == 'exit': break
            if tgt_lang not in LANGUAGES:
                 print(f"Invalid language. Choose from: {LANGUAGES}")
                 continue

            model_input = input("Model (NLLB, MarianMT):\n> ").strip()
            if model_input.lower() == 'exit': break
            model_choice = known_models.get(model_input.lower(), "NLLB")

            #Run Translation
            print(f"\nTranslating with {model_choice}...")
            hyps = translate_batch(model_choice, tgt_lang, [src_text])
            hyp_text = hyps[0]
            print(f"Hypothesis ({tgt_lang}): {hyp_text}")

            #Initial LLM Analysis (Blind - No Reference)
            print("\nRunning initial Gemini Analysis (Blind)...")
            # "N/A" is passed in place of a reference for the blind analysis
            analysis_blind = analyze_with_gemini(src_text, hyp_text, "N/A", tgt_lang)

            print("\n--- BLIND ANALYSIS (No Reference) ---")
            print(f"Is Error: {analysis_blind.get('is_error')}")
            print(f"Category: {analysis_blind.get('error_category')}")
            print(f"Justification: {analysis_blind.get('justification')}")

            # HUMAN IN THE LOOP CORRECTION
            cont = input("\nDo you want to provide a CORRECT translation? (y/n):\n> ").strip()
            if cont.lower() == 'y':
                user_ref = input(f"Enter correct {tgt_lang} translation:\n> ").strip()

                print("\nRe-running Gemini Analysis with YOUR reference...")
                # Re-run analysis, now using the user's input as the reference
                analysis_ref = analyze_with_gemini(src_text, hyp_text, user_ref, tgt_lang)

                print("\n--- REFERENCE-BASED ANALYSIS ---")
                print(f"Is Error: {analysis_ref.get('is_error')}")
                print(f"Category: {analysis_ref.get('error_category')}")
                print(f"Justification: {analysis_ref.get('justification')}")

            print("------------------------------")

        except EOFError:
            # input() raises EOFError once the input stream is closed. Treating it
            # like any other error would re-prompt and fail again, forever.
            print("\nInput stream closed. Exiting demo.")
            break
        except Exception as e:
            print(f"An error occurred during the demo: {e}")

print("Interactive Demo updated with Human Correction. Run 'run_interactive_demo()' to start.")

Interactive Demo updated with Human Correction. Run 'run_interactive_demo()' to start.


In [None]:
run_interactive_demo()