In [None]:
import torch
# Report the accelerator PyTorch can see. Availability is queried once and
# reused so the device-specific lookups are only attempted when CUDA is present.
has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)
print("CUDA device count:", torch.cuda.device_count())
current_device = torch.cuda.current_device() if has_cuda else "CPU"
print("Current device:", current_device)
gpu_name = torch.cuda.get_device_name(0) if has_cuda else "None"
print("GPU name:", gpu_name)

CUDA available: True
CUDA device count: 1
Current device: 0
GPU name: Tesla T4


In [None]:
!pip install datasets evaluate sacrebleu transformers torch -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from huggingface_hub import login
import os

print("🔐 Authenticating session...")
# Prefer a token supplied through the HF_TOKEN environment variable so the
# notebook can run unattended (Restart & Run All). When the variable is unset
# or empty, token=None makes login() fall back to its interactive prompt,
# which is the behaviour this cell had before.
hf_token = os.environ.get("HF_TOKEN") or None
login(token=hf_token, add_to_git_credential=False)

🔐 Authenticating session...


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# ─────────────────────────────────────────────────────────────
# 🏆 PHASE 3C: High-Confidence Evaluation (250 Samples + Reverse)
# ─────────────────────────────────────────────────────────────
import torch
from transformers import pipeline
from datasets import load_dataset
import evaluate
from tqdm import tqdm
import pandas as pd

# ⚙️ Configuration
NUM_SAMPLES = 250  # sentences scored per translation direction
MAX_LENGTH = 400   # max_length handed to the translation pipeline on every call
FLORES_REPO = "openlanguagedata/flores_plus"
FLORES_SPLIT = f"devtest[:{NUM_SAMPLES}]"  # first NUM_SAMPLES rows of devtest
device = 0 if torch.cuda.is_available() else -1
print(f"🔥 Loading NLLB Model on {'GPU' if device==0 else 'CPU'}...")

SRC_CODE = "eng_Latn"

# 🛠️ Setup Models & Metrics
translator = pipeline("translation", model="facebook/nllb-200-distilled-600M", device=device)
# NOTE(review): bleu.compute() is called without a `tokenize` argument, so
# sacreBLEU's default tokenizer is applied to every language, including Hindi
# and Telugu. Confirm this is the intended BLEU variant before comparing the
# numbers with published results.
bleu = evaluate.load("sacrebleu")

# Display name -> FLORES / NLLB language code of each non-English language.
eval_config = {
    "Hindi": "hin_Deva",
    "Telugu": "tel_Telu",
    "French": "fra_Latn",
    "German": "deu_Latn",
}


def bleu_row(pair_label, ds_from, ds_to, from_code, to_code, desc):
    """Translate ``ds_from`` into ``to_code`` and score it against ``ds_to``.

    Uses the module-level ``translator`` pipeline, ``bleu`` metric and
    ``MAX_LENGTH``. Sentences are translated one per pipeline call, in row
    order, and row ``i`` of ``ds_to`` is the single reference for row ``i``
    of ``ds_from``.

    Args:
        pair_label: Text stored in the "Pair" column of the result row.
        ds_from: Dataset whose ``'text'`` column holds the source sentences.
        ds_to: Dataset whose ``'text'`` column holds the reference sentences.
        from_code: Language code passed to the pipeline as ``src_lang``.
        to_code: Language code passed to the pipeline as ``tgt_lang``.
        desc: Label shown on the tqdm progress bar.

    Returns:
        dict with keys "Pair", "BLEU" (corpus score rounded to 2 decimals)
        and "Samples" (number of translated sentences).

    Raises:
        ValueError: if the two datasets do not have the same number of rows,
            since references are matched to sources purely by position.
    """
    if len(ds_from) != len(ds_to):
        raise ValueError(
            f"source/reference size mismatch for {pair_label}: "
            f"{len(ds_from)} vs {len(ds_to)}"
        )

    preds, refs = [], []
    # NOTE(review): one sentence per call keeps the decoding path (and hence the
    # reported scores) unchanged; the pipeline warns that sequential GPU use is
    # inefficient, so batching is the obvious next optimisation.
    for i in tqdm(range(len(ds_from)), desc=desc):
        out = translator(ds_from[i]['text'], src_lang=from_code, tgt_lang=to_code, max_length=MAX_LENGTH)
        preds.append(out[0]['translation_text'])
        refs.append([ds_to[i]['text']])  # sacrebleu expects a list of references per prediction

    score = bleu.compute(predictions=preds, references=refs)['score']
    return {"Pair": pair_label, "BLEU": round(score, 2), "Samples": len(preds)}


all_results = []
ds_src = None  # English side; loaded on first use and then shared by every language

print(f"🚀 Starting Extended Evaluation ({NUM_SAMPLES} samples per pair)...")

for lang_name, tgt_code in eval_config.items():
    print(f"\n🌍 Processing: {lang_name} ({tgt_code})")

    # Best effort per language: a failure is reported and the loop moves on.
    try:
        ds_tgt = load_dataset(FLORES_REPO, tgt_code, split=FLORES_SPLIT)
        if ds_src is None:
            # Loop-invariant, so it is fetched only once. It stays inside the
            # try so a load failure is still reported per language.
            ds_src = load_dataset(FLORES_REPO, SRC_CODE, split=FLORES_SPLIT)

        # ─── PART A: Forward (English ➔ Native) ───
        all_results.append(
            bleu_row(f"EN ➔ {lang_name}", ds_src, ds_tgt, SRC_CODE, tgt_code, f"Forward {lang_name}")
        )

        # ─── PART B: Reverse (Native ➔ English) ───
        all_results.append(
            bleu_row(f"{lang_name} ➔ EN", ds_tgt, ds_src, tgt_code, SRC_CODE, f"Reverse {lang_name}")
        )

    except Exception as e:
        print(f"⚠️ Error with {lang_name}: {e}")

# 📊 Final Comparative Table
print("\n" + "="*55)
print(f"🏆 PROJECT PERFORMANCE LOG: NLLB-200 (N={NUM_SAMPLES})")
print("="*55)
df = pd.DataFrame(all_results)
print(df.to_markdown(index=False))
print("="*55)

🔥 Loading NLLB Model on GPU...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/564 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.3M [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Downloading builder script: 0.00B [00:00, ?B/s]

🚀 Starting Extended Evaluation (250 samples per pair)...

🌍 Processing: Hindi (hin_Deva)


README.md:   0%|          | 0.00/74.5k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

dev/hin_Deva.parquet:   0%|          | 0.00/161k [00:00<?, ?B/s]

devtest/hin_Deva.parquet:   0%|          | 0.00/168k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/997 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/1012 [00:00<?, ? examples/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

dev/eng_Latn.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

devtest/eng_Latn.parquet:   0%|          | 0.00/117k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/997 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/1012 [00:00<?, ? examples/s]

Forward Hindi:   4%|▍         | 10/250 [00:13<04:43,  1.18s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Forward Hindi: 100%|██████████| 250/250 [03:35<00:00,  1.16it/s]
Reverse Hindi: 100%|██████████| 250/250 [02:55<00:00,  1.43it/s]



🌍 Processing: Telugu (tel_Telu)


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

dev/tel_Telu.parquet:   0%|          | 0.00/170k [00:00<?, ?B/s]

devtest/tel_Telu.parquet:   0%|          | 0.00/176k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/997 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/1012 [00:00<?, ? examples/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Forward Telugu: 100%|██████████| 250/250 [04:04<00:00,  1.02it/s]
Reverse Telugu: 100%|██████████| 250/250 [02:56<00:00,  1.42it/s]



🌍 Processing: French (fra_Latn)


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

dev/fra_Latn.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

devtest/fra_Latn.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/997 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/1012 [00:00<?, ? examples/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Forward French: 100%|██████████| 250/250 [03:50<00:00,  1.08it/s]
Reverse French: 100%|██████████| 250/250 [03:02<00:00,  1.37it/s]



🌍 Processing: German (deu_Latn)


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

dev/deu_Latn.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

devtest/deu_Latn.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/997 [00:00<?, ? examples/s]

Generating devtest split:   0%|          | 0/1012 [00:00<?, ? examples/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Forward German: 100%|██████████| 250/250 [03:38<00:00,  1.15it/s]
Reverse German: 100%|██████████| 250/250 [03:00<00:00,  1.38it/s]


🏆 PROJECT PERFORMANCE LOG: NLLB-200 (N=250)
| Pair        |   BLEU |   Samples |
|:------------|-------:|----------:|
| EN ➔ Hindi  |  32.37 |       250 |
| Hindi ➔ EN  |  41.55 |       250 |
| EN ➔ Telugu |  19.12 |       250 |
| Telugu ➔ EN |  37.41 |       250 |
| EN ➔ French |  48.71 |       250 |
| French ➔ EN |  43.56 |       250 |
| EN ➔ German |  34.28 |       250 |
| German ➔ EN |  42.25 |       250 |





In [None]:
!pip install -q unbabel-comet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.5/55.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.0/91.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.4/101.4 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m73.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.5/849.5 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m529.7/529.7 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resol

In [None]:
from comet import download_model, load_from_checkpoint
import torch

# Download the COMET-22 direct-assessment checkpoint and restore the model from it.
COMET_CHECKPOINT = "Unbabel/wmt22-comet-da"
model_path = download_model(COMET_CHECKPOINT)
comet_model = load_from_checkpoint(model_path)

# Put the model in evaluation mode before it is used for scoring.
comet_model.eval()
print("✅ COMET-22 model loaded")

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

.gitattributes: 0.00B [00:00, ?B/s]

hparams.yaml:   0%|          | 0.00/567 [00:00<?, ?B/s]

LICENSE: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

checkpoints/model.ckpt:   0%|          | 0.00/2.32G [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.6.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

✅ COMET-22 model loaded


/usr/local/lib/python3.12/dist-packages/pytorch_lightning/core/saving.py:197: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [None]:
# ─────────────────────────────────────────────────────────────
# 🧠 PHASE 3D: COMET Evaluation (Same Setup as BLEU)
# ─────────────────────────────────────────────────────────────

from datasets import load_dataset
from tqdm import tqdm
import pandas as pd

COMET_SAMPLES = NUM_SAMPLES  # score the same number of sentences as the BLEU run
COMET_BATCH_SIZE = 8         # batch_size passed to comet_model.predict
COMET_MAX_LENGTH = 400       # max_length handed to the translation pipeline
COMET_DATASET = "openlanguagedata/flores_plus"
COMET_SPLIT = f"devtest[:{COMET_SAMPLES}]"  # first COMET_SAMPLES rows of devtest


def comet_row(pair_label, ds_from, ds_to, from_code, to_code):
    """Translate ``ds_from`` into ``to_code`` and score the output with COMET.

    Uses the module-level ``translator`` pipeline and ``comet_model``.
    Sentences are translated one per pipeline call, in row order; row ``i``
    of ``ds_to`` is the reference for row ``i`` of ``ds_from``.

    Args:
        pair_label: Text stored in the "Pair" column; also used in the
            progress-bar label.
        ds_from: Dataset whose ``'text'`` column holds the source sentences.
        ds_to: Dataset whose ``'text'`` column holds the reference sentences.
        from_code: Language code passed to the pipeline as ``src_lang``.
        to_code: Language code passed to the pipeline as ``tgt_lang``.

    Returns:
        dict with keys "Pair", "COMET" (mean of the per-sentence scores,
        rounded to 4 decimals) and "Samples" (number of scored sentences).

    Raises:
        ValueError: if the two datasets do not have the same number of rows,
            since references are matched to sources purely by position.
    """
    if len(ds_from) != len(ds_to):
        raise ValueError(
            f"source/reference size mismatch for {pair_label}: "
            f"{len(ds_from)} vs {len(ds_to)}"
        )

    triplets = []
    for i in tqdm(range(len(ds_from)), desc=f"COMET {pair_label}"):
        src_text = ds_from[i]['text']
        mt = translator(
            src_text,
            src_lang=from_code,
            tgt_lang=to_code,
            max_length=COMET_MAX_LENGTH
        )[0]['translation_text']

        # One {"src", "mt", "ref"} record per sentence, as passed to predict().
        triplets.append({
            "src": src_text,
            "mt": mt,
            "ref": ds_to[i]['text']
        })

    with torch.no_grad():
        scores = comet_model.predict(
            triplets,
            batch_size=COMET_BATCH_SIZE,
            gpus=1 if torch.cuda.is_available() else 0
        ).scores

    return {
        "Pair": pair_label,
        "COMET": round(sum(scores) / len(scores), 4),
        "Samples": len(scores)
    }


comet_results = []

print(f"\n🚀 Starting COMET Evaluation (N={COMET_SAMPLES})...")

# The English side is the same for every language pair, so load it once.
ds_src = load_dataset(COMET_DATASET, SRC_CODE, split=COMET_SPLIT)

for lang_name, tgt_code in eval_config.items():
    print(f"\n🌍 COMET Processing: {lang_name}")

    # Load the non-English side for this language
    ds_tgt = load_dataset(COMET_DATASET, tgt_code, split=COMET_SPLIT)

    # ─── PART A: Forward (English ➔ Native) ───
    comet_results.append(
        comet_row(f"EN ➔ {lang_name}", ds_src, ds_tgt, SRC_CODE, tgt_code)
    )

    # ─── PART B: Reverse (Native ➔ English) ───
    comet_results.append(
        comet_row(f"{lang_name} ➔ EN", ds_tgt, ds_src, tgt_code, SRC_CODE)
    )

# 📊 Final COMET Table
print("\n" + "="*60)
print("🏆 CONTEXT & MEANING EVALUATION: COMET-22")
print("="*60)

df_comet = pd.DataFrame(comet_results)
print(df_comet.to_markdown(index=False))
print("="*60)



🚀 Starting COMET Evaluation (N=250)...

🌍 COMET Processing: Hindi


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

COMET EN ➔ Hindi: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████| 32/32 [00:07<00:00,  4.01it/s]
COMET Hindi ➔ EN: 100%|██████████| 250/250 [02:55<00:00,  1.43it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU


🌍 COMET Processing: Telugu


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

COMET EN ➔ Telugu: 100%|██████████| 250/250 [04:04<00:00,  1.02it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████| 32/32 [00:09<00:00,  3.47it/s]
COMET Telugu ➔ EN: 100%|██████████| 250/250 [02:55<00:00,  1.43it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:G


🌍 COMET Processing: French


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

COMET EN ➔ French: 100%|██████████| 250/250 [03:49<00:00,  1.09it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████| 32/32 [00:08<00:00,  3.77it/s]
COMET French ➔ EN: 100%|██████████| 250/250 [03:02<00:00,  1.37it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:G


🌍 COMET Processing: German


Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/224 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/218 [00:00<?, ?it/s]

COMET EN ➔ German: 100%|██████████| 250/250 [03:40<00:00,  1.13it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████| 32/32 [00:08<00:00,  3.96it/s]
COMET German ➔ EN: 100%|██████████| 250/250 [03:05<00:00,  1.35it/s]
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:G


🏆 CONTEXT & MEANING EVALUATION: COMET-22
| Pair        |   COMET |   Samples |
|:------------|--------:|----------:|
| EN ➔ Hindi  |  0.8077 |       250 |
| Hindi ➔ EN  |  0.8935 |       250 |
| EN ➔ Telugu |  0.8606 |       250 |
| Telugu ➔ EN |  0.8783 |       250 |
| EN ➔ French |  0.8766 |       250 |
| French ➔ EN |  0.8844 |       250 |
| EN ➔ German |  0.8643 |       250 |
| German ➔ EN |  0.8859 |       250 |
