# NLLB-200 Adapter Quality Test

**Test your trained NLLB-200 adapter against known BLOOMZ failure cases**

Upload `nllb_18languages_adapter.zip` and run all cells!

---

## Cell 1: Install Packages

In [None]:
!pip install -q transformers peft torch sentencepiece

print("✅ Packages installed!")

## Cell 2: Upload Adapter

**Upload `nllb_18languages_adapter.zip`** (a `.rar` archive of the same folder is also accepted)

In [None]:
# Upload the adapter archive (unless the adapter folder is already present),
# extract it into the working directory, and list the extracted files.
from google.colab import files
import zipfile
import os

# Folder the archive is expected to unpack to.
ADAPTER_DIR = 'nllb_18languages_adapter'

if not os.path.exists(ADAPTER_DIR):
    print("Upload nllb_18languages_adapter.zip or .rar")
    uploaded = files.upload()

    # Guard against an empty result (e.g. the upload dialog was dismissed);
    # indexing into it would otherwise fail with an unhelpful IndexError.
    if not uploaded:
        raise RuntimeError(
            "No file was uploaded - re-run this cell and select the adapter archive."
        )

    # Find the uploaded file
    filename = next(iter(uploaded))
    print(f"Uploaded: {filename}")

    # Detect ZIP archives by content rather than by extension, so files named
    # e.g. "adapter.ZIP" or without a suffix are still handled in pure Python.
    if zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename, 'r') as z:
            z.extractall('.')
    elif filename.lower().endswith('.rar'):
        # rarfile is only needed for this branch, so install it on demand
        # into the interpreter that runs this kernel.
        import subprocess
        import sys
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-q', 'rarfile'],
            check=True,
        )
        import rarfile
        with rarfile.RarFile(filename, 'r') as r:
            r.extractall('.')
    else:
        # Fail loudly instead of reporting a successful extraction.
        raise ValueError(
            f"Unsupported archive '{filename}': expected a .zip or .rar file."
        )

    print("✅ Extracted!")
else:
    print("✅ Already uploaded!")

# The archive must contain the adapter folder at its top level.
if not os.path.isdir(ADAPTER_DIR):
    raise FileNotFoundError(
        f"Expected a '{ADAPTER_DIR}/' folder after extraction, but it was not found. "
        "Check that the archive contains that folder at its top level."
    )

print("\nFiles:")
for f in sorted(os.listdir(ADAPTER_DIR)):
    # Size in megabytes
    s = os.path.getsize(os.path.join(ADAPTER_DIR, f)) / 1024 / 1024
    print(f"  {f:30} {s:.1f}MB")

## Cell 3: Load Model

In [None]:
# Load the NLLB-200 tokenizer and base model, then attach the trained adapter.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

print("Loading NLLB-200...\n")

model_name = "facebook/nllb-200-distilled-600M"

tokenizer = AutoTokenizer.from_pretrained(model_name)
print("✅ Tokenizer")

# Use half precision only when a GPU is available; on a CPU-only runtime
# fall back to float32, which is the safe default there.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

base_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",
)
print(f"✅ Base model on {base_model.device}")

model = PeftModel.from_pretrained(base_model, "nllb_18languages_adapter")
# This notebook only evaluates the model: switch every module (including the
# adapter layers that were just added) to eval mode so dropout is disabled.
model.eval()
print("✅ Adapter loaded!\n🎉 Ready!")

## Cell 4: Run Tests

In [None]:
# Translate a fixed set of English sentences with the adapter and classify
# each output by the script it is written in. Produces `results`, a list of
# {"test", "q", "time"} dicts consumed by the results cell.
import time

tests = [
    {"name": "Gujarati (BLOOMZ: Chinese)", "input": "Hello, good morning!", "chars": ["ગ","જ","ર"], "script": "Gujarati"},
    {"name": "Telugu (BLOOMZ: English)", "input": "Thank you very much.", "chars": ["త","ల","గ"], "script": "Telugu"},
    {"name": "Bengali (BLOOMZ: Artifacts)", "input": "This is beautiful.", "chars": ["ব","া","ং"], "script": "Bengali"},
    {"name": "Hindi", "input": "How are you?", "chars": ["न","म","स"], "script": "Devanagari"},
    {"name": "Tamil", "input": "Welcome home.", "chars": ["த","ம","ி"], "script": "Tamil"},
    {"name": "Kannada", "input": "What is your name?", "chars": ["ಕ","ನ"], "script": "Kannada"},
    {"name": "Malayalam", "input": "Good night.", "chars": ["മ","ല","യ"], "script": "Malayalam"},
    {"name": "Marathi", "input": "Please help me.", "chars": ["म","र","ा"], "script": "Devanagari"},
]

# NLLB language codes, keyed by the language name that appears in a test's
# "name" field. Built once here instead of on every loop iteration.
lang_codes = {
    "Gujarati": "guj_Gujr",
    "Telugu": "tel_Telu",
    "Bengali": "ben_Beng",
    "Hindi": "hin_Deva",
    "Tamil": "tam_Taml",
    "Kannada": "kan_Knda",
    "Malayalam": "mal_Mlym",
    "Marathi": "mar_Deva",
}

# Unicode block (first, last code point) of each script under test. Checking
# the whole block avoids false "UNKNOWN" verdicts when a correct translation
# happens not to contain any of the few sample characters in `chars`.
script_ranges = {
    "Devanagari": ('\u0900', '\u097f'),
    "Bengali": ('\u0980', '\u09ff'),
    "Gujarati": ('\u0a80', '\u0aff'),
    "Tamil": ('\u0b80', '\u0bff'),
    "Telugu": ('\u0c00', '\u0c7f'),
    "Kannada": ('\u0c80', '\u0cff'),
    "Malayalam": ('\u0d00', '\u0d7f'),
}


def _has_script(text, script, sample_chars):
    """Return True if `text` contains at least one character of `script`.

    Uses the Unicode block from `script_ranges` when the script is known;
    otherwise falls back to looking for any of `sample_chars`.
    """
    bounds = script_ranges.get(script)
    if bounds is None:
        return any(c in text for c in sample_chars)
    first, last = bounds
    return any(first <= c <= last for c in text)


print("="*80)
print("TESTING NLLB-200 ADAPTER")
print("="*80)
print()

results = []

for i, t in enumerate(tests, 1):
    print(f"{i}/{len(tests)}: {t['name']}")
    print(f"  In:  {t['input']}")

    # Infer the target language from the test name (first matching entry).
    tgt_lang = next(
        (code for lang, code in lang_codes.items() if lang in t['name']),
        None,
    )

    # The source language must be set BEFORE tokenizing: the NLLB tokenizer
    # encodes the source-language tag into the input ids.
    tokenizer.src_lang = "eng_Latn"  # Source is English

    gen_kwargs = {"max_length": 128, "num_beams": 4, "early_stopping": True}
    if tgt_lang:
        # Force the decoder to start with the target-language token.
        tokenizer.tgt_lang = tgt_lang
        gen_kwargs["forced_bos_token_id"] = tokenizer.convert_tokens_to_ids(tgt_lang)
    else:
        print("  ⚠️  No NLLB language code found for this test; target language not forced")

    inp = tokenizer(t['input'], return_tensors="pt").to(model.device)

    # perf_counter is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    with torch.no_grad():
        out = model.generate(**inp, **gen_kwargs)
    gen_time = time.perf_counter() - start

    trans = tokenizer.decode(out[0], skip_special_tokens=True)
    print(f"  Out: {trans}")
    print(f"  Time: {gen_time:.2f}s")

    has_script = _has_script(trans, t['script'], t['chars'])
    # CJK Unified Ideographs block
    has_chinese = any('\u4e00' <= c <= '\u9fff' for c in trans)
    # Treat the output as English when more than 70% of its characters are ASCII.
    is_english = sum(1 for c in trans if c.isascii())/max(len(trans),1) > 0.7

    if has_chinese:
        verdict = "❌ CHINESE"
        q = "bad"
    elif is_english:
        verdict = "❌ ENGLISH"
        q = "bad"
    elif has_script:
        verdict = f"✅ {t['script']}"
        q = "good"
    else:
        verdict = "⚠️  UNKNOWN"
        q = "unknown"

    print(f"  ➜ {verdict}")
    print()

    results.append({"test": t['name'], "q": q, "time": gen_time})

print("="*80)

## Cell 5: Results

In [None]:
# Summarize the per-test verdicts collected in `results` and compare them
# with the previously observed BLOOMZ failures.
print("="*80)
print("FINAL RESULTS")
print("="*80)
print()

good = sum(1 for r in results if r['q']=='good')
bad = sum(1 for r in results if r['q']=='bad')
total = len(results)

# Guard the averages so an empty result list does not raise ZeroDivisionError.
quality_pct = good*100/total if total else 0.0
avg_time = sum(r['time'] for r in results)/total if total else 0.0

# Only the tests tagged with a BLOOMZ failure count towards "fixed" issues;
# using the overall `good` count here could report more fixes than issues.
bloomz_results = [r for r in results if 'BLOOMZ' in r['test']]
bloomz_fixed = sum(1 for r in bloomz_results if r['q']=='good')

print("Results:")
for r in results:
    s = "✅" if r['q']=='good' else "❌"
    print(f"{s} {r['test']:30} - {r['q'].upper()}")

print()
print(f"Quality: {good}/{total} ({quality_pct:.1f}%)")
print(f"Avg time: {avg_time:.2f}s")
print()

print("="*80)
print("COMPARISON")
print("="*80)
print()
print("BLOOMZ:")
print("  ❌ Gujarati: Chinese")
print("  ❌ Telugu: English")
print("  ❌ Bengali: Artifacts")
print("  Quality: ~50-70%")
print()
print("NLLB-200:")
print(f"  Quality: {quality_pct:.0f}%")
print(f"  Fixed: {bloomz_fixed}/{len(bloomz_results)} BLOOMZ issues")
print()

if quality_pct >= 85:
    print("🎉 EXCELLENT! Much better than BLOOMZ!")
elif quality_pct >= 70:
    print("✅ GOOD! Significant improvement!")
else:
    print("⚠️  Some issues remain")

print()
print("="*80)