# NLLB-600M Model Exploration
Load the saved model and test English → Chinese translation

In [None]:
# Import all required libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from datasets import load_from_disk

## 1. Load the saved model

In [None]:
# Load the tokenizer and the seq2seq model from the local checkpoint
# directory, then move the model onto the best available device.
# Defines the notebook-level names `model_dir`, `tokenizer`, `model` and
# `device` that the following cells rely on.
model_dir = "../models/nllb-600M"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_dir)
print("✓ Tokenizer loaded")

print("\nLoading model...")
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
print("✓ Model loaded")

# Determine device: CUDA (Colab/NVIDIA) > MPS (Apple Silicon) > CPU.
# `torch.backends.mps` is looked up defensively: PyTorch builds that predate
# the MPS backend do not define it, and a plain attribute access would raise
# AttributeError instead of falling through to the CPU branch.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = "cuda"
elif mps_backend is not None and mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"

model = model.to(device)
print(f"\n✓ Model moved to device: {device}")

# Static labels: nothing is interpolated, so plain string literals are used.
print("\nModel: NLLB-200-distilled-600M")
print("Parameters: ~600M")

## 2. Test English → Chinese translation

In [None]:
# Single-sentence smoke test: English in, Simplified Chinese out.
# NLLB language codes used below: eng_Latn (English), zho_Hans (Simplified Chinese).
test_sentence = "Hello, how are you?"
print(f"Input (English): {test_sentence}")

# Tell the tokenizer which language the input text is written in
tokenizer.src_lang = "eng_Latn"

# Encode the sentence as PyTorch tensors, then place them on `device`
encoded = tokenizer(test_sentence, return_tensors="pt")
inputs = encoded.to(device)

# Token id of the target-language code, handed to generate() as forced_bos_token_id
target_lang_id = tokenizer.convert_tokens_to_ids("zho_Hans")
generation_kwargs = {
    "forced_bos_token_id": target_lang_id,
    "max_length": 50,
}
translated_tokens = model.generate(**inputs, **generation_kwargs)

# Turn the generated ids back into text; only one sentence went in, so take the first entry
decoded_batch = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
translation = decoded_batch[0]
print(f"Output (Chinese): {translation}")

## 3. Test with multiple sentences

In [None]:
# Translate a handful of English sentences one at a time and print each
# source sentence next to the generated Chinese output.
test_sentences = [
    "Hello, how are you?",
    "I am a student at the university.",
    "The cat is on the table.",
    "What time is it?",
    "I love learning languages."
]

print("="*80)
print("English → Chinese Translations")
print("="*80)

tokenizer.src_lang = "eng_Latn"

# The target-language token id is identical for every sentence, so it is
# looked up once here rather than on each loop iteration.
target_lang_id = tokenizer.convert_tokens_to_ids("zho_Hans")

for i, sentence in enumerate(test_sentences, 1):
    # Tokenize the sentence and move the tensors to the selected device
    inputs = tokenizer(sentence, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        forced_bos_token_id=target_lang_id,
        max_length=50
    )
    # One input sentence per call, so the first decoded entry is the translation
    translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    print(f"\n{i}. EN: {sentence}")
    print(f"   ZH: {translation}")

## 4. Inspect model architecture

In [None]:
# Print a few architecture hyperparameters read from the loaded model's config.
# Each row pairs the label to display with the config attribute to read.
config_rows = (
    ("Number of encoder layers", "encoder_layers"),
    ("Number of decoder layers", "decoder_layers"),
    ("Number of attention heads", "encoder_attention_heads"),
    ("Hidden size", "d_model"),
    ("Vocabulary size", "vocab_size"),
)

print("Model Configuration:")
for label, attr_name in config_rows:
    # Attributes are read one at a time, in display order
    print(f"  {label}: {getattr(model.config, attr_name)}")
print("\nModel has encoder-decoder architecture for sequence-to-sequence translation")

In [None]:
# Tally parameter counts in a single pass over the model's parameters:
# every element counts toward the total, and elements of tensors with
# requires_grad set also count toward the trainable figure.
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"\nParameters (in millions): {total_params / 1e6:.1f}M")

## 5. Test with dataset examples

In [None]:
# Load the sentence pairs saved on disk and translate the first few English
# sources, printing each next to its reference Chinese translation.
dataset = load_from_disk("../data/wmt17_en-zh_validation_2000")
print(f"Loaded {len(dataset)} sentence pairs\n")

# Test on first 3 examples
print("="*80)
print("Testing on WMT17 dataset examples (English → Chinese)")
print("="*80)

tokenizer.src_lang = "eng_Latn"

# The target-language token id does not change between examples; look it up once.
target_lang_id = tokenizer.convert_tokens_to_ids("zho_Hans")

# Cap the preview at the dataset size so a dataset with fewer than three rows
# does not raise IndexError.
num_examples = min(3, len(dataset))

for i in range(num_examples):
    # Each row is read as {"translation": {"en": ..., "zh": ...}}
    example = dataset[i]["translation"]
    english = example["en"]
    chinese_ref = example["zh"]

    # Translate
    inputs = tokenizer(english, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        forced_bos_token_id=target_lang_id,
        max_length=100
    )
    # One input sentence per call, so the first decoded entry is the translation
    translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    print(f"\nExample {i+1}:")
    print(f"EN: {english}")
    print(f"ZH (reference): {chinese_ref}")
    print(f"ZH (translated): {translation}")

## Summary

**Model loaded successfully:**
- NLLB-200-distilled-600M (~600M parameters)
- English → Chinese translation working
- Device selected automatically (CUDA or MPS GPU when available, otherwise CPU)
- Ready for attention extraction

**Next steps:**
1. Extract attention weights from encoder and decoder
2. Build attention graphs
3. Compute persistent homology