In [None]:
# Install all required libraries
!pip install unbabel-comet
!pip install -q transformers accelerate evaluate bert_score rouge_score sacremoses nltk psutil pandas sacrebleu sentencepiece

from huggingface_hub import login

# Paste your token here
login(token='hugging face token goes here')

# Download NLTK data needed for BLEU and ROUGE
import nltk
nltk.download('punkt')

# Imports
import time
import torch
import psutil
import logging
from nltk.translate.bleu_score import sentence_bleu
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import evaluate

# Keep the cell output readable: only errors from these libraries are shown.
for noisy_logger in ("transformers", "pytorch_lightning"):
    logging.getLogger(noisy_logger).setLevel(logging.ERROR)

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Example sentences per language.
# Every source paragraph is paired with the same English reference, so the
# reference is defined once; a typo in one pasted copy would otherwise skew
# that language's scores without any visible error.
REFERENCE_EN = (
    "I am applying for a tourist visa to visit the United States. "
    "I intend to stay for two weeks and visit popular landmarks like the Statue of Liberty and Times Square. "
    "I will return to my home country after my vacation."
)

# (two-letter code, display name, source paragraph)
_SOURCE_PARAGRAPHS = [
    ("ar", "Arabic", "أنا أتقدم للحصول على تأشيرة سياحية لزيارة الولايات المتحدة. أنوي البقاء لمدة أسبوعين وزيارة معالم مشهورة مثل تمثال الحرية وساحة التايمز. سأعود إلى بلدي بعد انتهاء عطلتي."),
    ("es", "Spanish", "Estoy solicitando una visa de turista para visitar los Estados Unidos. Tengo la intención de quedarme dos semanas y visitar lugares famosos como la Estatua de la Libertad y Times Square. Regresaré a mi país después de mis vacaciones."),
    ("fr", "French", "Je demande un visa de touriste pour visiter les États-Unis. J’ai l’intention de rester deux semaines et de visiter des sites célèbres comme la Statue de la Liberté et Times Square. Je retournerai dans mon pays après mes vacances."),
    ("ru", "Russian", "Я подаю заявку на туристическую визу для поездки в Соединенные Штаты. Я собираюсь остаться на две недели и посетить известные достопримечательности, такие как Статуя Свободы и Таймс-сквер. Я вернусь в свою страну после отпуска."),
    ("ro", "Romanian", "Solicit o viză turistică pentru a vizita Statele Unite. Intenționez să stau două săptămâni și să vizitez repere populare precum Statuia Libertății și Times Square. Mă voi întoarce în țara mea după vacanță."),
    ("bg", "Bulgarian", "Кандидатствам за туристическа виза за посещение на Съединените щати. Възнамерявам да остана две седмици и да посетя популярни забележителности като Статуята на свободата и Таймс Скуеър. Ще се върна в родината си след ваканцията."),
    ("cs", "Czech", "Žádám o turistické vízum pro návštěvu Spojených států. Mám v úmyslu zůstat dva týdny a navštívit známá místa jako Sochu svobody a Times Square. Po dovolené se vrátím do své domovské země."),
    ("da", "Danish", "Jeg ansøger om et turistvisum for at besøge USA. Jeg har til hensigt at blive i to uger og besøge kendte vartegn som Frihedsgudinden og Times Square. Jeg vender tilbage til mit hjemland efter ferien."),
    ("de", "German", "Ich beantrage ein Touristenvisum, um die Vereinigten Staaten zu besuchen. Ich beabsichtige, zwei Wochen zu bleiben und bekannte Sehenswürdigkeiten wie die Freiheitsstatue und den Times Square zu besuchen. Nach meinem Urlaub kehre ich in mein Heimatland zurück."),
    ("et", "Estonian", "Taotlen turismiviisat Ameerika Ühendriikide külastamiseks. Kavatsen jääda kaheks nädalaks ja külastada kuulsaid vaatamisväärsusi nagu Vabadussammas ja Times Square. Pärast puhkust naasen oma koduriiki."),
]

# Same records as before: one dict per language with code / lang / src / ref keys.
paragraph_data = [
    {"code": code, "lang": lang, "src": src, "ref": REFERENCE_EN}
    for code, lang, src in _SOURCE_PARAGRAPHS
]

# Maps the two-letter codes used in paragraph_data to the language tags
# that the mBART tokenizer expects as its source language.
LANG_CODE_MAP = dict(
    ar="ar_AR",
    es="es_XX",
    fr="fr_XX",
    ru="ru_RU",
    ro="ro_RO",
    bg="bg_BG",
    cs="cs_CZ",
    da="da_DK",
    de="de_DE",
    et="et_EE",
)

# Load every evaluation metric once, in the same order as before, so the
# per-language loop only has to call .compute() on them.
(
    bleu_mod,
    bertscore_mod,
    meteor_mod,
    rouge_mod,
    ter_mod,
    comet_mod,
) = [
    evaluate.load(metric_name)
    for metric_name in ("bleu", "bertscore", "meteor", "rouge", "ter", "comet")
]

# Helpers to measure system resources
def measure_system():
    """Take a snapshot of this process's memory and of CPU utilisation.

    Returns:
        A ``(rss_mb, cpu_percent)`` tuple: the resident set size of the current
        process in MiB, and the value of ``psutil.cpu_percent(interval=None)``.
        Per the psutil documentation that call is non-blocking and reports
        utilisation since the previous call, so the very first reading is not
        meaningful on its own.
    """
    rss_bytes = psutil.Process().memory_info().rss
    cpu_pct = psutil.cpu_percent(interval=None)
    return rss_bytes / 1024**2, cpu_pct

# Translate & time function
def translate_and_time(model, tokenizer, text: str, language_code: str):
    """Translate ``text`` into English and time tokenisation plus generation.

    Args:
        model: Seq2seq model exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer; its ``src_lang`` is set to ``language_code``.
        text: Source paragraph to translate.
        language_code: Source-language tag understood by the tokenizer (e.g. ``"fr_XX"``).

    Returns:
        ``(pred, t_first, t_total, tokens)`` where ``pred`` is the decoded
        translation, ``t_total`` is the wall-clock time in seconds for
        tokenising and generating, ``t_first`` is the same value (the time to
        the first token is not measured separately), and ``tokens`` is the
        number of token ids in the generated sequence, special tokens included.
    """
    tokenizer.src_lang = language_code
    # Pass the target language per call instead of writing it into model.config,
    # so the shared model object is not mutated as a side effect.
    english_id = tokenizer.lang_code_to_id["en_XX"]

    t0 = time.perf_counter()
    # Follow the model's own device so inputs and weights always match.
    inputs = tokenizer(text, return_tensors="pt", padding=True).to(model.device)
    outputs = model.generate(**inputs, forced_bos_token_id=english_id)
    t1 = time.perf_counter()

    pred = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Throughput must be based on what was generated, not on the source length.
    tokens = len(outputs[0])
    elapsed = t1 - t0
    return pred, elapsed, elapsed, tokens  # first-token time == total time here

# Load mBART once.
# The checkpoint is public, so no authentication argument is needed; the
# deprecated `use_auth_token=True` also fails outright when no token is stored.
# Any token saved by an earlier login is still picked up by default.
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer  = MBart50TokenizerFast.from_pretrained(model_name)
model      = MBartForConditionalGeneration.from_pretrained(model_name).to(device)
model.eval()  # inference only: disables dropout

# Evaluate each language

# The model footprint does not change between languages, so compute it once.
# element_size() uses the real parameter dtype instead of assuming 4-byte floats.
size_mb = sum(p.numel() * p.element_size() for p in model.parameters()) / 1024**2

for item in paragraph_data:
    code, lang, src, ref = item["code"], item["lang"], item["src"], item["ref"]
    print(f"\n=== {lang} → English ===")

    # Reset the CUDA peak counter so the VRAM figure below is the peak for this
    # language only, not the running maximum since the process started.
    if device.type == "cuda":
        torch.cuda.reset_peak_memory_stats()

    # Measure before. The CPU reading is discarded: this call only marks the
    # start of the interval that the next cpu_percent reading reports on.
    ram_before, _ = measure_system()

    # Translate
    mbart_code = LANG_CODE_MAP[code]
    pred, t_first, t_total, token_count = translate_and_time(model, tokenizer, src, mbart_code)

    # Measure after. cpu_percent(interval=None) reports utilisation since the
    # previous call, so this reading already covers the translation; subtracting
    # the earlier reading would give a meaningless (often negative) number.
    ram_after, cpu_used = measure_system()

    # Compute metrics
    bleu_score   = bleu_mod.compute(predictions=[pred], references=[[ref]])["bleu"]
    bert_res     = bertscore_mod.compute(predictions=[pred], references=[ref], lang="en")
    meteor_score = meteor_mod.compute(predictions=[pred], references=[[ref]])["meteor"]
    rouge_score  = rouge_mod.compute(predictions=[pred], references=[ref])["rougeL"]
    ter_score    = ter_mod.compute(predictions=[pred], references=[[ref]])["score"]
    comet_res    = comet_mod.compute(sources=[src], predictions=[pred], references=[ref])
    # Take the first scalar entry of the COMET result (the aggregate score).
    comet_score  = next(v for v in comet_res.values() if isinstance(v, (int, float)))

    # Guard against a zero-length timing interval.
    tokens_per_sec = token_count / t_total if t_total > 0 else float("nan")

    # Print
    print(f"Source    : {src}")
    print(f"Reference : {ref}")
    print(f"Predicted : {pred}\n")
    print(f"BLEU      : {bleu_score:.4f}")
    print(f"BERTScore : {bert_res['f1'][0]:.4f}")
    print(f"METEOR    : {meteor_score:.4f}")
    print(f"ROUGE-L   : {rouge_score:.4f}")
    print(f"TER       : {ter_score:.4f}")
    print(f"COMET     : {comet_score:.4f}\n")
    print(f"Time to 1st tok : {t_first:.4f} s")
    print(f"Total inf time  : {t_total:.4f} s")
    print(f"Tokens/sec      : {tokens_per_sec:.2f}")
    if device.type == "cuda":
        print(f"VRAM  : {(torch.cuda.max_memory_allocated() / 1024**2):.2f} MB")

    print(f"RAM   : {ram_after-ram_before:.2f} MB")
    print(f"CPU   : {cpu_used:.2f} %")
    print(f"Model size : {size_mb:.2f} MB")
    print("="*50)




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Using device: cpu


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]


=== Arabic → English ===




tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Source    : أنا أتقدم للحصول على تأشيرة سياحية لزيارة الولايات المتحدة. أنوي البقاء لمدة أسبوعين وزيارة معالم مشهورة مثل تمثال الحرية وساحة التايمز. سأعود إلى بلدي بعد انتهاء عطلتي.
Reference : I am applying for a tourist visa to visit the United States. I intend to stay for two weeks and visit popular landmarks like the Statue of Liberty and Times Square. I will return to my home country after my vacation.
Predicted : I'm applying for a U.S. tour. I want to stay for two weeks and go to famous places like the Statue of Liberty and Times Square. I'll come back after my vacation.

BLEU      : 0.3759
BERTScore : 0.9538
METEOR    : 0.6391
ROUGE-L   : 0.6053
TER       : 48.7805
COMET     : 0.8290

Time to 1st tok : 37.5301 s
Total inf time  : 37.5301 s
Tokens/sec      : 1.36
RAM   : 2263.41 MB
CPU   : -6.40 %
Model size : 2330.32 MB

=== Spanish → English ===
Source    : Estoy solicitando una visa de turista para visitar los Estados Unidos. Tengo la intención de quedarme dos semanas y visit