In [4]:
import json
import numpy as np
import html
import re
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.meteor_score import meteor_score
import sacrebleu
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rouge_score import rouge_scorer


def clean_text(text):
    text = html.unescape(text)
    text = re.sub(r'[^\w\s.,;:!?\'"-]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()  
    return text


def evaluate_translation_scores(json_path):
    """
    Tính BLEU, SacreBLEU, METEOR, ROUGE-L, TF-IDF cosine similarity
    giữa 'vi_pred' và 'vi_label' trong file JSON.
    """

    # --- Đọc dữ liệu ---
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    preds = [clean_text(item["vi_pred"].strip()) for item in data]
    refs = [clean_text(item["vi_label"].strip()) for item in data]

    # --- BLEU (nltk) ---
    refs_nested = [[r.split()] for r in refs]
    preds_tokenized = [p.split() for p in preds]
    bleu_score = corpus_bleu(refs_nested, preds_tokenized) * 100

    # --- SacreBLEU ---
    sacre_bleu = sacrebleu.corpus_bleu(preds, [refs]).score

    # --- METEOR (phải token hóa trước) ---
    meteor_scores = [
        meteor_score([ref.split()], pred.split())
        for ref, pred in zip(refs, preds)
    ]
    avg_meteor = np.mean(meteor_scores) * 100

    # --- ROUGE (dùng ROUGE-L) ---
    rouge = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    rouge_scores = [rouge.score(r, p)["rougeL"].fmeasure for r, p in zip(refs, preds)]
    avg_rougeL = np.mean(rouge_scores) * 100

    # --- TF-IDF cosine similarity ---
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(refs + preds)
    n = len(refs)
    tfidf_sims = [
        cosine_similarity(tfidf_matrix[i], tfidf_matrix[i + n])[0][0]
        for i in range(n)
    ]
    avg_tfidf_sim = np.mean(tfidf_sims) * 100

    return {
        "BLEU": bleu_score,
        "SacreBLEU": sacre_bleu,
        "METEOR": avg_meteor,
        "ROUGE-L": avg_rougeL,
        "TF-IDF Cosine": avg_tfidf_sim,
    }


In [5]:
scores = evaluate_translation_scores("ALT_test/alt_multilingual_dataset/ALT_josn/en_vi_translations_seallm_fixed.json")
print(scores)

{'BLEU': 14.969834025862633, 'SacreBLEU': 16.640343031723138, 'METEOR': 45.32174723031538, 'ROUGE-L': 51.00639431585394, 'TF-IDF Cosine': 52.23257006558488}


In [12]:
with open("ALT_test/alt_multilingual_dataset/ALT_josn/en_vi_translations_seallm.json", "r", encoding="utf-8") as f:
        data = json.load(f)

# COMET

In [1]:
from comet import download_model, load_from_checkpoint
import json

model_path = download_model("Unbabel/wmt22-comet-da")
model = load_from_checkpoint(model_path)

with open("ALT_test/ALT_josn/en_vi_nllb200.json", "r", encoding="utf-8") as f:
    data_json = json.load(f)

# 2️⃣ Chuyển sang format đúng cho COMET
data = [
    {
        "src": item["en"],
        "mt": item["vi_pred"],
        "ref": item["vi_label"]
    }
    for item in data_json
]

model_output = model.predict(data, batch_size=16, gpus=1)
print (model_output)


  from pkg_resources import DistributionNotFound, get_distribution


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\dongh\.cache\huggingface\hub\models--Unbabel--wmt22-comet-da\snapshots\2760a223ac957f30acfb18c8aa649b01cf1d75f2\checkpoints\model.ckpt`
Lock 1769875716992 acquired on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\34ddbd64a4cd3f2d9d8a9120d3662d0bf91baead.lock


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Lock 1769875716992 released on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\34ddbd64a4cd3f2d9d8a9120d3662d0bf91baead.lock
Lock 1769875710032 acquired on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\db9af13bf09fd3028ca32be90d3fb66d5e470399.lock


sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Lock 1769875710032 released on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\db9af13bf09fd3028ca32be90d3fb66d5e470399.lock
Lock 1769875714784 acquired on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\463f3414782c1c9405828c9b31bfa36dda1f45c5.lock


tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Lock 1769875714784 released on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\463f3414782c1c9405828c9b31bfa36dda1f45c5.lock
Lock 1769875707296 acquired on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\8e5fb14e1352fd8fc678a7b293b63cfb5cf091f6.lock


config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

Lock 1769875707296 released on C:\Users\dongh\.cache\huggingface\hub\.locks\models--xlm-roberta-large\8e5fb14e1352fd8fc678a7b293b63cfb5cf091f6.lock
Encoder model frozen.
c:\Users\dongh\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\core\saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


FileNotFoundError: [Errno 2] No such file or directory: 'ALT_test/ALT_josn/en_vi_nllb200.json'