In [None]:
from transformers import MarianMTModel, MarianTokenizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge

# Checkpoint identifier passed to both from_pretrained() calls below, so the
# tokenizer and the model always come from the same checkpoint.
# NOTE(review): the name indicates an English -> Vietnamese OPUS-MT model.
model_name = "Helsinki-NLP/opus-mt-en-vi"
# Loaded once at module level and reused by every translate_en_to_vi() call.
# NOTE(review): from_pretrained presumably downloads the weights on first use
# when they are not cached locally -- confirm for offline environments.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

In [None]:
def translate_en_to_vi(sentence: str) -> str:
    """Return the Vietnamese translation of an English sentence.

    The text is encoded with the module-level ``tokenizer``, passed through
    the module-level ``model``'s ``generate`` method, and the first generated
    sequence is decoded back to text with special tokens removed.

    Args:
        sentence: English text to translate.

    Returns:
        The decoded text of the first generated sequence.
    """
    # PyTorch tensors ("pt") are requested because they are fed to the model.
    encoded = tokenizer(sentence, padding=True, return_tensors="pt")
    generated_ids = model.generate(**encoded)
    # Only the first sequence in the generated batch is decoded.
    first_sequence = generated_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
# Example: translate one hard-coded English sentence with the loaded model
# and print the source sentence next to the model output.
print("Translation Example:")
print("EN: Hello, how are you?")
print("VI:", translate_en_to_vi("Hello, how are you?"))

In [None]:
def evaluate(sentences):
    """Translate each English sentence and report BLEU, METEOR and ROUGE-L F1.

    For every pair the English sentence is translated with
    ``translate_en_to_vi`` and the prediction is scored against the reference.
    BLEU and METEOR are computed on whitespace-split tokens; ROUGE-L is
    computed on the raw strings. Per-sentence scores are printed as they are
    computed, followed by the arithmetic mean of each metric.

    Args:
        sentences: Iterable of ``(english_sentence, reference_vietnamese)``
            string pairs. It is consumed exactly once.

    Returns:
        None. All results are printed. When ``sentences`` is empty a short
        notice is printed instead of averages.
    """
    # Materialise once so generators work and emptiness can be checked.
    pairs = list(sentences)
    if not pairs:
        # Nothing to average: avoid the division by zero in the summary.
        print("\n--- AVERAGE METRICS ---")
        print("No sentences to evaluate.")
        return

    rouge = Rouge()

    bleu_scores, meteor_scores, rouge_scores = [], [], []

    for en_sentence, ref_vi in pairs:
        pred_vi = translate_en_to_vi(en_sentence)

        # Whitespace tokenisation, shared by BLEU and METEOR.
        ref_tokens = ref_vi.split()
        pred_tokens = pred_vi.split()

        # BLEU (single reference, token lists)
        bleu = sentence_bleu([ref_tokens], pred_tokens)
        bleu_scores.append(bleu)

        # METEOR (single reference, token lists)
        meteor = meteor_score([ref_tokens], pred_tokens)
        meteor_scores.append(meteor)

        # ROUGE (needs strings). The rouge package rejects an empty
        # hypothesis or reference, so such a pair is scored as 0.0 instead of
        # aborting the whole evaluation run.
        if pred_tokens and ref_tokens:
            rouge_result = rouge.get_scores(pred_vi, ref_vi)[0]["rouge-l"]["f"]
        else:
            rouge_result = 0.0
        rouge_scores.append(rouge_result)

        print(f"\nEN: {en_sentence}")
        print(f"Reference VI: {ref_vi}")
        print(f"Predicted VI: {pred_vi}")
        print(f"BLEU: {bleu:.4f}, METEOR: {meteor:.4f}, ROUGE-L F1: {rouge_result:.4f}")

    count = len(pairs)
    print("\n--- AVERAGE METRICS ---")
    print(f"BLEU: {sum(bleu_scores)/count:.4f}")
    print(f"METEOR: {sum(meteor_scores)/count:.4f}")
    print(f"ROUGE-L F1: {sum(rouge_scores)/count:.4f}")

In [None]:
# Reference dataset: (English sentence, reference Vietnamese translation)
# pairs, in the order evaluate() unpacks them.
examples = [
    ("The weather is nice today.", "Thời tiết hôm nay thật đẹp."),
    ("I love learning new languages.", "Tôi thích học những ngôn ngữ mới."),
    ("This book is very interesting.", "Cuốn sách này rất thú vị."),
    ("Can you help me with my homework?", "Bạn có thể giúp tôi làm bài tập về nhà không?"),
]

# Translate every example and print per-sentence and average metric scores.
evaluate(examples)