In [29]:
import os
from tqdm import tqdm
import csv
from sacrebleu import sentence_bleu

import nltk
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize
#nltk.download('punkt')

In [30]:
def compute_bleu(reference, hypothesis):
    """
    Sentence-level BLEU of `hypothesis` measured against one `reference`.

    The hypothesis string and a one-element reference list are passed to
    `sentence_bleu`; its `.score` is divided by 100 so that the returned
    value is normalized to [0, 1].
    """
    bleu_result = sentence_bleu(hypothesis, [reference])
    normalized = bleu_result.score / 100
    return normalized

def compute_meteor(reference, hypothesis):
    """
    METEOR score of `hypothesis` measured against one `reference`.

    `meteor_score` is called with token lists rather than raw strings, so
    both inputs are first split with `word_tokenize`; the reference tokens
    are wrapped in a list because a list of references is passed.
    """
    ref_tokens, hyp_tokens = word_tokenize(reference), word_tokenize(hypothesis)
    return meteor_score([ref_tokens], hyp_tokens)


In [42]:
from bleurt.score import BleurtScorer

# Load the BLEURT scorer with a pre-trained checkpoint.
# The checkpoint path is relative, so it is presumably resolved against the
# notebook's working directory. The recorded output of this cell shows
# "AssertionError: Could not find BLEURT checkpoint", i.e. the BLEURT-20
# checkpoint has to be present at this location before the cell can run.
# `scorer` is a module-level object read by compute_bleurt().
bleurt_checkpoint = "bleurt_checkpoints/BLEURT-20"
scorer = BleurtScorer(bleurt_checkpoint)

def compute_bleurt(reference, hypothesis):
    """
    BLEURT score of `hypothesis` measured against one `reference`.

    The module-level `scorer` is called with one-element lists, so the first
    (and only) entry of its result is returned.
    """
    references, candidates = [reference], [hypothesis]
    pair_scores = scorer.score(references=references, candidates=candidates)
    return pair_scores[0]


INFO:tensorflow:Reading checkpoint bleurt_checkpoints/BLEURT-20.


AssertionError: Could not find BLEURT checkpoint bleurt_checkpoints/BLEURT-20

In [None]:
def evaluate_from_folders(reference_folder, hypothesis_folder, lang="en"):
    """
    Evaluate translations by processing reference and hypothesis files directly from folders.

    Files are paired by name: a reference file matches the hypothesis file
    whose name equals the reference name with " English" removed. Each file
    is read as CSV, the values of its "Text" column are joined with single
    spaces, and the two resulting strings are scored.

    Parameters:
    - reference_folder (str): Path to the folder containing reference files.
    - hypothesis_folder (str): Path to the folder containing hypothesis files.
    - lang (str): Currently unused; kept so existing callers that pass it
      keep working. No language-dependent metric is computed here.

    Returns:
    - A dictionary mapping each matched filename (in sorted order) to
      {"METEOR": ..., "BLEU": ...}.

    Raises:
    - ValueError: if no file name is present in both folders.
    - KeyError: if a matched CSV file has rows but no "Text" column.
    """
    reference_files = _list_files_by_name(reference_folder, strip=" English")
    hypothesis_files = _list_files_by_name(hypothesis_folder)

    # Sort the intersection: iterating a bare set gives an order that can
    # change from run to run, which makes the progress log and the result
    # dictionary order irreproducible.
    common_files = sorted(reference_files.keys() & hypothesis_files.keys())
    if not common_files:
        raise ValueError("No matching files found in the reference and hypothesis folders.")

    file_scores = {}
    for filename in tqdm(common_files, desc="Evaluating Files"):
        print(f"Processing: {filename}")

        # Concatenate all texts of a file into a single string per side.
        reference = " ".join(_read_text_column(reference_files[filename]))
        hypothesis = " ".join(_read_text_column(hypothesis_files[filename]))

        file_scores[filename] = {
            "METEOR": compute_meteor(reference, hypothesis),
            "BLEU": compute_bleu(reference, hypothesis),
        }

    return file_scores


def _list_files_by_name(folder, strip=""):
    """Map each regular file in `folder` (name with `strip` removed) to its full path."""
    files = {}
    for entry in os.listdir(folder):
        path = os.path.join(folder, entry)
        # os.listdir also returns sub-directories (e.g. .ipynb_checkpoints);
        # those cannot be opened as CSV files, so only regular files are kept.
        if os.path.isfile(path):
            key = entry.replace(strip, "") if strip else entry
            files[key] = path
    return files


def _read_text_column(path):
    """Return the "Text" values of the CSV file at `path`, skipping rows where it is missing."""
    # newline="" is what the csv module asks for: without it, line breaks
    # inside quoted fields are translated before the parser sees them.
    with open(path, "r", encoding="utf-8", newline="") as handle:
        # Rows shorter than the header yield None for "Text"; dropping them
        # keeps the later " ".join from failing with a TypeError.
        return [row["Text"] for row in csv.DictReader(handle) if row["Text"] is not None]

In [40]:
# Define folder paths (relative, so they depend on the notebook's working
# directory): the reference texts and the translations to score against them.
# Judging by the folder name, the hypotheses are presumably French-to-English
# translations -- confirm against the data.
reference_folder = "../results/Parkinson/en"
hypothesis_folder = "../results/Parkinson/translation_fra_to_eng"

# Evaluate translations: returns a per-file dictionary of METEOR and BLEU
# scores. The bare `scores` expression on the last line displays that
# dictionary as the cell output.
scores = evaluate_from_folders(reference_folder, hypothesis_folder, lang="en")
scores

Evaluating Files:   0%|          | 0/3 [00:00<?, ?it/s]

Processing: 7-1_script_interview_clinique_3_21-08-2020.csv
Reference: So, still about this feeling of presence in the back, this old man in a grey suit. Do you sometimes feel that this is the person, this presence trying to interact with you, communicate with you? Not really, because they still talk to each other, but very slowly. The gentleman in grey? Yes, but he talks to the others who are with this grey gentleman. I see him, I see him every night, but in principle he is alone. He’s all alone. Okay. So, he’s not trying to talk to you? No. Do you feel like he’s trying to read your mind or that he already knows what you’re thinking? I don’t know. Sometimes I wonder, do they already know what’s going to happen tomorrow? Does that mean? Yes, because. How do you explain it? As if he knew what was going to happen tomorrow. Who could predict what was going to happen? Yes, something like that? Predict? Yes, or maybe he does a calculation. A calculation to know an average? I don’t know what 

Evaluating Files: 100%|██████████| 3/3 [00:00<00:00,  5.24it/s]


{'7-1_script_interview_clinique_3_21-08-2020.csv': {'METEOR': 0.6640033880001245,
  'BLEU': 0.6391605177587003},
 '7-1_script_interview_clinique_4_21-08-2020.csv': {'METEOR': 0.8067108858215382,
  'BLEU': 0.6823608909809274},
 '7-2_script_interview_clinique_1_13-08-2020.csv': {'METEOR': 0.5677068874030731,
  'BLEU': 0.6427476275777899}}