In [None]:

import os

import pandas as pd

from automatic_evaluation.scores import (
    sentence_bleu_scores,
    TER_sentence,
    ChrF_sentence,
)

# Make the project root the working directory so relative paths resolve.
try:
    # Docker setup: /lab is used as the project root.
    os.chdir("/lab")
except FileNotFoundError:
    # No /lab on this machine. If the current directory is named "scripts",
    # step up one level; otherwise stay where we are.
    here = os.path.abspath("")
    if os.path.basename(here) == "scripts":
        os.chdir(os.path.dirname(here))

print(os.getcwd())

In [None]:
# Workbook to score (alternative input: "DATA/Preselectie-D2.xlsx").
filename = "DATA/Preselectie-D3.xlsx"

df = pd.read_excel(filename)

# Source sentences and the human reference translations.
src, ref = df["English"].values, df["Human Translation"].values

# Output of each machine-translation system, one array per column.
deepl, modernmt, opennmt = (
    df[column].values for column in ("DeepL", "ModernMT", "OpenNMT")
)

def convert_nan(x: object) -> str:
    """Return a single spreadsheet cell as text, mapping missing values to "".

    Args:
        x: One scalar cell value.

    Returns:
        ``x`` unchanged when it is already a string; the empty string when
        ``x`` is a missing value (``NaN``, ``None``, ``pd.NA``, ``NaT``);
        otherwise ``str(x)``.

    Non-string, non-missing cells (for example a cell that holds a bare
    number) are converted to text rather than being silently replaced by an
    empty string.
    """
    if isinstance(x, str):
        return x
    # pd.isna recognises pandas' missing-value markers for scalar inputs.
    if pd.isna(x):
        return ""
    return str(x)

# Run every column that is passed to the metrics through convert_nan,
# turning each array into a plain list of strings.
deepl, modernmt, opennmt, ref = (
    list(map(convert_nan, column)) for column in (deepl, modernmt, opennmt, ref)
)


In [None]:
# Spot-check the cleaned DeepL output: show only the first few entries
# instead of dumping the whole column into the notebook output.
deepl[:5]

In [None]:
# Scoring functions imported from automatic_evaluation.scores, keyed by the
# label used in the output column names.
metrics = dict(
    BLEU=sentence_bleu_scores,
    TER=TER_sentence,
    ChrF=ChrF_sentence,
)

# System outputs to score, keyed by the label used in the output column names.
systems = {"DeepL": deepl, "ModernMT": modernmt, "OpenNMT": opennmt}

for metric_name, metric in metrics.items():
    print(metric_name)

    # One new column per system and metric, e.g. "DeepL BLEU"; each scoring
    # function is called as metric(references, system_output).
    for system_name, hypotheses in systems.items():
        df[f"{system_name} {metric_name}"] = metric(ref, hypotheses)



# Save the table with the added score columns under the input path with
# "-scores" inserted before the extension ("name.xlsx" -> "name-scores.xlsx").
root, ext = os.path.splitext(filename)
filename_out = f"{root}-scores{ext}"
df.to_excel(filename_out, index=False)
