# Overview

Samples a set of translations for review by the NRCs. We are looking for a judgement on whether Aya translations are better (or worse) than DeepL translations.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the translation outputs and key every row by (sctid, language);
# later cells read the language from the second index level.
df = pd.read_csv("data/translation_outputs/translations.csv")
df = df.set_index(["sctid", "language"])
# Show three random rows as a quick sanity check of the load.
df.sample(3)

In [None]:
def related_translations(row):
    """Return the row's RAG prompt with the boilerplate stripped out.

    Removes, in this order, every occurrence of: the translation
    instruction for the row's language, the source term, and double-quote
    characters. The final character of the result is then dropped.

    Args:
        row: A tuple from ``DataFrame.itertuples()`` whose ``Index`` is a
            ``(sctid, language)`` pair and which exposes ``fsn``,
            ``hierarchy`` and ``rag_aya_prompt`` string attributes.

    Returns:
        The remaining prompt text.
        NOTE(review): presumably this is the list of related translations
        that was embedded in the prompt, and the dropped last character is
        a trailing newline -- confirm against the prompt template.
    """
    target_language = row.Index[1]
    # The FSN carries its hierarchy tag in parentheses; the prompt uses the
    # term without that tag.
    term = row.fsn.replace(f"({row.hierarchy})", "").strip()
    instruction = f"Translate the following clinical concept into {target_language}: "

    remainder = row.rag_aya_prompt
    # Order matters: the instruction must go before the term and the quotes.
    for fragment in (instruction, term, '"'):
        remainder = remainder.replace(fragment, "")
    return remainder[:-1]

In [None]:
# Compute the stripped prompt text for every row, in frame order.
df["related_translations"] = list(map(related_translations, df.itertuples()))

In [None]:
# Draw 100 random rows for each evaluation language and stack them into a
# single frame that keeps only the columns needed for the review.
# NOTE(review): no random_state is passed, so every run draws a different
# sample, and sample(n=100) raises if a language has fewer than 100 rows.
eval_languages = ["Korean", "Dutch", "Swedish", "Estonian"]
eval_columns = [
    "sctid",
    "language",
    "fsn",
    "related_translations",
    "deepl_translation",
    "rag_aya_translation",
]

sample_df = pd.concat([
    # Index level 1 is the language; reset_index turns both index levels
    # back into ordinary columns so they can be selected afterwards.
    df.loc[df.index.get_level_values(1) == language]
    .reset_index()
    .sample(n=100)
    .loc[:, eval_columns]
    for language in eval_languages
])

In [None]:
# Blind the review: for each row, randomly decide which system's output is
# shown as "A"; the other system's output becomes "B".
sample_df["translation_A_source"] = [
    "rag_aya_translation" if np.random.rand() > 0.5 else "deepl_translation"
    for _ in range(len(sample_df))
]
# B always comes from whichever system A did not.
other_source = {
    "deepl_translation": "rag_aya_translation",
    "rag_aya_translation": "deepl_translation",
}
sample_df["translation_B_source"] = [
    other_source[source] for source in sample_df["translation_A_source"]
]
# Copy the actual translation text into the blinded A/B columns.
sample_df["translation_A"] = [
    aya if source == "rag_aya_translation" else deepl
    for source, aya, deepl in zip(
        sample_df["translation_A_source"],
        sample_df["rag_aya_translation"],
        sample_df["deepl_translation"],
    )
]
sample_df["translation_B"] = [
    aya if source == "rag_aya_translation" else deepl
    for source, aya, deepl in zip(
        sample_df["translation_B_source"],
        sample_df["rag_aya_translation"],
        sample_df["deepl_translation"],
    )
]
sample_df.sample(3)

In [None]:
# Files sent to the review teams, one per language. Only the blinded columns
# are written: the *_source columns and the per-system translation columns
# are left out so the origin of A and B is not revealed.
reviewer_columns = ['sctid', 'fsn', 'related_translations', 'translation_A', 'translation_B']
for language, grp in sample_df.groupby("language"):
    grp.loc[:, reviewer_columns].to_csv(
        f"./data/human_eval_data/{language}_evaluation.csv",
        index=False,
        encoding="utf-8",
    )

In [None]:
# Master copy with every column, including which system produced A and B,
# so the reviewers' answers can be matched back and scored.
sample_df.to_csv(
    "./data/human_eval_data/human_eval_master.csv",
    index=False,
    encoding="utf-8",  # pandas' default, spelled out to match the per-language export
)