# TODO

- Use the preferred terms rather than the first synonyms

In [18]:
# !pip install transformers
# !pip install deepl
# !pip install tqdm
# !pip install evaluate
# !pip install termcolor
# !pip install Levenshtein
# !pip install nltk
# !pip install cer

Collecting cer
  Downloading cer-1.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading cer-1.2.0-py3-none-any.whl (17 kB)
Installing collected packages: cer
Successfully installed cer-1.2.0


In [4]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from snomed_graph import *
import getpass
import deepl
from tqdm.notebook import tqdm
import json
import numpy as np
import evaluate
from termcolor import colored
from collections import namedtuple
from operator import __or__
from functools import reduce
from ast import literal_eval
from Levenshtein import ratio
from itertools import chain

In [5]:
AYA_CHECKPOINT = "CohereForAI/aya-101"
PATH_TO_SERIALIZED_SNOMED_GRAPH = "./data/snomed_graph/full_concept_graph.gml"
PATH_TO_TRANSLATION_SAMPLES = "./data/prepared_translation_data/samples.csv"
PATH_TO_ALL_TRANSLATION_REFERENCES = "./data/prepared_translation_data/sampling_df.csv"
PATH_TO_DEEPL_TRANSLATION_RESULTS = "./data/cache/deepl_results.json"
PATH_TO_AYA_VANILLA_TRANSLATION_RESULTS = "./data/cache/aya_results_vanilla.json"
PATH_TO_AYA_ENRICHED_TRANSLATION_RESULTS = "./data/cache/aya_results_enriched.json"

In [6]:
DEEPL_AUTH_KEY = getpass.getpass()

 ········


In [7]:
langcodes = {
    "Dutch": "NL",
    "Estonian": "ET",
    "Korean": "KO",
    "Swedish": "SV",
}

In [8]:
important_attributes = {
    # 'Access (attribute)',
    # 'After (attribute)',
    'Associated finding (attribute)',
    'Associated morphology (attribute)',
    'Associated procedure (attribute)',
    'Associated with (attribute)',
    'Before (attribute)',
    'Causative agent (attribute)',
    'Characterizes (attribute)',
    # 'Clinical course (attribute)',
    'Component (attribute)',
    'Direct device (attribute)',
    'Direct morphology (attribute)',
    'Direct site (attribute)',
    'Direct substance (attribute)',
    'Due to (attribute)',
    'During (attribute)',
    # 'Finding context (attribute)',
    'Finding informer (attribute)',
    'Finding method (attribute)',
    'Finding site (attribute)',
    'Has absorbability (attribute)',
    'Has active ingredient (attribute)',
    'Has basic dose form (attribute)',
    'Has basis of strength substance (attribute)',
    'Has coating material (attribute)',
    'Has compositional material (attribute)',
    'Has concentration strength denominator unit (attribute)',
    'Has concentration strength numerator unit (attribute)',
    'Has device intended site (attribute)',
    'Has disposition (attribute)',
    'Has dose form administration method (attribute)',
    'Has dose form intended site (attribute)',
    'Has dose form release characteristic (attribute)',
    'Has dose form transformation (attribute)',
    'Has filling (attribute)',
    'Has focus (attribute)',
    'Has ingredient qualitative strength (attribute)',
    'Has intent (attribute)',
    # 'Has interpretation (attribute)',
    'Has manufactured dose form (attribute)',
    'Has precise active ingredient (attribute)',
    'Has presentation strength denominator unit (attribute)',
    'Has presentation strength numerator unit (attribute)',
    'Has realization (attribute)',
    'Has specimen (attribute)',
    'Has state of matter (attribute)',
    'Has surface texture (attribute)',
    'Has target population (attribute)',
    'Has unit of presentation (attribute)',
    'Indirect device (attribute)',
    'Indirect morphology (attribute)',
    'Inherent location (attribute)',
    'Inheres in (attribute)',
    'Interprets (attribute)',
    # 'Is a (attribute)',
    'Is modification of (attribute)',
    'Is sterile (attribute)',
    'Laterality (attribute)',
    'Measurement method (attribute)',
    'Method (attribute)',
    'Occurrence (attribute)',
    'Pathological process (attribute)',
    'Plays role (attribute)',
    'Precondition (attribute)',
    'Priority (attribute)',
    'Procedure context (attribute)',
    'Procedure device (attribute)',
    'Procedure morphology (attribute)',
    'Procedure site (attribute)',
    'Procedure site - Direct (attribute)',
    'Procedure site - Indirect (attribute)',
    'Process acts on (attribute)',
    'Process duration (attribute)',
    'Process extends to (attribute)',
    'Process output (attribute)',
    'Property (attribute)',
    'Recipient category (attribute)',
    'Relative to (attribute)',
    'Relative to part of (attribute)',
    'Revision status (attribute)',
    'Route of administration (attribute)',
    # 'Scale type (attribute)',
    # 'Severity (attribute)',
    'Specimen procedure (attribute)',
    'Specimen source identity (attribute)',
    'Specimen source morphology (attribute)',
    'Specimen source topography (attribute)',
    'Specimen substance (attribute)',
    # 'Subject relationship context (attribute)',
    'Surgical approach (attribute)',
    'Technique (attribute)',
    # 'Temporal context (attribute)',
    # 'Temporally related to (attribute)',
    # 'Time aspect (attribute)',
    # 'Units (attribute)',
    'Using access device (attribute)',
    'Using device (attribute)',
    'Using energy (attribute)',
    'Using substance (attribute)'
}

# 1. Load the data

## 1.1 Load the concepts to translate

In [25]:
# Columns are: sctid, fsn, hierarchy, language, context_tier, depth_tier, translations
all_df = (
    pd.read_csv(PATH_TO_TRANSLATION_SAMPLES)
    .set_index(["sctid", "language"])
)

all_df.reference_translations = all_df.reference_translations.apply(literal_eval)

all_df.shape[0]

12640

In [10]:
# all_df = all_df.sample(20)

## 1.2 Load the full set of reference translations

In [11]:
# Columns are: sctid, fsn, hierarchy, language, context_tier, depth_tier, translations
ref_df = (
    pd.read_csv(PATH_TO_ALL_TRANSLATION_REFERENCES)
    .set_index(["sctid", "language"])
)

ref_df.translations = ref_df.translations.apply(literal_eval)

ref_df = ref_df.rename(axis="columns", mapper={"translations": "reference_translations"})

ref_df.shape[0]

651355

## 1.3 Load the SNOMED graph object

In [12]:
G = SnomedGraph.from_serialized(PATH_TO_SERIALIZED_SNOMED_GRAPH)

SNOMED graph has 361179 vertices and 1179749 edges


# 2. Evaluation Harness

In [19]:
google_bleu = evaluate.load("google_bleu")
character = evaluate.load("character")

In [20]:
def exact_match(predictions, references):
    N = len(predictions)
    n = 0
    for p, r in zip(predictions, references):
        if p in r:
            n += 1
    return {'exact_match': float(n)/N}

In [21]:
def levenshtein_ratio(predictions, references):
    ratios = [
        np.max([ratio(p, r) for r in refs])
        for p, refs in zip(predictions, references)
    ]
    return {'levenshtein_ratio': np.mean(ratios)}

In [22]:
def evaluate_translations(row_or_df, target_column):
    if isinstance(row_or_df, pd.DataFrame):
        assert target_column in row_or_df.columns    
        candidates = list(row_or_df.to_dict()[target_column].values())
        references = row_or_df.reference_translations.tolist()
    else:
        candidates = [getattr(row_or_df, target_column)]
        references = [row_or_df.reference_translations]
    results = [
        exact_match(predictions=candidates, references=references),
        levenshtein_ratio(predictions=candidates, references=references),
        google_bleu.compute(predictions=candidates, references=references),
        character.compute(predictions=candidates, references=references),
    ]
    results = reduce(__or__, results, dict())
    return results

# 3. Generate baseline translations with DeepL

In [32]:
translator = deepl.Translator(DEEPL_AUTH_KEY)

def translate_with_deepl(df, G):
    
    with open(PATH_TO_DEEPL_TRANSLATION_RESULTS, "r") as f:
        deepl_results = json.load(f)
    
    for it, row in enumerate(tqdm(df.itertuples(), total=df.shape[0])):
        sctid, language = row.Index
        langcode = langcodes[language]
        source_concept = G.get_concept_details(sctid)
        source_preferred_term = source_concept.fsn.replace(f"({source_concept.hierarchy})", "").strip()
        key = str(sctid) + "_" + language
        try:
            yield deepl_results[key]
        except KeyError:
            deepl_result = translator.translate_text(source_preferred_term, target_lang=langcode)
            deepl_results[key] = deepl_result.text
            yield deepl_result.text
        if it % 100 == 0:
            with open(PATH_TO_DEEPL_TRANSLATION_RESULTS, "w") as f:
                json.dump(deepl_results, f)

    with open(PATH_TO_DEEPL_TRANSLATION_RESULTS, "w") as f:
        json.dump(deepl_results, f)

In [33]:
all_df["deepl_translation"] = list(translate_with_deepl(all_df, G))

  0%|          | 0/12640 [00:00<?, ?it/s]

In [34]:
evaluate_translations(all_df, "deepl_translation")

{'exact_match': 0.08662974683544304,
 'levenshtein_ratio': 0.7370298864393613,
 'google_bleu': 0.2459922409495806,
 'cer_score': 0.381689058931287}

# 4. Generate translations using "vanilla" Aya model.

In [None]:
tokenizer = AutoTokenizer.from_pretrained(AYA_CHECKPOINT)
aya_model = AutoModelForSeq2SeqLM.from_pretrained(AYA_CHECKPOINT)

model.safetensors.index.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/11 [00:00<?, ?it/s]

model-00001-of-00011.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00004-of-00011.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00005-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00006-of-00011.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00007-of-00011.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00008-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00009-of-00011.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00010-of-00011.safetensors:   0%|          | 0.00/2.99G [00:00<?, ?B/s]



model-00011-of-00011.safetensors:   0%|          | 0.00/4.10G [00:00<?, ?B/s]

In [17]:
def aya_postprocessor(result):
    return (
        result
        .replace(tokenizer.eos_token, "")
        .replace(tokenizer.pad_token, "")
        .replace(".", "")
        .strip()
    )

## 4.1 Test Aya with a few translations into English.  Let's see what kind of responses it produces

In [None]:
def test_translate_with_aya(df, G):
    for row in tqdm(df.itertuples(), total=df.shape[0]):
        sctid, language = row.Index
        preferred_term = row.reference_translations[0]
        reference_translations = G.get_concept_details(sctid).synonyms
        # ICL
        try:
            icl_row = next(df[(df.index.get_level_values(0) != sctid) & (df.index.get_level_values(1) == language)].sample(1).itertuples())
        except ValueError:
            pass
        else:
            icl_sctid = icl_row.Index[0]
            icl_preferred_term = icl_row.reference_translations[0]
            icl_reference_translations = G.get_concept_details(icl_sctid).synonyms
            # construct prompt
            prompt_template = 'Translate the following clinical concept into English: "{{PREFERRED_TERM}}". {{TRANSLATED_TERM}}.\n'
            prompt = (
                prompt_template.replace("{{PREFERRED_TERM}}", icl_preferred_term).replace("{{TRANSLATED_TERM}}", icl_reference_translations[0]) +
                prompt_template.replace("{{PREFERRED_TERM}}", preferred_term).replace("{{TRANSLATED_TERM}}.\n", "")
            )
            print(prompt)
            input = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
            output = aya_model.generate(input, max_new_tokens=256)
            result = tokenizer.decode(output[0])
            result = aya_postprocessor(result)
            
            print(
                colored("\nSCTID: ", "red", attrs=['bold']),
                sctid,
                colored("\nSource Language: ", "red", attrs=['bold']),
                language,
                colored("\nPreferred Term: ", "red", attrs=['bold']),
                preferred_term,
                colored("\nReference Translations: ", "red", attrs=['bold']),
                reference_translations,
                colored("\nAya Translation: ", "red", attrs=['bold']),            
                result,
                "\n\n",
                # colored("\nAya Scores: ", "red", attrs=['bold']),
                # ", ".join([k+": "+str(v) for k,v in scores.items()]),
            )

In [None]:
test_translate_with_aya(all_df.sample(10), G)

## 4.2 Translate from English into our target languages

In [77]:
def prepare_aya_vanilla_prompt(row, G, df):
    sctid, language = row.Index
    concept = G.get_concept_details(sctid)
    preferred_term = concept.fsn.replace(f"({concept.hierarchy})", "").strip()
    if language == "Swedish":
        return f'Translate the following clinical concept into Swedish: "Pain disorder with psychological factor". smärtsyndrom med psykologisk faktor.\nTranslate the following clinical concept into Swedish: "{preferred_term}". '
    elif language == "Estonian":
        return f'Translate the following clinical concept into Estonian: "Osseous choristoma". Luuline koristoom. \nTranslate the following clinical concept into Estonian: "{preferred_term}". '
    elif language == "Korean":
        return f'Translate the following clinical concept into Korean: "Endoscopic excision of lesion of esophagus". 식도 병변 내시경 절제. \nTranslate the following clinical concept into Korean: "{preferred_term}". '
    elif language == "Dutch":
        return f'Translate the following clinical concept into Dutch: "Open repair of lumbar hernia using biological mesh".  open hernioplastiek van hernia lumbalis met biologisch matje.\nTranslate the following clinical concept into Dutch: "{preferred_term}". '
    else:
        raise ValueError()

In [78]:
def translate_with_aya(df, G, prompt_assembler, ref_df=None, results_filepath=None, rebuild=False, save=False):

    if rebuild:
        results = dict()
    else:
        with open(results_filepath, "r") as f:
            results = json.load(f)
    
    for row in tqdm(df.itertuples(), total=df.shape[0]):
        sctid, language = row.Index
        key = str(sctid) + "_" + language
        try:
            yield results[key]
        except KeyError:
            prompt = prompt_assembler(row, G, ref_df)
            input = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
            output = aya_model.generate(input, max_new_tokens=256)
            result = tokenizer.decode(output[0])
            result = aya_postprocessor(result)
            results[key] = result
            yield result

    if save:
        with open(results_filepath, "w") as f:
            json.dump(results, f)

In [79]:
all_df["aya_vanilla_translation"] = list(translate_with_aya(
    all_df, G, prepare_aya_vanilla_prompt, PATH_TO_AYA_VANILLA_TRANSLATION_RESULTS, rebuild=True, save=False
))

  0%|          | 0/20 [00:00<?, ?it/s]

In [80]:
evaluate_translations(all_df, "aya_vanilla_translation")

{'exact_match': 0.05,
 'levenshtein_ratio': 0.6441056628959896,
 'google_bleu': 0.12608695652173912,
 'cer_score': 0.5268120738942421}

# 5. Evaluate Aya with enriched prompt

In [None]:
enriched_prompt_template = """
You are a medical translation expert.
Your job is to translate formal clinical terms found within the SNOMED Concept Terminology into {{TARGET_LANGUAGE}}.
The concept you need to translate is “{{PREFERRED_TERM}}”.
Here is some information about the concept which may help you:
{{SYNONYMS_FRAGMENT}}
{{HIERARCHY_FRAGMENT}}
{{PARENTS_FRAGMENT}}
{{RELATIONSHIPS_FRAGMENT}}
Now, the translation of “{{PREFERRED_TERM}}” into {{TARGET_LANGUAGE}} is:"""

In [None]:
def generate_prompt_synonyms_fragment(preferred_term, synonyms):
    if len(synonyms) == 0:
        return ""
    else:
        syn_str = '"' + '" and "'.join(synonyms) + '"'
        return f'In English, synonyms for "{preferred_term}" include: {syn_str}.'

In [None]:
def generate_prompt_hierarchy_fragment(preferred_term, hierarchy):
    return f'"{preferred_term}" is a {hierarchy}.'

In [None]:
def generate_prompt_parents_fragment(preferred_term, parents):
    fragment = ""
    for p in parents:
        fragment += f'"{preferred_term}" is a kind of {p.synonyms[0]}.\n'
    return fragment

In [None]:
def generate_prompt_relationships_fragment(preferred_term, relationship_groups):
    fragment = ""
    for g in relationship_groups:
        for r in g.relationships:
            type = r.type.replace(" (attribute)", "").lower()
            tgt = r.tgt.synonyms[0]
            fragment += f'"{preferred_term}" has {type} {tgt}\n'
    return fragment

In [None]:
def prepare_aya_enriched_prompt(row, G, df):
    sctid, language = row.Index
    concept = G.get_full_concept(sctid)
    preferred_term = concept.synonyms[0]
    return (
        enriched_prompt_template
        .replace("{{TARGET_LANGUAGE}}", language)
        .replace("{{PREFERRED_TERM}}", preferred_term)
        .replace("{{SYNONYMS_FRAGMENT}}", generate_prompt_synonyms_fragment(preferred_term, concept.synonyms[1:]))
        .replace("{{HIERARCHY_FRAGMENT}}", generate_prompt_hierarchy_fragment(preferred_term, concept.hierarchy))
        .replace("{{PARENTS_FRAGMENT}}", generate_prompt_parents_fragment(preferred_term, concept.parents))
        .replace("{{RELATIONSHIPS_FRAGMENT}}", generate_prompt_relationships_fragment(preferred_term, concept.inferred_relationship_groups))
        .replace("\n\n", "\n")
    )

In [None]:
print(prepare_aya_enriched_prompt(next(all_df.itertuples()), G, df))

In [None]:
df["aya_enriched_translation"] = list(translate_with_aya(all_df, G, prepare_aya_enriched_prompt, PATH_TO_AYA_ENRICHED_TRANSLATION_RESULTS, rebuild=True))

In [None]:
evaluate_translations(all_df, "aya_enriched_translation")

# 6. Translate Context Tier 1 Concepts

In [82]:
def prepare_aya_ct1_prompt(row, G, ref_df):
    sctid, language = row.Index
    concept = G.get_full_concept(sctid)
    preferred_term = concept.synonyms[0]
    parent_concepts = [
        G.get_full_concept(p.sctid) for p in concept.parents
    ]
    parent_data = [
        (
            c.fsn.replace(f"({c.hierarchy})", "").strip(),
            ref_df.loc[(c.sctid, language)].reference_translations[0],
        )
        for c in parent_concepts
    ]
    prompt_fragments = [
        f'Translate the following clinical concept into {language}: "{pt}". {rt}.'
        for pt, rt in parent_data
    ]    
    prompt = '\n'.join(prompt_fragments)
    prompt += f'\nTranslate the following clinical concept into {language}: "{preferred_term}". '
    
    return prompt

In [83]:
ct1_df = all_df[all_df.context_tier.isin(["Tier 1", "Tier 2"])]
ct1_df.shape[0]

14

In [84]:
ct1_df["aya_ct1_translation"] = list(translate_with_aya(ct1_df, G, prepare_aya_ct1_prompt, ref_df, None, rebuild=True, save=False))

  0%|          | 0/14 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ct1_df["aya_ct1_translation"] = list(translate_with_aya(ct1_df, G, prepare_aya_ct1_prompt, ref_df, None, rebuild=True, save=False))


In [85]:
evaluate_translations(ct1_df, "aya_ct1_translation")

{'exact_match': 0.07142857142857142,
 'levenshtein_ratio': 0.8101746877439974,
 'google_bleu': 0.4685714285714286,
 'cer_score': 0.3016446552134548}

# 7. Translate all Context Tier 2 Concepts

In [87]:
ct2_df = ct1_df[ct1_df.context_tier == "Tier 2"]
ct2_df.shape[0]

8

In [88]:
def prepare_aya_ct2_prompt(row, G, ref_df):
    sctid, language = row.Index
    concept = G.get_full_concept(sctid)
    preferred_term = concept.synonyms[0]
    parent_concepts = [
        G.get_full_concept(p.sctid) for p in concept.parents
    ]
    parent_data = [
        (
            c.fsn.replace(f"({c.hierarchy})", "").strip(),
            ref_df.loc[(c.sctid, language)].reference_translations[0],
        )
        for c in parent_concepts
    ] 
    related_concepts = [
        G.get_full_concept(r.tgt.sctid)
        for g in concept.inferred_relationship_groups
        for r in g.relationships
        if r.type in important_attributes        
    ]    
    relationship_data = [
        (
            c.fsn.replace(f"({c.hierarchy})", "").strip(),
            ref_df.loc[(c.sctid, language)].reference_translations[0],
        )
        for c in related_concepts
    ]
    prompt_fragments = [
        f'Translate the following clinical concept into {language}: "{pt}". {rt}.'
        for pt, rt in chain(parent_data, relationship_data)
    ]
    prompt = '\n'.join(prompt_fragments)
    prompt += f'\nTranslate the following clinical concept into {language}: "{preferred_term}". '
    
    return prompt

In [92]:
print(prepare_aya_ct2_prompt(next(ct2_df.sample(1).itertuples()), G, ref_df))

Translate the following clinical concept into Swedish: "Disease caused by cestodes of order Cyclophyllidea". sjukdom orsakad av cestoder av ordningen Cyclophyllidea.
Translate the following clinical concept into Swedish: "Parasitic process". parasitprocess.
Translate the following clinical concept into Swedish: "Family Hymenolepididae". familjen Hymenolepididae.
Translate the following clinical concept into Swedish: "Disease due to Hymenolepididae". 


In [93]:
ct2_df["aya_ct2_translation"] = list(translate_with_aya(ct2_df, G, prepare_aya_ct2_prompt, ref_df, None, rebuild=True, save=False))

  0%|          | 0/8 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ct2_df["aya_ct2_translation"] = list(translate_with_aya(ct2_df, G, prepare_aya_ct2_prompt, ref_df, None, rebuild=True, save=False))


In [94]:
evaluate_translations(ct2_df, "deepl_translation")

{'exact_match': 0.0,
 'levenshtein_ratio': 0.7589359181736978,
 'google_bleu': 0.22727272727272727,
 'cer_score': 0.37792829171521103}

In [95]:
evaluate_translations(ct2_df, "aya_vanilla_translation")

{'exact_match': 0.125,
 'levenshtein_ratio': 0.7022009079421869,
 'google_bleu': 0.20652173913043478,
 'cer_score': 0.5259903791737408}

In [96]:
evaluate_translations(ct2_df, "aya_ct1_translation")

{'exact_match': 0.125,
 'levenshtein_ratio': 0.8029320404816109,
 'google_bleu': 0.40476190476190477,
 'cer_score': 0.33861272454562386}

In [97]:
evaluate_translations(ct2_df, "aya_ct2_translation")

{'exact_match': 0.0,
 'levenshtein_ratio': 0.7669539464016015,
 'google_bleu': 0.36363636363636365,
 'cer_score': 0.35773258789299145}

# 8. Final Evaluations

# Sampling

In [28]:
for row in ct1_df.sample(10).itertuples():
    sctid, language = row.Index
    preferred_term = G.get_concept_details(sctid).synonyms[0]
    deepl_results = evaluate_translations(row, "deepl_translation")
    vanilla_aya_results = evaluate_translations(row, "aya_vanilla_translation")
    ct1_aya_results = evaluate_translations(row, "aya_ct1_translation")
    print(
        colored("\nSCTID: ", "red", attrs=['bold']),
        sctid,
        colored("\nTarget Language: ", "red", attrs=['bold']),
        language,
        colored("\nEnglish Preferred Term: ", "red", attrs=['bold']),
        preferred_term,
        colored("\nReference Translations: ", "red", attrs=['bold']),
        row.reference_translations,
        colored("\nDeepL Translation: ", "red", attrs=['bold']),
        row.deepl_translation,
        colored("\nDeepL Scores: ", "red", attrs=['bold']),
        ", ".join([k+": "+str(v) for k,v in deepl_results.items()]),
        colored("\nVanilla Aya Translation: ", "red", attrs=['bold']),
        row.aya_vanilla_translation,
        colored("\nVanilla Aya Scores: ", "red", attrs=['bold']),
        ", ".join([k+": "+str(v) for k,v in vanilla_aya_results.items()]),        
        colored("\nAya CT1 Translation: ", "red", attrs=['bold']),
        row.aya_ct1_translation,
        colored("\nAya CT1 Scores: ", "red", attrs=['bold']),
        ", ".join([k+": "+str(v) for k,v in ct1_aya_results.items()]),                
    )

[1m[31m
SCTID: [0m 730834007 [1m[31m
Target Language: [0m Swedish [1m[31m
English Preferred Term: [0m Entire fibrofatty tissue [1m[31m
Reference Translations: [0m ['fibrös fettvävnad, som helhet'] [1m[31m
DeepL Translation: [0m Hela den fibro-feta vävnaden [1m[31m
DeepL Scores: [0m exact_match: 0.0, levenshtein_ratio: 0.49122807017543857, google_bleu: 0.0, cer_score: 0.8928571428571429 [1m[31m
Vanilla Aya Translation: [0m hela fibroserat vävnad [1m[31m
Vanilla Aya Scores: [0m exact_match: 0.0, levenshtein_ratio: 0.5098039215686274, google_bleu: 0.0, cer_score: 1.0 [1m[31m
Aya CT1 Translation: [0m hela fibrös fettvävnad [1m[31m
Aya CT1 Scores: [0m exact_match: 0.0, levenshtein_ratio: 0.6666666666666667, google_bleu: 0.21428571428571427, cer_score: 0.9545454545454546
[1m[31m
SCTID: [0m 727929002 [1m[31m
Target Language: [0m Swedish [1m[31m
English Preferred Term: [0m Entire artery of female pelvic region [1m[31m
Reference Translations: [0m ['artä