In [1]:
import json
from pathlib import Path

import pandas as pd
from pandas.io.formats.style import Styler


def round_and_percentage(num: float) -> float:
    """Express a fraction as a percentage rounded to two decimals.

    Args:
        num: The value to convert; it is multiplied by 100.

    Returns:
        ``num * 100`` rounded to 2 decimal places.
    """
    percentage = num * 100
    return round(percentage, 2)


# Display name of each OCR model, keyed by the prefix of its output directory name.
model_prefix_name_map = dict(
    transk="Transkribus",
    tess="Tesseract",
    trocr="TrOCR",
)

# Display label for each dataset identifier.
# NOTE(review): presumably these describe training-data combinations; the map
# is not used in the cells visible here, so confirm against its consumers.
dataset_map = dict(
    ub_smi="GT-Sámi (without base)",
    smi="GT-Sámi",
    smi_nor="GT-Sámi + GT-Nor",
    smi_pred="GT-Sámi + Pred-Sámi",
    smi_nor_pred="GT-Sámi + GT-Nor + Pred-Sámi",
)

In [2]:
# Row index of the results table: one row per (score, language) pair.
# The aggregate row is labelled "all" (not "overall") so that it agrees with
# the language codes used when the scores are loaded and with the label
# mapping applied when the table is styled; a mismatching label would make
# that relabelling fail with a KeyError.
index = pd.MultiIndex.from_product(
    [["CER", "WER", "Sámi letter F1"], ["all", "sma", "sme", "smj", "smn"]]
)

In [3]:
from collections import defaultdict

## TODO: replace with test set data
best_tesseract_model = "../output/test_evaluation/line_level/tess_sb_smi_nor_pred"
best_trocr_model = "../output/test_evaluation/line_level/trocr_smi_pred_synth"
best_transkribus_model = "../output/test_evaluation/line_level/transk_smi_nor_pred"


langs_in_order = ["all", "sma", "sme", "smj", "smn"]
scores_in_order = ["CER", "WER", "Sámi letter F1"]
# Field of the per-language JSON result files that holds each reported score.
score_json_keys = {
    "CER": "CER_concat",
    "WER": "WER_concat",
    "Sámi letter F1": "special_char_F1_mean",
}
index = pd.MultiIndex.from_product([scores_in_order, langs_in_order])

# Model display name -> its scores, flattened in the same order as `index`.
df_data = {}

for model_p in [best_trocr_model, best_tesseract_model, best_transkribus_model]:
    model_dir = Path(model_p)
    # The directory name starts with the model prefix, e.g. "tess_..." -> "tess".
    model_prefix = model_dir.name.partition("_")[0]
    # score name -> {language code -> score in percent}
    model_scores = defaultdict(dict)
    loaded_langs = set()

    # Sorted so the result does not depend on the file-system listing order.
    for lang_file in sorted(model_dir.glob("*rows.json")):
        # The language code is the part of the file name before the first "_".
        lang = lang_file.stem.split("_")[0]
        scores = json.loads(lang_file.read_text(encoding="utf-8"))
        for score_name, json_key in score_json_keys.items():
            model_scores[score_name][lang] = round_and_percentage(scores[json_key])
        loaded_langs.add(lang)

    # A missing directory or language file would otherwise only surface as a
    # bare KeyError in the comprehension below; report what is missing instead.
    missing_langs = [lang for lang in langs_in_order if lang not in loaded_langs]
    if missing_langs:
        raise FileNotFoundError(
            f"No '*rows.json' score file found for language(s) {missing_langs} in {model_dir}"
        )

    df_data[model_prefix_name_map[model_prefix]] = [
        model_scores[score][lang] for score in scores_in_order for lang in langs_in_order
    ]

In [4]:
# Assemble the results table: one column per model, rows labelled by the
# (score, language) MultiIndex.
df = pd.DataFrame(data=df_data, index=index)
df

Unnamed: 0,Unnamed: 1,TrOCR,Tesseract,Transkribus
CER,all,0.33,2.49,0.29
CER,sma,0.33,2.86,0.28
CER,sme,0.18,2.23,0.18
CER,smj,0.61,2.19,0.31
CER,smn,0.32,2.67,0.41
WER,all,2.07,3.41,2.11
WER,sma,2.33,6.1,2.15
WER,sme,1.19,0.75,1.06
WER,smj,3.16,2.04,2.96
WER,smn,2.11,5.31,2.69


In [5]:
styler = Styler(df).format("{:.2f}")

# Value range of each score over all models and languages, so that each
# gradient below is scaled per score group rather than per column.
min_cer = df.loc["CER"].min(axis=None)
max_cer = df.loc["CER"].max(axis=None)
min_wer = df.loc["WER"].min(axis=None)
max_wer = df.loc["WER"].max(axis=None)
min_f1 = df.loc["Sámi letter F1"].min(axis=None)
max_f1 = df.loc["Sámi letter F1"].max(axis=None)

# (score, colormap, vmin, vmax): the error rates (marked with a down arrow
# below) use the reversed colormap, F1 (marked with an up arrow) the plain one.
score_gradients = [
    ("CER", "Greens_r", min_cer, max_cer),
    ("WER", "Greens_r", min_wer, max_wer),
    ("Sámi letter F1", "Greens", min_f1, max_f1),
]
for score_name, cmap, vmin, vmax in score_gradients:
    styler.background_gradient(
        cmap=cmap, vmin=vmin, vmax=vmax, subset=pd.IndexSlice[score_name, :]
    )

# LaTeX row labels. `__getitem__` is used on purpose: an index label without
# an entry raises a KeyError instead of silently passing through unformatted.
score_labels = {
    "CER": r"CER \(\downarrow\) [\(\%\)]",
    "WER": r"WER \(\downarrow\) [\(\%\)]",
    "Sámi letter F1": r"Sámi letter F1 \(\uparrow\) [\(\%\)]",
}
language_labels = {
    "all": "Overall",
    "sma": "South",
    "sme": "North",
    "smj": "Lule",
    "smn": "Inari",
}
styler.format_index(score_labels.__getitem__, level=0)
styler.format_index(language_labels.__getitem__, level=1)

latex_table = styler.to_latex(
    convert_css=True, multirow_align="t", hrules=True, clines="skip-last;data"
)

# With clines="skip-last;data" the rule after each score group spans the index
# columns plus the data columns, so derive the column range from the frame
# instead of hard-coding it (2 index levels + 3 models = 5 for this table).
n_table_columns = df.index.nlevels + len(df.columns)
# Only the rules *between* score groups are replaced (2 for the 3 groups of
# this table); any later \cline is left untouched.
n_separators = max(df.index.get_level_values(0).nunique() - 1, 0)

print(
    latex_table.replace(
        rf"\cline{{1-{n_table_columns}}}", r"\midrule", n_separators
    )  # Replace cline with midrule since midrule will add some extra whitespace
)

\begin{tabular}{llrrr}
\toprule
 &  & TrOCR & Tesseract & Transkribus \\
\midrule
\multirow[t]{5}{*}{CER \(\downarrow\) [\(\%\)]} & Overall & {\cellcolor[HTML]{005622}} \color[HTML]{F1F1F1} 0.33 & {\cellcolor[HTML]{E2F4DD}} \color[HTML]{000000} 2.49 & {\cellcolor[HTML]{005120}} \color[HTML]{F1F1F1} 0.29 \\
 & South & {\cellcolor[HTML]{005622}} \color[HTML]{F1F1F1} 0.33 & {\cellcolor[HTML]{F7FCF5}} \color[HTML]{000000} 2.86 & {\cellcolor[HTML]{005020}} \color[HTML]{F1F1F1} 0.28 \\
 & North & {\cellcolor[HTML]{00441B}} \color[HTML]{F1F1F1} 0.18 & {\cellcolor[HTML]{CBEAC4}} \color[HTML]{000000} 2.23 & {\cellcolor[HTML]{00441B}} \color[HTML]{F1F1F1} 0.18 \\
 & Lule & {\cellcolor[HTML]{0A7633}} \color[HTML]{F1F1F1} 0.61 & {\cellcolor[HTML]{C8E9C1}} \color[HTML]{000000} 2.19 & {\cellcolor[HTML]{005321}} \color[HTML]{F1F1F1} 0.31 \\
 & Inari & {\cellcolor[HTML]{005522}} \color[HTML]{F1F1F1} 0.32 & {\cellcolor[HTML]{EDF8E9}} \color[HTML]{000000} 2.67 & {\cellcolor[HTML]{005F26}} \color[HTML]{F