In [1]:
import json
from collections import defaultdict
from pathlib import Path

import pandas as pd
from pandas.io.formats.style import Styler


def round_and_percentage(num: float) -> float:
    """Convert a fraction to a percentage rounded to two decimals.

    Args:
        num: The value to convert, e.g. ``0.0282``.

    Returns:
        ``num`` multiplied by 100 and rounded to two decimal places,
        e.g. ``2.82``.
    """
    percentage = num * 100
    return round(percentage, 2)


# Display name of each OCR engine, keyed by the prefix that comes before the
# first underscore in an evaluation directory name (e.g. "tess" in "tess_sb_...").
model_prefix_name_map = {
    "transk": "Transkribus",
    "tess": "Tesseract",
    "trocr": "TrOCR",
}

# Human-readable label for each training-data configuration identifier.
# NOTE(review): presumably these identifiers are the dataset part of the model
# directory names -- confirm against the code that consumes this mapping.
dataset_map = {
    "ub_smi": "GT-Sámi (without base)",
    "smi": "GT-Sámi",
    "smi_nor": "GT-Sámi + GT-Nor",
    "smi_pred": "GT-Sámi + Pred-Sámi",
    "smi_nor_pred": "GT-Sámi + GT-Nor + Pred-Sámi",
}

In [2]:
# Row index of the results table: one row per (score, language) pair.
# The aggregate row is labelled "all" (not "overall") so that it matches the
# language key used when the scores are loaded and the label mapping passed to
# `Styler.format_index`, which looks labels up with `dict.__getitem__` and
# would raise a KeyError on an unknown label.
index = pd.MultiIndex.from_product(
    [["CER", "WER", "Sámi letter F1"], ["all", "sma", "sme", "smj", "smn"]]
)

In [3]:
## TODO: replace with test set data
## NOTE(review): the paths below already point at `testset_evaluation` --
## confirm whether this TODO is stale and can be removed.
best_tesseract_model = "../../output/testset_evaluation/line_level/tess_sb_smi_nor_pred"
best_trocr_model = "../../output/testset_evaluation/line_level/trocr_smi_pred_synth"
best_transkribus_model = (
    "../../output/testset_evaluation/line_level/transk_smi_nor_pred"
)


# Row order of the results table: every score for every language.
langs_in_order = ["all", "sma", "sme", "smj", "smn"]
scores_in_order = ["CER", "WER", "Sámi letter F1"]
index = pd.MultiIndex.from_product([scores_in_order, langs_in_order])

# Key in each `*rows.json` file that holds the value for a given table score.
score_json_keys = {
    "WER": "WER_concat",
    "CER": "CER_concat",
    "Sámi letter F1": "special_char_F1_mean",
}

# Column name (model display name) -> list of values ordered like `index`.
df_data = {}

for model_p in [best_trocr_model, best_tesseract_model, best_transkribus_model]:
    model_p = Path(model_p)
    if not model_p.exists():
        # Best effort: a model without evaluation output is left out of the
        # table, but say so explicitly instead of printing a bare path.
        print(f"Skipping missing evaluation directory: {model_p}")
        continue
    # The directory name starts with the engine prefix, e.g. "tess" in "tess_sb_...".
    model_prefix, _, _ = model_p.name.partition("_")

    # score label -> {language code -> percentage}
    model_scores = defaultdict(dict)

    for lang_file in model_p.glob("*rows.json"):
        # The language code is the part of the file name before the first underscore.
        lang = lang_file.stem.split("_")[0]
        scores = json.loads(lang_file.read_text(encoding="utf-8"))
        for score, json_key in score_json_keys.items():
            model_scores[score][lang] = round_and_percentage(scores[json_key])

    # Fail with a readable message (same exception type as the plain dict
    # lookup below would raise) when a language file is absent.
    missing = [
        f"{score}/{lang}"
        for score in scores_in_order
        for lang in langs_in_order
        if lang not in model_scores[score]
    ]
    if missing:
        raise KeyError(f"{model_p} has no scores for: {', '.join(missing)}")

    df_data[model_prefix_name_map[model_prefix]] = [
        model_scores[score][lang]
        for score in scores_in_order
        for lang in langs_in_order
    ]

In [4]:
# One column per model, one row per (score, language) pair from `index`.
df = pd.DataFrame(data=df_data, index=index)
df

Unnamed: 0,Unnamed: 1,TrOCR,Tesseract,Transkribus
CER,all,0.65,2.82,0.45
CER,sma,0.57,3.09,0.53
CER,sme,0.77,2.87,0.35
CER,smj,0.77,2.35,0.47
CER,smn,0.44,2.8,0.53
WER,all,3.47,4.88,3.23
WER,sma,4.31,7.99,4.13
WER,sme,2.59,2.94,1.8
WER,smj,4.51,3.28,4.31
WER,smn,3.51,6.73,4.1


In [5]:
styler = Styler(df).format("{:.2f}")

# Colour each score group on its own scale (min/max taken over all models and
# languages of that score). The error rates use the reversed colormap so that
# the lowest values get the darkest green; F1 uses the normal direction.
gradient_cmaps = {"CER": "Greens_r", "WER": "Greens_r", "Sámi letter F1": "Greens"}
for score, cmap in gradient_cmaps.items():
    score_values = df.loc[score]
    styler.background_gradient(
        cmap=cmap,
        vmin=score_values.min(axis=None),
        vmax=score_values.max(axis=None),
        subset=pd.IndexSlice[score, :],
    )

# Replace the raw index labels with the labels used in the typeset table.
# `dict.__getitem__` is used on purpose: an unexpected label raises a KeyError.
styler.format_index(
    {
        "CER": r"CER \(\downarrow\) [\(\%\)]",
        "WER": r"WER \(\downarrow\) [\(\%\)]",
        "Sámi letter F1": r"Sámi letter F1 \(\uparrow\) [\(\%\)]",
    }.__getitem__,
    level=0,
)
styler.format_index(
    {
        "all": "Overall",
        "sma": "South",
        "sme": "North",
        "smj": "Lule",
        "smn": "Inari",
    }.__getitem__,
    level=1,
)

latex_table = styler.to_latex(
    convert_css=True, multirow_align="t", hrules=True, clines="skip-last;data"
)

# The cline spans the index columns plus the data columns, so derive its range
# from the frame instead of hard-coding "1-5"; otherwise the replacement
# silently stops matching when the number of model columns changes.
n_table_columns = df.index.nlevels + len(df.columns)
cline = rf"\cline{{1-{n_table_columns}}}"
# Only the separators between score groups are replaced (one fewer than the
# number of groups); any later cline is left untouched.
n_separators = max(df.index.get_level_values(0).nunique() - 1, 0)

# Replace cline with midrule since midrule will add some extra whitespace
print(latex_table.replace(cline, r"\midrule", n_separators))

\begin{tabular}{llrrr}
\toprule
 &  & TrOCR & Tesseract & Transkribus \\
\midrule
\multirow[t]{5}{*}{CER \(\downarrow\) [\(\%\)]} & Overall & {\cellcolor[HTML]{00682A}} \color[HTML]{F1F1F1} 0.65 & {\cellcolor[HTML]{E9F7E5}} \color[HTML]{000000} 2.82 & {\cellcolor[HTML]{005020}} \color[HTML]{F1F1F1} 0.45 \\
 & South & {\cellcolor[HTML]{005E26}} \color[HTML]{F1F1F1} 0.57 & {\cellcolor[HTML]{F7FCF5}} \color[HTML]{000000} 3.09 & {\cellcolor[HTML]{005924}} \color[HTML]{F1F1F1} 0.53 \\
 & North & {\cellcolor[HTML]{087432}} \color[HTML]{F1F1F1} 0.77 & {\cellcolor[HTML]{ECF8E8}} \color[HTML]{000000} 2.87 & {\cellcolor[HTML]{00441B}} \color[HTML]{F1F1F1} 0.35 \\
 & Lule & {\cellcolor[HTML]{087432}} \color[HTML]{F1F1F1} 0.77 & {\cellcolor[HTML]{C1E6BA}} \color[HTML]{000000} 2.35 & {\cellcolor[HTML]{005221}} \color[HTML]{F1F1F1} 0.47 \\
 & Inari & {\cellcolor[HTML]{004E1F}} \color[HTML]{F1F1F1} 0.44 & {\cellcolor[HTML]{E8F6E3}} \color[HTML]{000000} 2.80 & {\cellcolor[HTML]{005924}} \color[HTML]{F