In [1]:
import json
from pathlib import Path

import pandas as pd
from pandas.io.formats.style import Styler


def round_and_percentage(num: float) -> float:
    """Express a fraction as a percentage rounded to two decimals.

    Args:
        num: The value to convert (e.g. ``0.5``).

    Returns:
        ``num`` multiplied by 100 and rounded to two decimal places
        (e.g. ``50.0``).
    """
    percentage = num * 100
    return round(percentage, 2)


# Display name of each OCR model, keyed by the prefix of its evaluation
# directory name (the part before the first underscore).
model_prefix_name_map = {
    "transk": "Transkribus",
    "tess": "Tesseract",
    "trocr": "TrOCR",
}

# Human-readable labels for the training-data combinations.
# NOTE(review): the keys look like the suffixes of the evaluation directory
# names (e.g. "smi_nor_pred") — confirm against where this map is consumed.
dataset_map = dict(
    ub_smi="GT-Sámi (without base)",
    smi="GT-Sámi",
    smi_nor="GT-Sámi + GT-Nor",
    smi_pred="GT-Sámi + Pred-Sámi",
    smi_nor_pred="GT-Sámi + GT-Nor + Pred-Sámi",
)

In [2]:
# Row index of the results table: one row per (metric, language) pair.
# The aggregate row is labelled "all" (not "overall") so that it agrees with
# the language keys used when the scores are loaded and when the index labels
# are prettified for the LaTeX export.
index = pd.MultiIndex.from_product(
    [["CER", "WER", "Sámi letter F1"], ["all", "sma", "sme", "smj", "smn"]]
)

In [3]:
from collections import defaultdict

# TODO: replace with test-set data
# Evaluation output directory of the selected configuration for each OCR model.
best_trocr_model = "../output/evaluation/line_level/trocr_smi_nor"
best_tesseract_model = "../output/evaluation/line_level/tess_smi_nor_pred"
best_transkribus_model = "../output/evaluation/line_level/transk_smi_nor_pred"

# Row order of the results table: metrics form the outer level and languages
# the inner level of the index.
scores_in_order = ["CER", "WER", "Sámi letter F1"]
langs_in_order = ["all", "sma", "sme", "smj", "smn"]
index = pd.MultiIndex.from_product(iterables=[scores_in_order, langs_in_order])

# Column data for the results table: model display name -> list of scores in
# the same order as `index` (metric-major, then language).
df_data = {}

for model_dir in map(Path, [best_trocr_model, best_tesseract_model, best_transkribus_model]):
    # Only the part of the directory name before the first underscore is
    # needed; it selects the model's display name.
    model_prefix = model_dir.name.partition("_")[0]
    # Plain dicts (instead of factory-less nested defaultdicts) so that a
    # missing entry is detected explicitly below rather than by accident.
    model_scores = {"CER": {}, "WER": {}, "Sámi letter F1": {}}

    for lang_file in model_dir.glob("*rows.json"):
        # The file-name stem up to the first underscore is used as language key.
        lang = lang_file.stem.split("_")[0]
        scores = json.loads(lang_file.read_text(encoding="utf-8"))
        model_scores["WER"][lang] = round_and_percentage(scores["WER_concat"])
        model_scores["CER"][lang] = round_and_percentage(scores["CER_concat"])
        model_scores["Sámi letter F1"][lang] = round_and_percentage(scores["special_char_F1_mean"])

    # Fail with an actionable message (still a KeyError, as before) when a
    # language has no score file, e.g. because the directory path is wrong.
    missing_langs = [lang for lang in langs_in_order if lang not in model_scores["CER"]]
    if missing_langs:
        raise KeyError(
            f"No '*rows.json' score file found in {model_dir} for: {', '.join(missing_langs)}"
        )

    df_data[model_prefix_name_map[model_prefix]] = [
        model_scores[score][lang] for score in scores_in_order for lang in langs_in_order
    ]

In [4]:
# Assemble the results table: one column per model, rows labelled by the
# (metric, language) index. The bare name on the last line displays it.
df = pd.DataFrame(data=df_data, index=index)
df

Unnamed: 0,Unnamed: 1,TrOCR,Tesseract,Transkribus
CER,all,1.95,4.4,1.19
CER,sma,1.1,2.67,0.35
CER,sme,1.93,4.58,1.32
CER,smj,1.01,4.19,0.29
CER,smn,2.7,3.31,0.42
WER,all,8.88,7.96,4.47
WER,sma,7.69,3.85,2.66
WER,sme,8.5,8.03,4.84
WER,smj,4.85,6.8,1.46
WER,smn,14.19,9.14,2.23


In [5]:
# Style the results table and export it as LaTeX.
# Every cell is rendered with two decimals.
styler = Styler(df).format("{:.2f}")

# Colour-scale limits are taken over all models and languages of a metric, so
# the shading is comparable across every cell within that metric's rows.
min_cer = df.loc["CER"].min(axis=None)
max_cer = df.loc["CER"].max(axis=None)
min_wer = df.loc["WER"].min(axis=None)
max_wer = df.loc["WER"].max(axis=None)
min_f1 = df.loc["Sámi letter F1"].min(axis=None)
max_f1 = df.loc["Sámi letter F1"].max(axis=None)

# Error rates (lower is better) use the reversed colormap, F1 (higher is
# better) the regular one, so the best scores get the darkest green.
styler.background_gradient(
    cmap="Greens_r", vmin=min_cer, vmax=max_cer, subset=pd.IndexSlice["CER", :]
)
styler.background_gradient(
    cmap="Greens_r", vmin=min_wer, vmax=max_wer, subset=pd.IndexSlice["WER", :]
)
styler.background_gradient(
    cmap="Greens", vmin=min_f1, vmax=max_f1, subset=pd.IndexSlice["Sámi letter F1", :]
)
# Replace the raw metric keys with LaTeX labels carrying direction and unit.
styler.format_index(
    {
        "CER": r"CER \(\downarrow\) [\(\%\)]",
        "WER": r"WER \(\downarrow\) [\(\%\)]",
        "Sámi letter F1": r"Sámi letter F1 \(\uparrow\) [\(\%\)]",
    }.__getitem__,
    level=0,
)
# Replace the language codes with readable row labels.
styler.format_index(
    {
        "all": "Overall",
        "sma": "South",
        "sme": "North",
        "smj": "Lule",
        "smn": "Inari",
    }.__getitem__,
    level=1,
)

# Printed (rather than displayed) so the raw LaTeX can be copied verbatim.
print(
    styler.to_latex(
        convert_css=True, multirow_align="t", hrules=True, clines="skip-last;data"
    ).replace(
        r"\cline{1-5}", r"\midrule", 2
    )  # Replace the first two clines with midrule since midrule will add some extra whitespace
)

\begin{tabular}{llrrr}
\toprule
 &  & TrOCR & Tesseract & Transkribus \\
\midrule
\multirow[t]{5}{*}{CER \(\downarrow\) [\(\%\)]} & Overall & {\cellcolor[HTML]{46AE60}} \color[HTML]{F1F1F1} 1.95 & {\cellcolor[HTML]{F1FAEE}} \color[HTML]{000000} 4.40 & {\cellcolor[HTML]{17813D}} \color[HTML]{F1F1F1} 1.19 \\
 & Sourth & {\cellcolor[HTML]{127C39}} \color[HTML]{F1F1F1} 1.10 & {\cellcolor[HTML]{88CE87}} \color[HTML]{000000} 2.67 & {\cellcolor[HTML]{00481D}} \color[HTML]{F1F1F1} 0.35 \\
 & North & {\cellcolor[HTML]{43AC5E}} \color[HTML]{F1F1F1} 1.93 & {\cellcolor[HTML]{F7FCF5}} \color[HTML]{000000} 4.58 & {\cellcolor[HTML]{208843}} \color[HTML]{F1F1F1} 1.32 \\
 & Lule & {\cellcolor[HTML]{0B7734}} \color[HTML]{F1F1F1} 1.01 & {\cellcolor[HTML]{EAF7E6}} \color[HTML]{000000} 4.19 & {\cellcolor[HTML]{00441B}} \color[HTML]{F1F1F1} 0.29 \\
 & Inari & {\cellcolor[HTML]{8ACE88}} \color[HTML]{000000} 2.70 & {\cellcolor[HTML]{BAE3B3}} \color[HTML]{000000} 3.31 & {\cellcolor[HTML]{004D1F}} \color[HTML]{