In [7]:
import json
from pathlib import Path

import pandas as pd
from pandas.io.formats.style import Styler


def round_and_percentage(num: float) -> float:
    """Express a fraction as a percentage with two decimal places.

    Args:
        num: The value to scale, e.g. ``0.1234``.

    Returns:
        ``num`` multiplied by 100 and rounded to two decimals, e.g. ``12.34``.
    """
    percentage = num * 100
    return round(percentage, ndigits=2)


# Display names for the models, keyed by the prefix of the model's output
# directory name (the text before the first "_").
model_prefix_name_map = dict(
    transk="Transkribus",
    tess="Tesseract",
    trocr="TrOCR",
    baseline="Baseline",
)

# Display labels keyed by short dataset identifiers.
dataset_map = dict(
    ub_smi="GT-Sámi (without base)",
    smi="GT-Sámi",
    smi_nor="GT-Sámi + GT-Nor",
    smi_pred="GT-Sámi + Pred-Sámi",
    smi_nor_pred="GT-Sámi + GT-Nor + Pred-Sámi",
)

In [8]:
# Two-level index pairing every score name with every split label
# (score-major order: all five labels for "CER" first, then "WER", ...).
index = pd.MultiIndex.from_product(
    iterables=[
        ["CER", "WER", "Sámi letter F1"],
        ["overall", "sma", "sme", "smj", "smn"],
    ]
)

In [16]:
from collections import defaultdict

# Evaluation run to read from: "baseline_evaluation", or "testset_evaluation" to not use NB-boxes.
dataset = "baseline_evaluation"

# Output directories of the models that are compared in the table.
best_tesseract_model = f"../../output/{dataset}/line_level/tess_sb_smi_nor_pred"
best_trocr_model = f"../../output/{dataset}/line_level/trocr_smi_pred_synth"
best_transkribus_model = f"../../output/{dataset}/line_level/transk_smi_nor_pred"
baseline = f"../../output/{dataset}/line_level/baseline"

langs_in_order = ["all", "sma", "sme", "smj", "smn"]
scores_in_order = ["CER", "WER", "Sámi letter F1"]
index = pd.MultiIndex.from_product(iterables=[scores_in_order, langs_in_order])

# Maps (model display name, column label, column position) -> list of cell values.
df_data = {}

for model_p in map(Path, (best_transkribus_model, best_tesseract_model, best_trocr_model, baseline)):
    # A missing output directory is reported by printing its path; the model is then skipped.
    if not model_p.exists():
        print(model_p)
        continue

    # The directory name starts with the model prefix, e.g. "tess" in "tess_sb_smi_nor_pred".
    model_prefix = model_p.name.partition("_")[0]

    model_scores = defaultdict(defaultdict)

    rows_json = (model_p / "all_rows.json").read_text(encoding="utf-8")
    mistakes = json.loads(rows_json)["mistakes"]

    # Only the first ten entries are tabulated. Each entry unpacks as
    # ((substring, replacement), count); the three "Error" columns together
    # read as "replacement -> substring".
    top_mistakes = mistakes[:10]
    model_name = model_prefix_name_map[model_prefix]
    df_data.update(
        {
            (model_name, "Error", 0): [repr(original) for (_, original), _ in top_mistakes],
            (model_name, "Error", 1): ["->"] * len(top_mistakes),
            (model_name, "Error", 2): [repr(predicted) for (predicted, _), _ in top_mistakes],
            (model_name, "#", 3): [n_occurrences for _, n_occurrences in top_mistakes],
        }
    )

In [17]:
# Inspect the raw column lists, keyed by (model name, column label, column position).
# NOTE(review): the saved output also lists \(n_e\)/\(n_c\)/\(n_m\) keys that the
# visible In [16] cell does not create -- it appears to come from an earlier
# revision of that cell; re-run to refresh.
df_data

{('Transkribus', 'Error', 0): ["'â'",
  "'â'",
  "'Á'",
  "'/'",
  "'đ'",
  "'i'",
  "'š'",
  "'ä'",
  "'ï'",
  "''"],
 ('Transkribus', 'Error', 1): ['->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->'],
 ('Transkribus', 'Error', 2): ["'á'",
  "'a'",
  "'A'",
  "' '",
  "'d'",
  "'ï'",
  "''",
  "'á'",
  "'i'",
  "'-'"],
 ('Transkribus', '#', 3): [16, 14, 9, 9, 7, 7, 6, 5, 5, 4],
 ('Transkribus', '\\(n_e\\)', 4): [16, 14, 9, 9, 7, 7, 6, 5, 5, 4],
 ('Transkribus', '\\(n_c\\)', 5): [16, 14, 9, 9, 7, 7, 6, 5, 5, 4],
 ('Transkribus', '\\(n_m\\)', 6): [16, 14, 9, 9, 7, 7, 6, 5, 5, 4],
 ('Tesseract', 'Error', 0): ["'ï'",
  "'â'",
  "'đ'",
  "'Á'",
  "''",
  "''",
  "''",
  "'s'",
  "'â'",
  "'.'"],
 ('Tesseract', 'Error', 1): ['->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->',
  '->'],
 ('Tesseract', 'Error', 2): ["'i'",
  "'á'",
  "'d'",
  "'A'",
  "'d'",
  "'á'",
  "'i'",
  "'S'",
  "'å'",
  "''"],
 ('Tesseract', '#', 3): [24, 22, 12, 10, 8, 7, 7

In [22]:
# Show the collected columns as a table; the tuple keys become a three-level column header.
pd.DataFrame(df_data)

Unnamed: 0_level_0,Transkribus,Transkribus,Transkribus,Transkribus,Transkribus,Transkribus,Transkribus,Tesseract,Tesseract,Tesseract,...,TrOCR,TrOCR,TrOCR,Baseline,Baseline,Baseline,Baseline,Baseline,Baseline,Baseline
Unnamed: 0_level_1,Error,Error,Error,#,\(n_e\),\(n_c\),\(n_m\),Error,Error,Error,...,\(n_e\),\(n_c\),\(n_m\),Error,Error,Error,#,\(n_e\),\(n_c\),\(n_m\)
Unnamed: 0_level_2,0,1,2,3,4,5,6,0,1,2,...,4,5,6,0,1,2,3,4,5,6
0,'â',->,'á',16,16,16,16,'ï',->,'i',...,9,9,9,'á',->,'å',313,313,313,313
1,'â',->,'a',14,14,14,14,'â',->,'á',...,7,7,7,'ï',->,'i',137,137,137,137
2,'Á',->,'A',9,9,9,9,'đ',->,'d',...,6,6,6,'â',->,'å',103,103,103,103
3,'/',->,' ',9,9,9,9,'Á',->,'A',...,5,5,5,'–',->,'-',75,75,75,75
4,'đ',->,'d',7,7,7,7,'',->,'d',...,4,4,4,'š',->,'s',72,72,72,72
5,'i',->,'ï',7,7,7,7,'',->,'á',...,4,4,4,'đ',->,'d',48,48,48,48
6,'š',->,'',6,6,6,6,'',->,'i',...,4,4,4,'á',->,'a',46,46,46,46
7,'ä',->,'á',5,5,5,5,'s',->,'S',...,4,4,4,'â',->,'á',30,30,30,30
8,'ï',->,'i',5,5,5,5,'â',->,'å',...,3,3,3,'â',->,'ä',26,26,26,26
9,'',->,'-',4,4,4,4,'.',->,'',...,3,3,3,'č',->,'c',26,26,26,26


In [6]:
# Render the error table as LaTeX and then patch the generated source with
# plain text substitutions, applied in the order listed below.
latex_table = (
    Styler(pd.DataFrame(df_data))
    .hide(axis="index")
    .to_latex(
        multicol_align="c",
        hrules=True,
        column_format="@{}c@{}c@{}ccc@{}c@{}ccc@{}c@{}ccc@{}c@{}cc@{}",
    )
)

# Partial rules under the model names (four columns per model) ...
model_rules = r"\cmidrule(r){1-4}\cmidrule(lr){5-8}\cmidrule(lr){9-12}\cmidrule(lr){13-16}"
# ... and under the "Error" / "#" sub-headers.
column_rules = r"\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-7}\cmidrule(lr){8-8}\cmidrule(lr){9-11}\cmidrule(lr){12-12}\cmidrule(lr){13-15}\cmidrule(l){16-16}"

latex_substitutions = [
    # Drop the header row that holds the innermost (integer) column level,
    # then collapse the blank line it leaves behind.
    (r"0 & 1 & 2 & 3 & 0 & 1 & 2 & 3 & 0 & 1 & 2 & 3 & 0 & 1 & 2 & 3 \\", ""),
    ("\n\n", "\n"),
    # Typeset the arrows with LaTeX.
    ('->', r'\(\rightarrow\)'),
    # "#" is a special character in LaTeX and has to be escaped.
    ("#", r"\#"),
    # Turn the opening quote of every cell into a backtick (LaTeX opening quote).
    ("\n'", "\n`"),
    ("& '", "& `"),
    # Replace the full-width midrule with per-column partial rules.
    (r"\midrule", column_rules),
    # Insert partial rules between the first header row and the next \multicolumn row.
    (r"\\" + "\n" + r"\multicolumn", r"\\" + "\n" + model_rules + "\n" + r"\multicolumn"),
    # Set the model names in bold.
    ("Transkribus", r"\textbf{Transkribus}"),
    ("Tesseract", r"\textbf{Tesseract}"),
    ("TrOCR", r"\textbf{TrOCR}"),
    ("Baseline", r"\textbf{Baseline}"),
]
for old_text, new_text in latex_substitutions:
    latex_table = latex_table.replace(old_text, new_text)

print(latex_table)

\begin{tabular}{@{}c@{}c@{}ccc@{}c@{}ccc@{}c@{}ccc@{}c@{}cc@{}}
\toprule
\multicolumn{4}{c}{\textbf{Transkribus}} & \multicolumn{4}{c}{\textbf{Tesseract}} & \multicolumn{4}{c}{\textbf{TrOCR}} & \multicolumn{4}{c}{\textbf{Baseline}} \\
\cmidrule(r){1-4}\cmidrule(lr){5-8}\cmidrule(lr){9-12}\cmidrule(lr){13-16}
\multicolumn{3}{c}{Error} & \# & \multicolumn{3}{c}{Error} & \# & \multicolumn{3}{c}{Error} & \# & \multicolumn{3}{c}{Error} & \# \\
\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-7}\cmidrule(lr){8-8}\cmidrule(lr){9-11}\cmidrule(lr){12-12}\cmidrule(lr){13-15}\cmidrule(l){16-16}
`â' & \(\rightarrow\) & `á' & 16 & `ï' & \(\rightarrow\) & `i' & 24 & `Á' & \(\rightarrow\) & `A' & 9 & `á' & \(\rightarrow\) & `å' & 313 \\
`â' & \(\rightarrow\) & `a' & 14 & `â' & \(\rightarrow\) & `á' & 22 & `' & \(\rightarrow\) & `l' & 7 & `ï' & \(\rightarrow\) & `i' & 137 \\
`Á' & \(\rightarrow\) & `A' & 9 & `đ' & \(\rightarrow\) & `d' & 12 & `Š' & \(\rightarrow\) & `S' & 6 & `â' & \(\rightarrow\) &

In [72]:
# Evaluation run to read from: "baseline_evaluation", or "testset_evaluation" to not use NB-boxes.
dataset = "baseline_evaluation"

# Output directories of the models that are compared in the table.
best_tesseract_model = f"../../output/{dataset}/line_level/tess_sb_smi_nor_pred"
best_trocr_model = f"../../output/{dataset}/line_level/trocr_smi_pred_synth"
best_transkribus_model = f"../../output/{dataset}/line_level/transk_smi_nor_pred"
baseline = f"../../output/{dataset}/line_level/baseline"

langs_in_order = ["all", "sma", "sme", "smj", "smn"]
scores_in_order = ["CER", "WER", "Sámi letter F1"]
index = pd.MultiIndex.from_product(iterables=[scores_in_order, langs_in_order])

# Maps model display name -> {(column label, column position): list of cell values}.
df_data = {}

for model_p in map(Path, (best_transkribus_model, best_tesseract_model, best_trocr_model, baseline)):
    # A missing output directory is reported by printing its path; the model is then skipped.
    if not model_p.exists():
        print(model_p)
        continue

    # The directory name starts with the model prefix, e.g. "tess" in "tess_sb_smi_nor_pred".
    model_prefix = model_p.name.partition("_")[0]

    model_scores = defaultdict(defaultdict)

    evaluation = json.loads((model_p / "all_rows.json").read_text(encoding="utf-8"))
    # Each entry of "mistakes" unpacks as ((substring, replacement), count).
    mistakes = evaluation["mistakes"]

    # Per-character counts, converted from key/value pairs into lookup tables.
    true_positives = dict(evaluation["true_positives"])
    false_negatives = dict(evaluation["false_negatives"])
    original_letters = [replacement for (_, replacement), _ in mistakes]

    # One row per mistake (not truncated here). The per-letter counts are
    # replaced by "--" when the original letter is the empty string.
    df_data[model_prefix_name_map[model_prefix]] = {
        ("Error", 0): [repr(original) for original in original_letters],
        ("Error", 1): ["->"] * len(mistakes),
        ("Error", 2): [repr(predicted) for (predicted, _), _ in mistakes],
        # n_e: how often this particular mistake occurred.
        (r"\(n_e\)", 3): [n_occurrences for _, n_occurrences in mistakes],
        # n_m: false-negative count of the original letter.
        (r"\(n_m\)", 4): [
            "--" if not original else false_negatives.get(original, 0)
            for original in original_letters
        ],
        # n_c: true positives plus false negatives of the original letter.
        (r"\(n_c\)", 5): [
            "--" if not original else (true_positives.get(original, 0) + false_negatives.get(original, 0))
            for original in original_letters
        ],
    }

In [104]:
# The models have different numbers of mistakes, so a separate top-10 frame is
# built for each model first; the frames are then concatenated side by side.
dfs = []
for model_name, model_columns in df_data.items():
    top_mistakes = (
        pd.DataFrame(model_columns)
        # Most frequent mistakes first; ties are ordered by the n_m column.
        .sort_values([(r"\(n_e\)", 3), (r"\(n_m\)", 4)], ascending=False)
        .head(10)
        # Renumber rows 0..9 so that the frames line up row by row when concatenated.
        .reset_index(drop=True)
        # Keep every column as object dtype so integer counts are not turned
        # into floats if a shorter frame is padded with NaN during the concat.
        .astype(object)
    )
    dfs.append(top_mistakes)

# `keys` prepends the model name as the outermost column level, giving
# (model, column label, column position) columns with unnamed levels.
df = pd.concat(dfs, axis=1, keys=list(df_data))

In [105]:
# Preview the combined table: one group of columns per model, at most ten rows.
df

Unnamed: 0_level_0,Transkribus,Transkribus,Transkribus,Transkribus,Transkribus,Transkribus,Tesseract,Tesseract,Tesseract,Tesseract,...,TrOCR,TrOCR,TrOCR,TrOCR,Baseline,Baseline,Baseline,Baseline,Baseline,Baseline
Unnamed: 0_level_1,Error,Error,Error,\(n_e\),\(n_m\),\(n_c\),Error,Error,Error,\(n_e\),...,Error,\(n_e\),\(n_m\),\(n_c\),Error,Error,Error,\(n_e\),\(n_m\),\(n_c\)
Unnamed: 0_level_2,0,1,2,3,4,5,0,1,2,3,...,2,3,4,5,0,1,2,3,4,5
0,'â',->,'á',16,35,287,'ï',->,'i',24,...,'A',9,11,28,'á',->,'å',313,418,1136
1,'â',->,'a',14,35,287,'â',->,'á',22,...,'l',7,--,--,'ï',->,'i',137,139,160
2,'Á',->,'A',9,10,28,'đ',->,'d',12,...,'S',6,6,6,'â',->,'å',103,180,287
3,'/',->,' ',9,9,10,'Á',->,'A',10,...,'i',5,--,--,'–',->,'-',75,77,82
4,'i',->,'ï',7,13,3299,'',->,'d',8,...,' ',4,--,--,'š',->,'s',72,95,215
5,'đ',->,'d',7,11,173,'',->,'á',7,...,'ï',4,21,3299,'đ',->,'d',48,61,173
6,'š',->,'',6,6,215,'',->,'i',7,...,'å',4,14,1136,'á',->,'a',46,418,1136
7,'ä',->,'á',5,6,150,'s',->,'S',7,...,'C',4,4,8,'â',->,'á',30,180,287
8,'ï',->,'i',5,5,160,'â',->,'å',6,...,'a',3,14,1136,'â',->,'ä',26,180,287
9,'',->,'-',4,--,--,'.',->,'',5,...,'u',3,8,3247,'č',->,'c',26,62,163


In [114]:
# Tighten the column spacing (and the cmidrule trimming to match) before the table.
print(r"\setlength{\tabcolsep}{3.7pt}")
print(r"\setlength{\cmidrulekern}{3.7pt}")

# Render the combined table as LaTeX and then patch the generated source with
# plain text substitutions, applied in the order listed below.
latex_table = (
    Styler(df)
    .hide(axis="index")
    .to_latex(
        multicol_align="c",
        hrules=True,
        column_format="@{}c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr@{}",
    )
)

# Partial rules under the model names (six columns per model) ...
model_rules = r"\cmidrule(r){1-6}\cmidrule(lr){7-12}\cmidrule(lr){13-18}\cmidrule(lr){19-24}"
# ... and under the sub-headers, one line of rules per model.
column_rules = "\n".join(
    [
        r"\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-5}\cmidrule(lr){6-6}",
        r"\cmidrule(lr){7-9}\cmidrule(lr){10-10}\cmidrule(lr){11-11}\cmidrule(lr){12-12}",
        r"\cmidrule(lr){13-15}\cmidrule(lr){16-16}\cmidrule(lr){17-17}\cmidrule(lr){18-18}",
        r"\cmidrule(lr){19-21}\cmidrule(lr){22-22}\cmidrule(lr){23-23}\cmidrule(l){24-24}",
    ]
)

latex_substitutions = [
    # Drop the header row that holds the innermost (integer) column level,
    # then collapse the blank line it leaves behind.
    (r"0 & 1 & 2 & 3 & 4 & 5 & 0 & 1 & 2 & 3 & 4 & 5 & 0 & 1 & 2 & 3 & 4 & 5 & 0 & 1 & 2 & 3 & 4 & 5 \\", ""),
    ("\n\n", "\n"),
    # Typeset the arrows with LaTeX.
    ('->', r'\(\shortrightarrow\)'),
    # "#" is a special character in LaTeX and has to be escaped.
    ("#", r"\#"),
    # Turn the opening quote of every cell into a backtick (LaTeX opening quote).
    ("\n'", "\n`"),
    ("& '", "& `"),
    # Replace the full-width midrule with per-column partial rules.
    (r"\midrule", column_rules),
    # Insert partial rules between the first header row and the next \multicolumn row.
    (r"\\" + "\n" + r"\multicolumn", r"\\" + "\n" + model_rules + "\n" + r"\multicolumn"),
    # Set the model names in bold.
    ("Transkribus", r"\textbf{Transkribus}"),
    ("Tesseract", r"\textbf{Tesseract}"),
    ("TrOCR", r"\textbf{TrOCR}"),
    ("Baseline", r"\textbf{Baseline}"),
    # Centre the n_c header (its column is right-aligned in column_format).
    (r"\(n_c\)", r"\multicolumn{1}{c}{\(n_c\)}"),
]
for old_text, new_text in latex_substitutions:
    latex_table = latex_table.replace(old_text, new_text)

print(latex_table)

\setlength{\tabcolsep}{3.7pt}
\setlength{\cmidrulekern}{3.7pt}
\begin{tabular}{@{}c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr@{}}
\toprule
\multicolumn{6}{c}{\textbf{Transkribus}} & \multicolumn{6}{c}{\textbf{Tesseract}} & \multicolumn{6}{c}{\textbf{TrOCR}} & \multicolumn{6}{c}{\textbf{Baseline}} \\
\cmidrule(r){1-6}\cmidrule(lr){7-12}\cmidrule(lr){13-18}\cmidrule(lr){19-24}
\multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} \\
\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-5}\cmidrule(lr){6-6}
\cmidrule(lr){7-9}\cmidrule(lr){10-10}\cmidrule(lr){11-11}\cmidrule(lr){12-12}
\cmidrule(lr){13-15}\cmidrule(lr){16-16}\cmidrule(lr){17-17}\cmidrule(lr){18-18}
\cmidrule(lr){19-21}\cmidrule(lr){22-22}\cmidrule(lr){23-23}\cmidrule

In [113]:
\setlength{\tabcolsep}{3.7pt}
\setlength{\cmidrulekern}{3.7pt}
\begin{tabular}{@{}c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr@{}}
\toprule
\multicolumn{6}{c}{\textbf{Transkribus}} & \multicolumn{6}{c}{\textbf{Tesseract}} & \multicolumn{6}{c}{\textbf{TrOCR}} & \multicolumn{6}{c}{\textbf{Baseline}} \\
\cmidrule(r){1-6}\cmidrule(lr){7-12}\cmidrule(lr){13-18}\cmidrule(lr){19-24}
\multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} \\
\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-5}\cmidrule(lr){6-6}
\cmidrule(lr){7-9}\cmidrule(lr){10-10}\cmidrule(lr){11-11}\cmidrule(lr){12-12}
\cmidrule(lr){13-15}\cmidrule(lr){16-16}\cmidrule(lr){17-17}\cmidrule(lr){18-18}
\cmidrule(lr){19-21}\cmidrule(lr){22-22}\cmidrule(lr){23-23}\cmidrule(l){24-24}
`â' & \(\shortrightarrow\) & `á' & 16 & 35 & 287 & `ï' & \(\shortrightarrow\) & `i' & 24 & 27 & 160 & `Á' & \(\shortrightarrow\) & `A' & 9 & 11 & 28 & `á' & \(\shortrightarrow\) & `å' & 313 & 418 & 1136 \\
`â' & \(\shortrightarrow\) & `a' & 14 & 35 & 287 & `â' & \(\shortrightarrow\) & `á' & 22 & 29 & 287 & `' & \(\shortrightarrow\) & `l' & 7 & -- & -- & `ï' & \(\shortrightarrow\) & `i' & 137 & 139 & 160 \\
`Á' & \(\shortrightarrow\) & `A' & 9 & 10 & 28 & `đ' & \(\shortrightarrow\) & `d' & 12 & 14 & 173 & `Š' & \(\shortrightarrow\) & `S' & 6 & 6 & 6 & `â' & \(\shortrightarrow\) & `å' & 103 & 180 & 287 \\
`/' & \(\shortrightarrow\) & ` ' & 9 & 9 & 10 & `Á' & \(\shortrightarrow\) & `A' & 10 & 11 & 28 & `' & \(\shortrightarrow\) & `i' & 5 & -- & -- & `–' & \(\shortrightarrow\) & `-' & 75 & 77 & 82 \\
`i' & \(\shortrightarrow\) & `ï' & 7 & 13 & 3299 & `' & \(\shortrightarrow\) & `d' & 8 & -- & -- & `' & \(\shortrightarrow\) & ` ' & 4 & -- & -- & `š' & \(\shortrightarrow\) & `s' & 72 & 95 & 215 \\
`đ' & \(\shortrightarrow\) & `d' & 7 & 11 & 173 & `' & \(\shortrightarrow\) & `á' & 7 & -- & -- & `i' & \(\shortrightarrow\) & `ï' & 4 & 21 & 3299 & `đ' & \(\shortrightarrow\) & `d' & 48 & 61 & 173 \\
`š' & \(\shortrightarrow\) & `' & 6 & 6 & 215 & `' & \(\shortrightarrow\) & `i' & 7 & -- & -- & `á' & \(\shortrightarrow\) & `å' & 4 & 14 & 1136 & `á' & \(\shortrightarrow\) & `a' & 46 & 418 & 1136 \\
`ä' & \(\shortrightarrow\) & `á' & 5 & 6 & 150 & `s' & \(\shortrightarrow\) & `S' & 7 & 8 & 1509 & `Č' & \(\shortrightarrow\) & `C' & 4 & 4 & 8 & `â' & \(\shortrightarrow\) & `á' & 30 & 180 & 287 \\
`ï' & \(\shortrightarrow\) & `i' & 5 & 5 & 160 & `â' & \(\shortrightarrow\) & `å' & 6 & 29 & 287 & `á' & \(\shortrightarrow\) & `a' & 3 & 14 & 1136 & `â' & \(\shortrightarrow\) & `ä' & 26 & 180 & 287 \\
`' & \(\shortrightarrow\) & `-' & 4 & -- & -- & `.' & \(\shortrightarrow\) & `' & 5 & 6 & 509 & `a' & \(\shortrightarrow\) & `u' & 3 & 8 & 3247 & `č' & \(\shortrightarrow\) & `c' & 26 & 62 & 163 \\
\bottomrule
\end{tabular}

SyntaxError: unexpected character after line continuation character (2954633831.py, line 1)

In [31]:
\setlength{\tabcolsep}{3.7pt}
\setlength{\cmidrulekern}{3.7pt}
\begin{tabular}{@{}c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr|c@{}c@{}crrr@{}}
\toprule
\multicolumn{6}{c}{\textbf{Transkribus}} & \multicolumn{6}{c}{\textbf{Tesseract}} & \multicolumn{6}{c}{\textbf{TrOCR}} & \multicolumn{6}{c}{\textbf{Baseline}} \\
\cmidrule(r){1-6}\cmidrule(lr){7-12}\cmidrule(lr){13-18}\cmidrule(lr){19-24}
\multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} & \multicolumn{3}{c}{Error} & \(n_e\) & \(n_m\) & \multicolumn{1}{c}{\(n_c\)} \\
\cmidrule(r){1-3}\cmidrule(lr){4-4}\cmidrule(lr){5-5}\cmidrule(lr){6-6}
\cmidrule(lr){7-9}\cmidrule(lr){10-10}\cmidrule(lr){11-11}\cmidrule(lr){12-12}
\cmidrule(lr){13-15}\cmidrule(lr){16-16}\cmidrule(lr){17-17}\cmidrule(lr){18-18}
\cmidrule(lr){19-21}\cmidrule(lr){22-22}\cmidrule(lr){23-23}\cmidrule(l){24-24}
`â' & \(\shortrightarrow\) & `á' & 16 & 35 & 287 & `ï' & \(\shortrightarrow\) & `i' & 24 & 27 & 160 & `Á' & \(\shortrightarrow\) & `A' & 9 & 11 & 28 & `á' & \(\shortrightarrow\) & `å' & 313 & 418 & 1136 \\
`â' & \(\shortrightarrow\) & `a' & 14 & 35 & 287 & `â' & \(\shortrightarrow\) & `á' & 22 & 29 & 287 & `' & \(\shortrightarrow\) & `l' & 7 & -- & -- & `ï' & \(\shortrightarrow\) & `i' & 137 & 139 & 160 \\
`Á' & \(\shortrightarrow\) & `A' & 9 & 10 & 28 & `đ' & \(\shortrightarrow\) & `d' & 12 & 14 & 173 & `Š' & \(\shortrightarrow\) & `S' & 6 & 6 & 6 & `â' & \(\shortrightarrow\) & `å' & 103 & 180 & 287 \\
`/' & \(\shortrightarrow\) & ` ' & 9 & 9 & 10 & `Á' & \(\shortrightarrow\) & `A' & 10 & 11 & 28 & `' & \(\shortrightarrow\) & `i' & 5 & -- & -- & `–' & \(\shortrightarrow\) & `-' & 75 & 77 & 82 \\
`i' & \(\shortrightarrow\) & `ï' & 7 & 13 & 3299 & `' & \(\shortrightarrow\) & `d' & 8 & -- & -- & `' & \(\shortrightarrow\) & ` ' & 4 & -- & -- & `š' & \(\shortrightarrow\) & `s' & 72 & 95 & 215 \\
`đ' & \(\shortrightarrow\) & `d' & 7 & 11 & 173 & `' & \(\shortrightarrow\) & `á' & 7 & -- & -- & `i' & \(\shortrightarrow\) & `ï' & 4 & 21 & 3299 & `đ' & \(\shortrightarrow\) & `d' & 48 & 61 & 173 \\
`š' & \(\shortrightarrow\) & `' & 6 & 6 & 215 & `' & \(\shortrightarrow\) & `i' & 7 & -- & -- & `á' & \(\shortrightarrow\) & `å' & 4 & 14 & 1136 & `á' & \(\shortrightarrow\) & `a' & 46 & 418 & 1136 \\
`ä' & \(\shortrightarrow\) & `á' & 5 & 6 & 150 & `s' & \(\shortrightarrow\) & `S' & 7 & 8 & 1509 & `Č' & \(\shortrightarrow\) & `C' & 4 & 4 & 8 & `â' & \(\shortrightarrow\) & `á' & 30 & 180 & 287 \\
`ï' & \(\shortrightarrow\) & `i' & 5 & 5 & 160 & `â' & \(\shortrightarrow\) & `å' & 6 & 29 & 287 & `á' & \(\shortrightarrow\) & `a' & 3 & 14 & 1136 & `â' & \(\shortrightarrow\) & `ä' & 26 & 180 & 287 \\
`' & \(\shortrightarrow\) & `-' & 4 & -- & -- & `.' & \(\shortrightarrow\) & `' & 5 & 6 & 509 & `a' & \(\shortrightarrow\) & `u' & 3 & 8 & 3247 & `č' & \(\shortrightarrow\) & `c' & 26 & 62 & 163 \\
\bottomrule
\end{tabular}

{'WER_mean': 0.20013702230683364,
 'CER_mean': 0.03496457619689371,
 'WER_concat': 0.1870567375886525,
 'CER_concat': 0.03376849434290687,
 'special_char_F1_mean': 0.5254098206927301,
 'special_char_F1_concat': 0.7513095535046146,
 'Á': {'F1_mean': 0.56, 'F1_concat': 0.8085106382978723},
 'Â': {'F1_mean': nan, 'F1_concat': nan},
 'Č': {'F1_mean': 0.375, 'F1_concat': 0.5454545454545454},
 'Đ': {'F1_mean': 1.0, 'F1_concat': 1.0},
 'Ï': {'F1_mean': nan, 'F1_concat': nan},
 'Ŋ': {'F1_mean': nan, 'F1_concat': nan},
 'Š': {'F1_mean': 0.6666666666666666, 'F1_concat': 0.8},
 'Ŧ': {'F1_mean': nan, 'F1_concat': nan},
 'Ž': {'F1_mean': 1.0, 'F1_concat': 1.0},
 'Ä': {'F1_mean': 0.3333333333333333, 'F1_concat': 0.6666666666666666},
 'Ö': {'F1_mean': 0.5, 'F1_concat': 0.6666666666666666},
 'á': {'F1_mean': 0.5860683589675186, 'F1_concat': 0.7972472207517205},
 'â': {'F1_mean': 0.329240749330035, 'F1_concat': 0.5468354430379747},
 'č': {'F1_mean': 0.5868347338935574, 'F1_concat': 0.7651515151515151},