# Visualize and print results on the test set

In [1]:
from pathlib import Path
import pandas as pd
import json
from collections import defaultdict

# Use plotly instead of the default backend for DataFrame.plot calls.
pd.options.plotting.backend = "plotly"

# Long format: one row per (model, score) pair, used for the bar charts.
plot_data = defaultdict(list)
# Wide format: one row per model and one column per score, used for the tables.
score_data = defaultdict(list)

evaluation_root = Path("../output/evaluation/")
for level_dir in evaluation_root.iterdir():
    for run_dir in level_dir.iterdir():
        # Model name is "<run directory>_<level directory>".
        run_label = f"{run_dir.name}_{level_dir.name}"
        scores_path = run_dir / "all_rows.json"
        run_scores = json.loads(scores_path.read_text())

        score_data["model"].append(run_label)

        for metric, metric_value in run_scores.items():
            # Every metric yields two plot rows: the raw score and its
            # inverted counterpart, labeled "1 - <metric>".
            plot_data["model"].extend([run_label, run_label])
            plot_data["score"].extend([metric, f"1 - {metric}"])
            plot_data["value"].extend([metric_value, 1 - metric_value])

            score_data[metric].append(metric_value)


plot_data_df = pd.DataFrame(plot_data)

In [11]:
from plotly import colors

from IPython.display import Markdown

# plotly's built-in qualitative "Plotly" palette (a list of color strings).
plotly_colors = colors.qualitative.Plotly

# One HTML line per palette entry: the color string followed by a swatch
# rendered in that color.
swatches = [
    f'<span style="font-family: monospace">{color} <span style="color: {color}">████████</span></span>'
    for color in plotly_colors
]
display(Markdown("<br>".join(swatches)))

<span style="font-family: monospace">#636EFA <span style="color: #636EFA">████████</span></span><br><span style="font-family: monospace">#EF553B <span style="color: #EF553B">████████</span></span><br><span style="font-family: monospace">#00CC96 <span style="color: #00CC96">████████</span></span><br><span style="font-family: monospace">#AB63FA <span style="color: #AB63FA">████████</span></span><br><span style="font-family: monospace">#FFA15A <span style="color: #FFA15A">████████</span></span><br><span style="font-family: monospace">#19D3F3 <span style="color: #19D3F3">████████</span></span><br><span style="font-family: monospace">#FF6692 <span style="color: #FF6692">████████</span></span><br><span style="font-family: monospace">#B6E880 <span style="color: #B6E880">████████</span></span><br><span style="font-family: monospace">#FF97FF <span style="color: #FF97FF">████████</span></span><br><span style="font-family: monospace">#FECB52 <span style="color: #FECB52">████████</span></span>

In [24]:
import random
from plotly import colors


def get_colors(models: list[str], seed: int) -> dict[str, str]:
    """Assign each model a color from plotly's qualitative ``Plotly`` palette.

    The colors are drawn pseudo-randomly but reproducibly: the same ``models``
    list (in the same order) and the same ``seed`` always give the same
    mapping.

    Args:
        models: Model names to color. Order matters, because the i-th model
            receives the i-th drawn color.
        seed: Seed for the pseudo-random draw.

    Returns:
        A dict mapping every model name to a palette color string. Colors are
        distinct as long as there are no more models than palette entries;
        beyond that the drawn colors are reused cyclically.
    """
    palette = colors.qualitative.Plotly
    # A private generator gives the same draw as seeding the module-level
    # ``random`` state, without clobbering that state for other code.
    rng = random.Random(seed)
    # ``sample`` raises ValueError when asked for more items than the palette
    # holds, so draw at most the whole palette and wrap around afterwards.
    drawn = rng.sample(palette, k=min(len(models), len(palette)))
    return {model: drawn[i % len(drawn)] for i, model in enumerate(models)}

In [28]:
# Bar chart of the CER scores for a subset of the models.

# Drop the inverted "1 - <score>" rows. Matching the exact prefix (instead of
# any "1" in the name) keeps scores whose name merely contains the digit 1.
plot_df = plot_data_df[~plot_data_df.score.str.startswith("1 - ")]
# Drop models whose name contains "sme" or "page".
plot_df = plot_df[~plot_df.model.str.contains("sme", regex=False)]
plot_df = plot_df[~plot_df.model.str.contains("page", regex=False)]
# Keep only models whose name contains "20" or "transkribus".
plot_df = plot_df[
    plot_df.model.str.contains("20", regex=False)
    | plot_df.model.str.contains("transkribus", regex=False)
]
plot_df = plot_df[plot_df.score.str.contains("CER", regex=False)]

# Sort the model names before drawing colors: the iteration order of a set of
# strings can change between interpreter runs, which would make the seeded
# color assignment differ from run to run.
color_map = get_colors(sorted(set(plot_df.model)), seed=3)

plot_df = plot_df.sort_values("value")
plot_df.plot.bar(
    x="score", y="value", color="model", barmode="group", color_discrete_map=color_map
)

In [31]:
# Bar chart of the WER scores for a subset of the models.
# NOTE: reuses the model-to-color mapping ``color_map`` created in an earlier cell.

# Drop the inverted "1 - <score>" rows. Matching the exact prefix (instead of
# any "1" in the name) keeps scores whose name merely contains the digit 1.
plot_df = plot_data_df[~plot_data_df.score.str.startswith("1 - ")]
# Drop models whose name contains "sme" or "page".
plot_df = plot_df[~plot_df.model.str.contains("sme", regex=False)]
plot_df = plot_df[~plot_df.model.str.contains("page", regex=False)]
# Keep only models whose name contains "20" or "transkribus".
plot_df = plot_df[
    plot_df.model.str.contains("20", regex=False)
    | plot_df.model.str.contains("transkribus", regex=False)
]
plot_df = plot_df[plot_df.score.str.contains("WER", regex=False)]

plot_df = plot_df.sort_values("value")
plot_df.plot.bar(
    x="score", y="value", color="model", barmode="group", color_discrete_map=color_map
)

In [5]:
# Wide score table (one row per model), ordered from lowest to highest CER.
df = pd.DataFrame(data=score_data)
df.sort_values(by="CER", ascending=True)

Unnamed: 0,model,CER,WER
16,transkribus_med_base_line_level,0.002824,0.01959
19,transkribus_uten_base_line_level,0.004186,0.02618
3,transkribus_med_base_page_level,0.004326,0.034361
17,nor_smx_206_line_level,0.006168,0.033838
7,transkribus_uten_base_page_level,0.006174,0.045125
18,nor_smx_201_line_level,0.006886,0.037222
22,nor_smx_205_line_level,0.00748,0.045058
15,nor_smx_200_line_level,0.007852,0.043277
13,nor_smx_30000_line_level,0.013128,0.071238
2,nor_smx_200_page_level,0.037796,0.137446


In [6]:
# Wide score table (one row per model), ordered from lowest to highest WER.
df = pd.DataFrame(data=score_data)
df.sort_values(by="WER", ascending=True)

Unnamed: 0,model,CER,WER
16,transkribus_med_base_line_level,0.002824,0.01959
19,transkribus_uten_base_line_level,0.004186,0.02618
17,nor_smx_206_line_level,0.006168,0.033838
3,transkribus_med_base_page_level,0.004326,0.034361
18,nor_smx_201_line_level,0.006886,0.037222
15,nor_smx_200_line_level,0.007852,0.043277
22,nor_smx_205_line_level,0.00748,0.045058
7,transkribus_uten_base_page_level,0.006174,0.045125
13,nor_smx_30000_line_level,0.013128,0.071238
2,nor_smx_200_page_level,0.037796,0.137446
