# Overall results

In [1]:
import json
from collections import defaultdict
from pathlib import Path

import pandas as pd

# Route DataFrame.plot through plotly for the plotting cells of this notebook.
pd.options.plotting.backend = "plotly"

# Evaluation granularity; selects which results sub-directory is read.
# Switch to "page_level" to load the page-level results instead.
level = "line_level"


# Long format: one row per (model, score) pair, used for plotting.
plot_data = defaultdict(list)
# Wide format: a "model" column plus one column per scalar score.
# NOTE(review): columns stay aligned with "model" only if every model reports
# the same set of scalar scores -- confirm against the evaluation output.
score_data = defaultdict(list)

# sorted(): iterdir() yields entries in arbitrary, filesystem-dependent order,
# which would make the row order differ between machines and runs.
for model_dir in sorted(Path(f"../../output/valset_evaluation/{level}").iterdir()):
    # Stray files in the results folder are not model outputs; reading
    # "<file>/all_rows.json" from them would raise NotADirectoryError.
    if not model_dir.is_dir():
        continue

    model_name = model_dir.name
    # Skip models whose name contains "transk" unless it also contains "lm".
    if "transk" in model_name and "lm" not in model_name:
        continue
    collection_level = json.loads((model_dir / "all_rows.json").read_text())

    score_data["model"].append(model_name)

    for score_name, value in collection_level.items():
        # Nested dicts are not scalar scores and cannot be plotted as a bar.
        if isinstance(value, dict):
            continue
        plot_data["model"].append(model_name)
        plot_data["score"].append(score_name)
        plot_data["value"].append(value)

        # Also store the inverted score under the name "1 - <score>".
        plot_data["model"].append(model_name)
        plot_data["score"].append(f"1 - {score_name}")
        plot_data["value"].append(1 - value)

        score_data[score_name].append(value)


plot_data_df = pd.DataFrame(plot_data)

In [2]:
def get_model_colormap(models: list[str]) -> dict[str, str]:
    """Assign every model a hex colour from the palette of its model family.

    Families are detected from the model name:

    * name starts with ``"transk"``                     -> greens
    * name starts with ``"tess"`` and contains ``"smi"``  -> purples
    * name contains ``"sme"``                           -> reds
    * name starts with ``"tess"`` without ``"smi"``       -> greys
    * name contains ``"trocr"``                         -> blues

    Within a family, colours are handed out in the order the models appear in
    ``models``. If a family has more models than its palette has colours, the
    palette is reused from the start instead of raising ``IndexError``.

    Args:
        models: Model names to colour.

    Returns:
        Mapping from model name to hex colour string. A name matching several
        families gets the colour of the family listed last above; a name
        matching no family is left out of the mapping.
    """
    greens = [
        "#4c9c0a",
        "#287e00",
        "#006000",
        "#004400",
        "#002a00",
        "#004400",
        "#002a00",
        "#004400",
        "#002a00",
        "#004400",
        "#002a00",
    ]
    greys = ["#f2f1f0", "#c0c9cc", "#a39d92", "#636059"]
    blues = [
        "#68bbe3",
        "#0e86d4",
        "#055c9d",
        "#003060",
        "#68bbe3",
        "#0e86d4",
        "#055c9d",
        "#003060",
        "#68bbe3",
        "#0e86d4",
        "#055c9d",
        "#003060",
    ]
    reds = [
        "#ef553b",
        "#ca3320",
        "#a60205",
        "#830000",
        "#630000",
        "#ff4410",
        "#d91700",
        "#b30000",
        "#8f0000",
        "#6e0000",
    ]
    purples = [
        "#5d1669",
        "#e588ff",
        "#47006d",
        "#a05bc5",
        "#ffcbff",
        "#a057aa",
        "#733198",
        "#ce86f4",
        "#3a0060",
        "#faccff",
        "#640099",
        "#be93fd",
        "#a178df",
        "#4e025a",
        "#dcb0ff",
        "#845ec2",
        "#640099",
        "#be93fd",
    ]

    # (membership test, palette) per family. Order matters: a later family
    # overrides the colour given by an earlier one for the same model.
    family_rules = [
        (lambda name: name.startswith("transk"), greens),
        (lambda name: name.startswith("tess") and "smi" in name, purples),
        (lambda name: "sme" in name, reds),
        (lambda name: name.startswith("tess") and "smi" not in name, greys),
        (lambda name: "trocr" in name, blues),
    ]

    color_map: dict[str, str] = {}
    for is_member, palette in family_rules:
        family_models = [name for name in models if is_member(name)]
        for position, name in enumerate(family_models):
            # Modulo wraps around so large families reuse colours rather than
            # running off the end of the palette.
            color_map[name] = palette[position % len(palette)]
    return color_map

In [None]:
# Row filters for the bar chart, built as boolean masks over plot_data_df.
# Inverted scores are stored as "1 - <score>"; match that exact prefix so that
# score names which merely contain the digit 1 are not dropped by accident.
is_inverted_score = plot_data_df.score.str.startswith("1 - ")
is_mean_score = plot_data_df.score.str.contains("mean", regex=False)
is_sme_model = plot_data_df.model.str.contains("sme", regex=False)
is_smi_model = plot_data_df.model.str.contains("smi", regex=False)

# Keep plain, non-mean scores of models whose name contains "smi" but not
# "sme", ordered by value.
plot_df = plot_data_df[
    ~is_inverted_score & ~is_mean_score & is_smi_model & ~is_sme_model
].sort_values("value")

color_map = get_model_colormap(list(plot_df.model.unique()))

# One group of bars per score, one bar per model, coloured by model family.
plot_df.plot.bar(
    x="score", y="value", color="model", barmode="group", color_discrete_map=color_map
)