# Character-wise results

In [None]:
import json
from collections import defaultdict
from pathlib import Path

import pandas as pd

# Use plotly as the backend for DataFrame.plot.
pd.options.plotting.backend = "plotly"

# Long-format table: one row per (model, character) holding that character's F1 score.
character_data = defaultdict(list)

for level in Path("../output/evaluation/").iterdir():
    # Stray files in the evaluation root would make the inner iterdir() raise.
    if not level.is_dir():
        continue
    for model_dir in level.iterdir():
        if not model_dir.is_dir():
            continue
        model_name = f"{model_dir.name}_{level.name}"
        collection_level = json.loads((model_dir / "all_rows.json").read_text())

        for character, score_dict in collection_level.items():
            # Only dict-valued entries are read here; other entries are skipped.
            if not isinstance(score_dict, dict):
                continue
            character_data["model"].append(model_name)
            character_data["F1_score"].append(score_dict["F1"])
            character_data["character"].append(character)

char_df = pd.DataFrame(character_data)

In [None]:
import dash
import plotly.express as px
from dash import Input, Output, dcc, html

# Dash app: pick a character in the dropdown, see the chart with id "bar-chart" update.
app = dash.Dash(__name__)

character_picker = dcc.Dropdown(
    id="character-dropdown",
    options=char_df.character.unique(),
    value="Ä",
)
character_chart = dcc.Graph(id="bar-chart")

app.layout = html.Div([character_picker, character_chart])


@app.callback(Output("bar-chart", "figure"), [Input("character-dropdown", "value")])
def update_bar_chart(selected_character):  # noqa
    """Build the F1-per-model bar chart for the character chosen in the dropdown.

    Rows of ``char_df`` matching ``selected_character`` are ordered by
    ascending ``F1_score`` and plotted with the model on the x-axis.
    """
    is_selected = char_df.character == selected_character
    rows_for_character = char_df[is_selected].sort_values("F1_score")
    return px.bar(
        rows_for_character,
        x="model",
        y="F1_score",
        barmode="group",
        title=selected_character,
    )


if __name__ == "__main__":
    # `run_server` is the legacy entry point; newer Dash releases provide `run`
    # and may no longer accept `run_server`. Prefer `run`, fall back for old installs.
    start_app = app.run if hasattr(app, "run") else app.run_server
    start_app(debug=True)

# Language-specific results

In [None]:
from ast import literal_eval
from collections import defaultdict
from pathlib import Path

import pandas as pd

eval_p = Path("../output/evaluation")

# Long-format table: one row per (model, language, metric) with the mean metric value.
data = defaultdict(list)

# Language code -> display name used in the charts.
langcode_name_map = {
    "sma": "sørsamisk",
    "sme": "nordsamisk",
    "smj": "lulesamisk",
    "smn": "inaresamisk",
}

for level in eval_p.iterdir():
    # Stray files in the evaluation root would make the inner iterdir() raise.
    if not level.is_dir():
        continue
    for model_dir in level.iterdir():
        if not model_dir.is_dir():
            continue
        model_name = f"{model_dir.name}_{level.name}"

        plot_df = pd.read_csv(model_dir / "row_level.csv")
        # Results without language annotations cannot be split per language.
        if "langcodes" not in plot_df.columns:
            continue

        for langcodes_repr, lang_df in plot_df.groupby("langcodes"):
            # The column holds a Python literal (presumably a list of codes);
            # only its first entry decides which language the rows count towards.
            langcode = literal_eval(langcodes_repr)[0]
            language = langcode_name_map[langcode]
            mean_scores = (("WER", lang_df.WER.mean()), ("CER", lang_df.CER.mean()))

            for metric, mean_value in mean_scores:
                data["model"].append(model_name)
                data["score"].append(metric)
                data["value"].append(mean_value)
                data["language"].append(language)

df = pd.DataFrame(data)

## Plot med modell i dropdown

In [None]:
# Fixed bar colour per language, keyed by the display names used in the charts.
_language_names = ("sørsamisk", "nordsamisk", "lulesamisk", "inaresamisk")
_language_hex_colors = ("#dc241f", "#007229", "#ffce00", "#0035ad")
language_color_map = dict(zip(_language_names, _language_hex_colors))

In [None]:
import dash
import plotly.express as px
from dash import Input, Output, dcc, html

# Dash app: pick a model in the dropdown, see the chart with id "bar-chart" update.
app = dash.Dash(__name__)

model_picker = dcc.Dropdown(
    id="model-dropdown",
    options=df.model.unique(),
    value="fin_line_level",
)
model_chart = dcc.Graph(id="bar-chart")

app.layout = html.Div([model_picker, model_chart])


@app.callback(Output("bar-chart", "figure"), [Input("model-dropdown", "value")])
def update_bar_chart(selected_model):  # noqa
    """Build the per-language score chart for the model chosen in the dropdown.

    Rows of ``df`` belonging to ``selected_model`` are ordered by ascending
    ``value`` and drawn as grouped bars: one group per score, one bar per
    language, coloured via ``language_color_map``.
    """
    is_selected = df["model"] == selected_model
    rows_for_model = df[is_selected].sort_values("value")
    return px.bar(
        rows_for_model,
        x="score",
        y="value",
        color="language",
        barmode="group",
        color_discrete_map=language_color_map,
        title=selected_model,
    )


if __name__ == "__main__":
    # `run_server` is the legacy entry point; newer Dash releases provide `run`
    # and may no longer accept `run_server`. Prefer `run`, fall back for old installs.
    start_app = app.run if hasattr(app, "run") else app.run_server
    start_app(debug=True)

## Plot med språk i dropdown

In [None]:
import dash
from dash import Input, Output, dcc, html
from plotly import colors

best_rows = []
# Number of best-scoring models kept per language and score.
n = 5

for lang, df_ in df.groupby("language"):
    for score, df__ in df_.groupby("score"):
        # Ascending sort + head keeps the n lowest values of this score.
        # The "language" and "score" columns already equal the group keys,
        # so they are not re-assigned (writing to the head() slice is
        # redundant and may trigger pandas' chained-assignment warning).
        best_rows.append(df__.sort_values("value").head(n))

plot_df = pd.concat(best_rows)

# The palette is finite while the number of distinct models is not bounded by
# it, so wrap around instead of raising IndexError.
model_palette = colors.qualitative.Plotly
model_color_map = {
    model: model_palette[i % len(model_palette)]
    for i, model in enumerate(plot_df.model.unique())
}


# Dash app: pick a language in the dropdown, see the chart with id "bar-chart" update.
app = dash.Dash(__name__)

language_picker = dcc.Dropdown(
    id="language-dropdown",
    options=plot_df.language.unique(),
    value="nordsamisk",
)
language_chart = dcc.Graph(id="bar-chart")

app.layout = html.Div([language_picker, language_chart])


@app.callback(Output("bar-chart", "figure"), [Input("language-dropdown", "value")])
def update_bar_chart(selected_lang):  # noqa
    """Build the best-models chart for the language chosen in the dropdown.

    Rows of ``plot_df`` for ``selected_lang`` are ordered by ascending
    ``value`` and drawn as grouped bars: one group per score, one bar per
    model, coloured via ``model_color_map``. The title reads the module-level
    ``n`` at call time.
    """
    is_selected = plot_df["language"] == selected_lang
    rows_for_language = plot_df[is_selected].sort_values("value")
    return px.bar(
        rows_for_language,
        x="score",
        y="value",
        color="model",
        barmode="group",
        color_discrete_map=model_color_map,
        title=f"De {n} beste modellene for {selected_lang}",
    )


if __name__ == "__main__":
    # `run_server` is the legacy entry point; newer Dash releases provide `run`
    # and may no longer accept `run_server`. Prefer `run`, fall back for old installs.
    start_app = app.run if hasattr(app, "run") else app.run_server
    start_app(debug=True)

In [None]:
# Kept under its own name: the dashboard callback's title reads the global `n`,
# so rebinding `n` here would make that title disagree with the plotted data.
top_n = 10

# Rank from the full per-language table `df`. `plot_df` was already cut down to
# the best 5 rows per language and score, so it can never yield a top 10.
for lang, lang_df in df.groupby("language"):
    print(f"Topp {top_n} CER for {lang}")
    cer_df = lang_df[lang_df.score == "CER"]
    print(cer_df.sort_values("value")[["model", "value"]].head(top_n))
    print("\n")
    print(f"Topp {top_n} WER for {lang}")
    wer_df = lang_df[lang_df.score == "WER"]
    print(wer_df.sort_values("value")[["model", "value"]].head(top_n))

# Overall results

In [None]:
import json
from collections import defaultdict
from pathlib import Path

import pandas as pd

# Use plotly as the backend for DataFrame.plot.
pd.options.plotting.backend = "plotly"

# plot_data: long format, one row per (model, score), plus a "1 - score" row each.
# score_data: wide format, one list per score name alongside the "model" list.
plot_data = defaultdict(list)
score_data = defaultdict(list)

for level in Path("../output/evaluation/").iterdir():
    # Stray files in the evaluation root would make the inner iterdir() raise.
    if not level.is_dir():
        continue
    for model_dir in level.iterdir():
        if not model_dir.is_dir():
            continue
        model_name = f"{model_dir.name}_{level.name}"
        collection_level = json.loads((model_dir / "all_rows.json").read_text())

        score_data["model"].append(model_name)

        for score_name, value in collection_level.items():
            # Dict-valued entries are skipped; only the remaining values are scores here.
            if isinstance(value, dict):
                continue
            plot_data["model"].append(model_name)
            plot_data["score"].append(score_name)
            plot_data["value"].append(value)

            # add inverted score
            plot_data["model"].append(model_name)
            plot_data["score"].append(f"1 - {score_name}")
            plot_data["value"].append(1 - value)

            score_data[score_name].append(value)


plot_data_df = pd.DataFrame(plot_data)

In [None]:
# Show the long-format score table (one row per model and score, including the "1 - score" rows).
plot_data_df

In [None]:
def get_model_colormap(models: list[str]) -> dict[str, str]:
    """Map each model name to a hex colour chosen by model family.

    Families are detected by substring: names containing ``"transkribus"`` get
    greens, names containing ``"nor_smx"`` get purples, and all remaining names
    get reds. Within a family, colours are handed out in the order the models
    appear in ``models``; when a family has more models than its palette has
    colours, the palette wraps around instead of raising ``IndexError``.

    A name matching both substrings ends up with its purple colour.

    Args:
        models: Model names to colour.

    Returns:
        Dict from model name to hex colour string; empty for empty input.
    """
    greens = ["#4c9c0a", "#287e00", "#006000", "#004400", "#002a00"]
    reds = [
        "#ef553b",
        "#ca3320",
        "#a60205",
        "#830000",
        "#630000",
        "#ff4410",
        "#d91700",
        "#b30000",
        "#8f0000",
        "#6e0000",
    ]
    purples = [
        "#5d1669",
        "#e588ff",
        "#47006d",
        "#a05bc5",
        "#ffcbff",
        "#a057aa",
        "#733198",
        "#ce86f4",
        "#3a0060",
        "#faccff",
        "#640099",
        "#be93fd",
        "#a178df",
        "#4e025a",
        "#dcb0ff",
        "#845ec2",
    ]

    def assign(names: list[str], palette: list[str]) -> dict[str, str]:
        # Modulo keeps the index valid when there are more names than colours.
        return {name: palette[i % len(palette)] for i, name in enumerate(names)}

    transkribus_models = [e for e in models if "transkribus" in e]
    tesseract_models = [e for e in models if "nor_smx" in e]
    other_models = [e for e in models if "transkribus" not in e and "nor_smx" not in e]

    # Later entries win on key clashes, so a name in both families keeps the purple.
    return {
        **assign(transkribus_models, greens),
        **assign(tesseract_models, purples),
        **assign(other_models, reds),
    }

In [None]:
# The seven lowest CER rows across all models, drawn as grouped bars.
score_names = plot_data_df.score
# Drops the "1 - <score>" rows; note it also drops any score whose name contains "1".
is_not_inverted = score_names.apply(lambda name: "1" not in name)
is_cer = score_names.apply(lambda name: "CER" in name)
# Optional model filters (disabled): exclude "sme", exclude "page",
# or keep only names containing "20" or "transkribus".

plot_df = plot_data_df[is_not_inverted & is_cer].sort_values("value").head(7)

color_map = get_model_colormap(list(plot_df.model.unique()))

plot_df.plot.bar(x="score", y="value", color="model", barmode="group", color_discrete_map=color_map)

In [None]:
# The seven lowest-valued rows among models without "sme" in their name, drawn as grouped bars.
# Drops the "1 - <score>" rows; note it also drops any score whose name contains "1".
is_not_inverted = plot_data_df.score.apply(lambda name: "1" not in name)
is_not_sme_model = plot_data_df.model.apply(lambda name: "sme" not in name)
# Optional filters (disabled): exclude "page" models, keep only models
# containing "20" or "transkribus", or keep only WER scores.

plot_df = plot_data_df[is_not_inverted & is_not_sme_model].sort_values("value").head(7)

color_map = get_model_colormap(list(plot_df.model.unique()))
plot_df.plot.bar(x="score", y="value", color="model", barmode="group", color_discrete_map=color_map)

In [None]:
# df = pd.DataFrame(score_data)

# n = 3
# print(f"Topp {n} CER")
# print(df.sort_values("CER").head(n))
# print("\n")
# print(f"Topp {n} WER")
# print(df.sort_values("WER").head(n))