# Language-specific results

In [1]:
from ast import literal_eval
from collections import defaultdict
from pathlib import Path

import pandas as pd

# Maps the language codes found in the evaluation files to the language
# names used as labels in the tables and plots further down.
langcode_name_map = {
    "sma": "sørsamisk",
    "sme": "nordsamisk",
    "smj": "lulesamisk",
    "smn": "enaresamisk",
}

# Evaluation granularity to load. Uncomment the first line (and comment out
# the second) to load the page-level results instead.
# level = "page_level"
level = "line_level"

# Long-format accumulator: one entry per (model, language, score) combination.
data = defaultdict(list)

# Path.iterdir() yields entries in arbitrary order; sorting keeps the row
# order of `df` (and therefore tie-breaking in later sorts) reproducible.
for model_dir in sorted(Path(f"../output/evaluation/{level}").iterdir()):
    csv_path = model_dir / "row_level.csv"
    # Skip stray files and model directories that have no row-level results
    # instead of aborting the whole load.
    if not csv_path.is_file():
        continue

    model_name = model_dir.name
    plot_df = pd.read_csv(csv_path)
    # Evaluations without language annotations cannot be split per language.
    if "langcodes" not in plot_df.columns:
        continue

    for langcodes, df_ in plot_df.groupby("langcodes"):
        # The column holds a Python literal sequence serialised as a string;
        # only its first code is used to label the group.
        langcode = literal_eval(langcodes)[0]
        language = langcode_name_map[langcode]

        # One row per score, holding the mean over all rows of the group.
        for score in ("WER", "CER"):
            data["model"].append(model_name)
            data["score"].append(score)
            data["value"].append(df_[score].mean())
            data["language"].append(language)

df = pd.DataFrame(data)

In [None]:
# Number of models to list per language and score.
n = 10

# For every language, print the n models with the lowest mean CER followed
# by the n models with the lowest mean WER.
for lang, lang_rows in df.groupby("language"):
    cer_rows = lang_rows[lang_rows["score"] == "CER"]
    cer_top = cer_rows.sort_values("value").head(n)
    print(f"Topp {n} CER for {lang}")
    print(cer_top[["model", "value"]])

    # Blank separator between the CER and the WER table.
    print("\n")

    wer_rows = lang_rows[lang_rows["score"] == "WER"]
    wer_top = wer_rows.sort_values("value").head(n)
    print(f"Topp {n} WER for {lang}")
    print(wer_top[["model", "value"]])

## Plot med språk i dropdown

In [None]:
import dash
import plotly.express as px  # needed by the callback below
from dash import Input, Output, dcc, html
from plotly import colors

# Number of best models kept per language and score.
n = 5

# For every (language, score) pair keep the n rows with the lowest value.
# Each group already carries its own "language" and "score" columns, so
# they do not need to be re-assigned.
best_rows = [
    group.sort_values("value").head(n)
    for _, group in df.groupby(["language", "score"])
]

plot_df = pd.concat(best_rows)

# Give every model one fixed colour so it looks the same for all languages.
# The palette has a fixed length, so wrap around rather than raising an
# IndexError when there are more distinct models than colours.
palette = colors.qualitative.Plotly
model_color_map = {
    model: palette[i % len(palette)]
    for i, model in enumerate(plot_df.model.unique())
}


app = dash.Dash(__name__)
app.layout = html.Div(
    [
        dcc.Dropdown(
            id="language-dropdown",
            options=plot_df.language.unique(),
            value="nordsamisk",
        ),
        dcc.Graph(id="bar-chart"),
    ]
)


@app.callback(Output("bar-chart", "figure"), [Input("language-dropdown", "value")])
def update_bar_chart(selected_lang):  # noqa
    """Build the grouped bar chart for the language chosen in the dropdown.

    Args:
        selected_lang: Language name as it appears in ``plot_df["language"]``.

    Returns:
        A bar figure with one group per score and one bar per model, using
        the rows of ``plot_df`` for that language sorted by ascending value.
    """
    filtered_df = plot_df[plot_df["language"] == selected_lang]
    filtered_df = filtered_df.sort_values("value")
    fig = px.bar(
        filtered_df,
        x="score",
        y="value",
        color="model",
        barmode="group",
        color_discrete_map=model_color_map,
        title=f"De {n} beste modellene for {selected_lang}",
    )
    return fig


if __name__ == "__main__":
    # NOTE(review): newer Dash releases prefer `app.run` over `run_server`;
    # confirm against the installed Dash version before changing.
    app.run_server(debug=True)

## Plot med modell i dropdown

In [4]:
# (language name, hex colour) pairs; each language keeps the same bar colour
# regardless of which model is being plotted.
_LANGUAGE_COLORS = (
    ("sørsamisk", "#dc241f"),
    ("nordsamisk", "#007229"),
    ("lulesamisk", "#ffce00"),
    ("enaresamisk", "#0035ad"),
)

language_color_map = dict(_LANGUAGE_COLORS)

In [None]:
import dash
import plotly.express as px
from dash import Input, Output, dcc, html

# Dash app showing, for one model at a time, its scores for every language.
app = dash.Dash(__name__)

model_dropdown = dcc.Dropdown(
    id="model-dropdown",
    options=df["model"].unique(),
    value="fin",
)
score_graph = dcc.Graph(id="bar-chart")

app.layout = html.Div([model_dropdown, score_graph])


@app.callback(Output("bar-chart", "figure"), [Input("model-dropdown", "value")])
def update_bar_chart(selected_model):  # noqa
    """Build the grouped bar chart for the model chosen in the dropdown.

    Args:
        selected_model: Model name as it appears in ``df["model"]``.

    Returns:
        A bar figure with one group per score and one bar per language,
        using the rows of ``df`` for that model sorted by ascending value.
    """
    is_selected = df["model"] == selected_model
    model_scores = df[is_selected].sort_values("value")
    return px.bar(
        model_scores,
        x="score",
        y="value",
        color="language",
        barmode="group",
        color_discrete_map=language_color_map,
        title=selected_model,
    )


if __name__ == "__main__":
    app.run_server(debug=True)