# Visualize mean scores

In [None]:
import pandas as pd
from pathlib import Path

# Directory that is searched for the metrics CSV files. The path is relative,
# so it is resolved against the current working directory at run time.
output_dir = Path("../data/output/2024")


def _strip_suffix(text, suffix):
    """Return *text* without a trailing *suffix*; unchanged if it is absent."""
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text


def filename_to_data(filename):
    """Split a metrics file stem into date, model name and language codes.

    The stem is expected to have the form
    ``<date>_<model name>_with_metrics_<language code>``: everything before
    the first underscore is the date, and the text after ``_with_metrics_``
    is the language code of the metrics.

    The language the model predicts is derived from the model name. The rules
    are checked in this order and a later match overrides an earlier one:

    * name contains ``chirp``, ``usm``, ``azure`` or ``gcloud`` -> ``"nob"``
    * name contains ``bokmaal`` or ends with ``_no`` / ``_nob`` -> ``"nob"``
    * name contains ``nynorsk`` or ends with ``_nn`` -> ``"nno"``
    * otherwise -> ``""``

    The language markers ``-bokmaal-v2``, ``-nynorsk`` and a trailing
    ``_no`` / ``_nob`` / ``_nn`` are removed from the returned model name.

    Args:
        filename: File stem (file name without extension).

    Returns:
        A tuple ``(date, model_name, language_code, prediction_langcode)``
        of strings.

    Raises:
        ValueError: If the stem does not contain exactly one
            ``_with_metrics_`` separator after the date.
    """
    date, _, rest = filename.partition("_")
    parts = rest.split("_with_metrics_")
    if len(parts) != 2:
        raise ValueError(
            "Expected a name like '<date>_<model>_with_metrics_<language>', "
            f"got {filename!r}"
        )
    model_name, language_code = parts

    prediction_langcode = ""
    if any(tag in model_name for tag in ("chirp", "usm", "azure", "gcloud")):
        prediction_langcode = "nob"

    if "bokmaal" in model_name or model_name.endswith(("_no", "_nob")):
        prediction_langcode = "nob"
        model_name = model_name.replace("-bokmaal-v2", "")
        # Only strip the tag at the end of the name: a blanket
        # str.replace("_no", "") would also eat "_no" inside the name
        # (e.g. "my_norm_model_no" -> "myrm_model").
        for suffix in ("_nob", "_no"):
            model_name = _strip_suffix(model_name, suffix)

    if "nynorsk" in model_name or model_name.endswith("_nn"):
        prediction_langcode = "nno"
        model_name = model_name.replace("-nynorsk", "")
        # Same reasoning as above: remove the trailing tag only.
        model_name = _strip_suffix(model_name, "_nn")

    return date, model_name, language_code, prediction_langcode


# Load every metrics CSV in `output_dir`, tag its rows with the metadata
# encoded in the file name, and combine everything into one DataFrame `df`.
dfs = []
# Sorting makes the row order of the combined frame reproducible; the order in
# which glob yields paths depends on the file system.
for file in sorted(output_dir.glob("*with_metrics*.csv")):
    # Files with "bokmaal" in the name are only used when they also carry "-v2".
    if "bokmaal" in file.stem and "-v2" not in file.stem:
        continue
    date, model_name, language_code, prediction_langcode = filename_to_data(file.stem)
    file_df = pd.read_csv(file)
    file_df["prediction_date"] = pd.to_datetime(date, format="%Y-%m-%d")
    file_df["prediction_langcode"] = prediction_langcode
    file_df["model_name"] = model_name
    file_df["metrics_language"] = language_code

    dfs.append(file_df)

# pd.concat raises a bare "No objects to concatenate" on an empty list; fail
# with a message that says where the files were expected instead.
if not dfs:
    raise ValueError(
        f"No usable '*with_metrics*.csv' files found in {output_dir.resolve()}"
    )
df = pd.concat(dfs, ignore_index=True)

In [None]:
# Abbreviated dialect codes -> spelled-out labels.
dialect_replace = dict(
    w="vest",
    n="nord",
    t="trøndersk",
    sw="sørvest",
    e="øst",
)

# Abbreviated gender codes -> spelled-out labels.
gender_replace = dict(m="mann", f="kvinne")

# Swap the codes for their labels in the matching columns of `df`.
for column, labels in (("dialect", dialect_replace), ("gender", gender_replace)):
    df[column] = df[column].replace(labels)

In [None]:
# Display name of each score -> attribute/column of `df` that holds it.
score_columns = {
    "CER": "cer",
    "WER": "wer",
    "aligned semantic distance": "aligned_semdist",
    "semantic distance": "semdist",
    "semantic distance (sBERT)": "sbert_semdist",
}

# One list per output column; each group below contributes one entry to each.
data_dict = {"modell": [], "språk": []}
data_dict.update({label: [] for label in score_columns})

group_keys = ["model_name", "metrics_language", "prediction_langcode"]
for (model, lang, pred_lang), group in df.groupby(group_keys):
    # Groups with an empty prediction language code are left out of the table.
    if pred_lang != "":
        data_dict["modell"].append(model)
        data_dict["språk"].append(lang)
        for label, column in score_columns.items():
            data_dict[label].append(getattr(group, column).mean())


mean_score_df = pd.DataFrame(data_dict)

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

import plotly.express as px

# Create the Dash application object
app = dash.Dash(__name__)

# Scores offered in the dropdown; each name is used as both label and value
score_names = (
    "CER",
    "WER",
    "aligned semantic distance",
    "semantic distance",
    "semantic distance (sBERT)",
)

score_dropdown = dcc.Dropdown(
    id="score-dropdown",
    options=[{"label": name, "value": name} for name in score_names],
    value="CER",
)

# Page layout: heading, score selector, and the graph that shows the bars
app.layout = html.Div(
    [
        html.H1("Visualisering av gjennomsnittscore"),
        score_dropdown,
        dcc.Graph(id="bar-chart"),
    ]
)


# Redraw the bar chart whenever the value of the score dropdown changes
@app.callback(Output("bar-chart", "figure"), Input("score-dropdown", "value"))
def update_bar_chart(selected_score):
    """Build the grouped bar chart for the selected score.

    Args:
        selected_score: Name of the `mean_score_df` column to plot, as
            delivered by the "score-dropdown" component. It is ``None``
            when the dropdown has been cleared.

    Returns:
        A Plotly figure with one bar per row of `mean_score_df`, with
        "modell" on the x axis, coloured by "språk" and sorted by the
        selected score in descending order. Returns ``dash.no_update``
        when no score is selected, which leaves the current chart as is.
    """
    # A cleared dropdown sends None; sorting by None would raise, so keep the
    # chart that is already displayed.
    if selected_score is None:
        return dash.no_update

    sorted_scores = mean_score_df.sort_values(by=selected_score, ascending=False)
    fig = px.bar(
        sorted_scores,
        x="modell",
        y=selected_score,
        color="språk",
        barmode="group",
        labels={
            "modell": "Modell",
            selected_score: "Gjennomsnittscore",
            "språk": "Språk",
        },
        title=f"Gjennomsnittlig {selected_score} (lavere er bedre)",
    )
    return fig


# Start the Dash server, but only when this code is executed as the main
# module (not when it is imported from elsewhere).
if __name__ == "__main__":
    # NOTE(review): per the Dash docs, jupyter_mode="tab" should open the app
    # in a separate browser tab when run from Jupyter, and debug=True enables
    # the Dash dev tools — confirm against the installed Dash version.
    app.run(debug=True, jupyter_mode="tab")