# Visualize last year's results compared to this year's results

In [None]:
from pathlib import Path

# Directories holding the metric CSVs produced for each evaluation year.
p_current = Path("../data/output/2024")
p_previous = Path("../data/output/2023")

# Every CSV whose name contains "_with_metrics" is a per-model result file.
metrics_glob = "*_with_metrics*.csv"
current_metric_files = list(p_current.glob(metrics_glob))
prev_metric_files = list(p_previous.glob(metrics_glob))

In [None]:
import pandas as pd


def filestem_to_data(filestem: str) -> tuple[str, str, str, str]:
    """Parse a metrics file stem into its metadata fields.

    The stem is expected to look like
    ``<date>_<model name>_with_metrics_<language code>``: everything before
    the first underscore is the date, and the remainder is split around
    ``_with_metrics_``.

    The model name is then normalised so that variants of one model share a
    single name:

    * names containing ``chirp``, ``usm``, ``azure`` or ``gcloud`` are tagged
      as predicting ``nob``;
    * names containing ``bokmaal`` or ending in ``_no``/``-no``/``_nob`` are
      tagged ``nob`` and have ``-bokmaal-v2``, ``_nob``, ``_no`` and ``-no``
      removed; if ``voxrex`` is still part of the name afterwards, the name
      becomes ``nb-wav2vec2-1b``;
    * names containing ``nynorsk`` or ending in ``_nn``/``-nn`` are tagged
      ``nno`` and have ``-nynorsk``, ``_nn`` and ``-nn`` removed;
    * finally ``-long``, ``NbAiLab_`` and ``-v3`` are dropped and ``openai_``
      becomes ``openai-``.

    Markers are removed with ``str.replace``, so every occurrence in the name
    is removed, not only a trailing one.

    Args:
        filestem: File name without its extension.

    Returns:
        ``(date, model_name, language_code, prediction_langcode)``, where
        ``prediction_langcode`` is ``""`` when none of the rules matched.

    Raises:
        ValueError: If the part after the date does not contain exactly one
            ``_with_metrics_`` separator.
    """
    date, _, rest = filestem.partition("_")

    parts = rest.split("_with_metrics_")
    if len(parts) != 2:
        # The glob used to collect files also matches stems without a trailing
        # language code; fail with a message that names the offending stem.
        raise ValueError(
            f"Expected '<date>_<model>_with_metrics_<language>', got {filestem!r}"
        )
    model_name, language_code = parts

    prediction_langcode = ""
    if any(
        marker in model_name for marker in ("chirp", "usm", "azure", "gcloud")
    ):
        prediction_langcode = "nob"

    if "bokmaal" in model_name or model_name.endswith(("_no", "-no", "_nob")):
        prediction_langcode = "nob"
        # Order matters: "_nob" must be removed before its prefix "_no".
        for marker in ("-bokmaal-v2", "_nob", "_no", "-no"):
            model_name = model_name.replace(marker, "")
        if "voxrex" in model_name:
            model_name = "nb-wav2vec2-1b"

    # Checked after the bokmål clean-up, so a nynorsk marker wins if both match.
    if "nynorsk" in model_name or model_name.endswith(("_nn", "-nn")):
        prediction_langcode = "nno"
        for marker in ("-nynorsk", "_nn", "-nn"):
            model_name = model_name.replace(marker, "")

    for old, new in (
        ("-long", ""),
        ("NbAiLab_", ""),
        ("openai_", "openai-"),
        ("-v3", ""),
    ):
        model_name = model_name.replace(old, new)

    return date, model_name, language_code, prediction_langcode


# Read every 2023 metrics file and tag its rows with the metadata parsed from
# the file name. The `dialect_2024` column is copied into `dialect`.
dfs = []
for csv_path in prev_metric_files:
    date, model_name, language_code, pred_lang = filestem_to_data(csv_path.stem)
    frame = pd.read_csv(csv_path)
    dfs.append(
        frame.assign(
            dialect=frame.dialect_2024,
            date=date,
            model_name=model_name,
            language_code=language_code,
            prediction_langcode=pred_lang,
            year=2023,
        )
    )

data_df = pd.concat(dfs)

In [None]:
# Load the 2024 metric files, keeping only models that are already present in
# `data_df`, then append them and trim the frame to the columns used below.

# The set of already-loaded model names does not change inside the loop, so
# compute it once instead of on every iteration.
known_models = set(data_df.model_name.unique())

dfs = []

for e in current_metric_files:
    # Of the "bokmaal" files, only the "-v2" ones are included.
    if "bokmaal" in e.stem and "-v2" not in e.stem:
        continue
    date, model_name, language_code, pred_lang = filestem_to_data(e.stem)
    # Skip files without a recognised prediction language and models that are
    # not in `data_df` *before* reading the CSV, so no file is parsed in vain.
    if pred_lang == "" or model_name not in known_models:
        continue

    df = pd.read_csv(e)
    df["date"] = date
    df["model_name"] = model_name
    df["language_code"] = language_code
    df["prediction_langcode"] = pred_lang
    df["year"] = 2024

    dfs.append(df)

current_year_df = pd.concat(dfs)

data_df = pd.concat([data_df, current_year_df])


# Metric columns plus the metadata needed for grouping and plotting.
columns_to_keep = [
    "cer",
    "wer",
    "sbert_semdist",
    "semdist",
    "aligned_semdist",
    "date",
    "model_name",
    "language_code",
    "prediction_langcode",
    "year",
    "dialect",
    "gender",
]

data_df = data_df[columns_to_keep]

In [None]:
# Lookup tables from the short codes stored in the data to readable labels.
dialect_replace = {"w": "vest", "n": "nord", "t": "trøndersk", "sw": "sørvest", "e": "øst"}
gender_replace = {"m": "mann", "f": "kvinne"}

# Apply each lookup table to its column; values without an entry are kept.
for column, code_to_label in (
    ("dialect", dialect_replace),
    ("gender", gender_replace),
):
    data_df[column] = data_df[column].replace(code_to_label)

In [None]:
# Column names of the summary table, in the order the row tuples below use.
summary_columns = [
    "modell",
    "språk",
    "CER",
    "WER",
    "aligned semantic distance",
    "semantic distance",
    "semantic distance (sBERT)",
    "year",
]

# One row of mean scores per (model, language, prediction language, year)
# group; groups with an empty prediction language are left out.
summary_rows = [
    (
        model,
        lang,
        group.cer.mean(),
        group.wer.mean(),
        group.aligned_semdist.mean(),
        group.semdist.mean(),
        group.sbert_semdist.mean(),
        year,
    )
    for (model, lang, pred_lang, year), group in data_df.groupby(
        ["model_name", "language_code", "prediction_langcode", "year"]
    )
    if pred_lang != ""
]

# Transpose the rows into one list per column.
data_dict = {
    name: [row[position] for row in summary_rows]
    for position, name in enumerate(summary_columns)
}

mean_score_df = pd.DataFrame(data_dict)
# Treat the year as a discrete category rather than a number.
mean_score_df["year"] = mean_score_df["year"].astype("category")

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from plotly.colors import qualitative
import plotly.express as px

# Create the Dash application
app = dash.Dash(__name__)

# Choices offered by the two dropdowns; label and value are the same text.
score_choices = [
    "CER",
    "WER",
    "aligned semantic distance",
    "semantic distance",
    "semantic distance (sBERT)",
]
language_choices = ["nob", "nno"]

score_dropdown = dcc.Dropdown(
    id="score-dropdown",
    options=[{"label": choice, "value": choice} for choice in score_choices],
    value="CER",
)
lang_dropdown = dcc.Dropdown(
    id="lang-dropdown",
    options=[{"label": choice, "value": choice} for choice in language_choices],
    value="nno",
)

# Page layout: heading, the two selectors, then the chart they control.
app.layout = html.Div(
    [
        html.H1("Visualisering av gjennomsnittscore"),
        score_dropdown,
        lang_dropdown,
        dcc.Graph(id="bar-chart"),
    ]
)


# Define the callback to update the bar chart
@app.callback(
    Output("bar-chart", "figure"),
    Input("score-dropdown", "value"),
    Input("lang-dropdown", "value"),
)
def update_bar_chart(selected_score, selected_lang):
    """Build the grouped bar chart for the selected metric and language.

    Args:
        selected_score: Name of the ``mean_score_df`` column to plot on the
            y-axis (value of the score dropdown).
        selected_lang: Language code used to filter ``mean_score_df.språk``
            (value of the language dropdown).

    Returns:
        A Plotly bar figure with one bar per model and year, grouped by year.
    """
    plot_df = mean_score_df[mean_score_df.språk == selected_lang].sort_values(
        by=["year", selected_score], ascending=True
    )
    # `year` holds integer categories, but the keys of `color_discrete_map`
    # below are strings. Plotly matches them by equality, so without this cast
    # the custom colours are never applied.
    plot_df = plot_df.assign(year=plot_df["year"].astype(str))

    fig = px.bar(
        plot_df,
        x="modell",
        y=selected_score,
        color="year",
        color_discrete_map={
            "2024": qualitative.Dark2[0],
            "2023": qualitative.Pastel2[0],
        },
        barmode="group",
        labels={
            "modell": "Modell",
            selected_score: "Gjennomsnittscore",
            "språk": "Språk",
            "year": "År",
        },
        title=f"Gjennomsnittlig {selected_score} (lavere er bedre)",
    )
    return fig


# Run the app
# Starts the Dash server only when this code runs as the main module. Debug
# mode is enabled and `jupyter_mode="tab"` is passed for notebook use
# (NOTE(review): see Dash's Jupyter documentation for the exact display
# behaviour of this mode).
if __name__ == "__main__":
    app.run(debug=True, jupyter_mode="tab")