# Visualize last year's results compared to this year's results

In [None]:
from pathlib import Path
import pandas as pd
from notebook_utils import load_files_to_df

In [None]:
# Output folders for the previous and the current evaluation round.
p_previous = Path("../data/output/2023")
p_current = Path("../data/output/2024")

# Load each folder through the project helper (presumably one DataFrame of
# evaluation results per folder; verify against notebook_utils).
prev_year_df, current_year_df = (
    load_files_to_df(folder) for folder in (p_previous, p_current)
)

# Stack both years into a single frame, previous year first.
yearly_frames = [prev_year_df, current_year_df]
data_df = pd.concat(yearly_frames)

Filter the columns of interest and expand the dialect and gender abbreviations in the values.

In [None]:
# Filter columns
columns_to_keep = [
    "cer",
    "wer",
    "sbert_semdist",
    "semdist",
    "aligned_semdist",
    "date",
    "model_name",
    "language_code",
    "prediction_langcode",
    "year",
    "dialect",
    "gender",
]

# Take an explicit copy of the column subset: the assignments below must
# modify an independent frame. Without the copy pandas may treat the result as
# a slice of the original frame and emit SettingWithCopyWarning.
data_df = data_df[columns_to_keep].copy()

# Rename dialect abbreviations (values not listed here are left unchanged)
dialect_replace = {
    "w": "vest",
    "n": "nord",
    "t": "trøndersk",
    "sw": "sørvest",
    "e": "øst",
}

data_df["dialect"] = data_df["dialect"].replace(dialect_replace)

# Rename gender abbreviations (values not listed here are left unchanged)
gender_replace = {
    "m": "mann",
    "f": "kvinne",
}

data_df["gender"] = data_df["gender"].replace(gender_replace)

## Calculate mean scores for each model, language and year

In [None]:
# Display label of each score -> column in data_df holding the raw values.
# The order here is the order of the score columns in the result.
score_sources = {
    "CER": "cer",
    "WER": "wer",
    "aligned semantic distance": "aligned_semdist",
    "semantic distance": "semdist",
    "semantic distance (sBERT)": "sbert_semdist",
}
result_columns = ["modell", "språk", *score_sources, "year"]

group_keys = ["model_name", "language_code", "prediction_langcode", "year"]
score_rows = []
for (model_name, language, predicted_language, year), group in data_df.groupby(
    group_keys
):
    # Groups without a predicted language code are left out of the averages.
    if predicted_language == "":
        continue
    row = {"modell": model_name, "språk": language, "year": year}
    for label, source_column in score_sources.items():
        row[label] = group[source_column].mean()
    score_rows.append(row)

# One row per remaining group; exact duplicate rows are collapsed.
mean_score_df = pd.DataFrame(score_rows, columns=result_columns).drop_duplicates()
# Store the year as text.
mean_score_df["year"] = mean_score_df["year"].astype("str")

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
from plotly.colors import qualitative
import plotly.express as px
import io
import plotly.io as pio


# Create the Dash application object
app = dash.Dash(__name__)

# Entries offered by the two dropdowns; each entry is used both as the visible
# label and as the value passed to the callbacks.
score_choices = [
    "CER",
    "WER",
    "aligned semantic distance",
    "semantic distance",
    "semantic distance (sBERT)",
]
language_choices = ["nob", "nno"]

score_dropdown = dcc.Dropdown(
    id="score-dropdown",
    options=[{"label": choice, "value": choice} for choice in score_choices],
    value="CER",
)
language_dropdown = dcc.Dropdown(
    id="lang-dropdown",
    options=[{"label": choice, "value": choice} for choice in language_choices],
    value="nno",
)

# Page layout: heading, the two selectors, the chart, and the download controls
app.layout = html.Div(
    [
        html.H1("Visualisering av gjennomsnittscore"),
        score_dropdown,
        language_dropdown,
        dcc.Graph(id="bar-chart"),
        html.Button("Download Plot", id="download-button"),
        dcc.Download(id="download-plot"),
    ]
)


# Define the callback to update the bar chart
@app.callback(
    Output("bar-chart", "figure"),
    Input("score-dropdown", "value"),
    Input("lang-dropdown", "value"),
)
def update_bar_chart(selected_score, selected_lang):
    """Build the grouped bar chart of mean scores for one score and language.

    Args:
        selected_score: Name of the ``mean_score_df`` column to plot on the
            y-axis, as chosen in the score dropdown.
        selected_lang: Language code chosen in the language dropdown
            (``"nob"`` or ``"nno"``).

    Returns:
        A Plotly bar figure with the models on the x-axis and one bar per
        year, or ``dash.no_update`` when either dropdown has been cleared.
    """
    # A cleared dropdown sends None, which would raise in the column and
    # language lookups below; keep the chart that is currently shown instead.
    if selected_score is None or selected_lang is None:
        return dash.no_update

    lang_map = {"nob": "bokmål", "nno": "nynorsk"}

    # Derive the colours from the years actually present in the data rather
    # than hard-coding year labels that go stale when the input folders change:
    # the most recent year gets the dark colour, earlier years pastel ones.
    years = sorted(mean_score_df["year"].unique())
    year_colors = dict(zip(years[:-1], qualitative.Pastel2))
    if years:
        year_colors[years[-1]] = qualitative.Dark2[0]

    fig = px.bar(
        mean_score_df[mean_score_df.språk == selected_lang].sort_values(
            by=["modell", selected_score], ascending=True
        ),
        x="modell",
        y=selected_score,
        color="year",
        color_discrete_map=year_colors,
        barmode="group",
        labels={
            "modell": "Modell",
            selected_score: "Gjennomsnittscore",
            "språk": "Språk",
            "year": "År",
        },
        title=f"Gjennomsnittlig {selected_score} for {lang_map[selected_lang]}",
    )
    return fig


# Define the callback to download the plot
@app.callback(
    Output("download-plot", "data"),
    Input("download-button", "n_clicks"),
    State("bar-chart", "figure"),
    prevent_initial_call=True,
)
def download_plot(n_clicks, figure):
    """Send the currently displayed bar chart to the browser as an SVG file.

    Args:
        n_clicks: Click count of the download button.
        figure: Current ``figure`` property of the bar chart.

    Returns:
        The ``dcc.send_bytes`` payload for ``plot.svg``, or ``None`` when the
        button has not been clicked.
    """
    if not n_clicks:
        return None

    # Render the figure into an in-memory buffer and hand the bytes to Dash.
    with io.BytesIO() as svg_stream:
        pio.write_image(figure, svg_stream, format="svg", engine="kaleido")
        svg_bytes = svg_stream.getvalue()
    return dcc.send_bytes(svg_bytes, "plot.svg")


# Run the app
# The server is started only when this code runs as the main module, not when
# it is imported. debug=True and jupyter_mode="tab" are passed straight to
# app.run; NOTE(review): "tab" presumably opens the app in a separate browser
# tab when run from Jupyter -- confirm against the Dash documentation.
if __name__ == "__main__":
    app.run(debug=True, jupyter_mode="tab")