# Compare results for different years

In [None]:
from pathlib import Path
import pandas as pd
from notebook_utils import get_formatted_score_df
from asr_eval import utils

# Cache file for the per-year mean scores; once it exists, the scores are not recomputed.
mean_score_file = Path("../data/output/2024/mean_scores_year.csv")

if not mean_score_file.exists():
    # No cache yet: build the table from the 2023 and 2024 output directories.
    p_previous = Path("../data/output/2023")
    p_current = Path("../data/output/2024")

    # Stack both years' score tables into one frame with a fresh 0..n-1 index.
    df = pd.concat(
        [get_formatted_score_df(p_previous), get_formatted_score_df(p_current)],
        ignore_index=True,
    )

    # NOTE(review): presumably averages the scores per model/language within each
    # "year" group -- confirm against asr_eval.utils.calculate_mean_scores.
    mean_score_df = utils.calculate_mean_scores(df, "year")
    # Keep a single row per (model, language, year) combination.
    mean_score_df.drop_duplicates(subset=["modell", "språk", "year"], inplace=True)
    # The cached branch below reads "year" as text; cast here as well so the first
    # run and every later run hand the same dtype to the plotting cells.
    mean_score_df["year"] = mean_score_df["year"].astype(str)
    mean_score_df.to_csv(mean_score_file, index=False)
else:
    # Read "year" as text so it is treated as a category label, not a number.
    mean_score_df = pd.read_csv(mean_score_file, dtype={"year": str})

# Plot method 1: the `make_plot` helper

In [None]:
from notebook_utils import make_plot

# Draw one bar chart per (metric, language) pair with the project's `make_plot`
# helper, passing the "images" directory as the save location.
lang_map = {"nob": "bokmål", "nno": "nynorsk"}

imagedir = Path("images")
imagedir.mkdir(exist_ok=True)  # fine if the directory is already there

year_plot_metrics = [
    "CER",
    "WER",
    "semantic distance (sBERT)",
    "semantic distance",
    "aligned semantic distance",
]
year_plot_languages = ["nno", "nob"]

# Metric-major order: every language is plotted for a metric before moving on
# to the next metric.
year_plot_jobs = [
    (m, lang) for m in year_plot_metrics for lang in year_plot_languages
]

for metric, language in year_plot_jobs:
    make_plot(
        df=mean_score_df,
        plot_type="barchart",
        feature="year",
        metric=metric,
        language=language,
        save_to_dir=imagedir,
    )

# Plot method 2: Plotly Express directly

In [None]:
import plotly.express as px
from plotly.colors import qualitative

# Draw one grouped bar chart per (metric, language) pair directly with Plotly
# Express and write each figure as a PNG into the "images" directory.
imagedir = Path("images")
imagedir.mkdir(exist_ok=True)

# Language codes used in the data -> names shown in the chart titles.
lang_map = {"nob": "bokmål", "nno": "nynorsk"}

# Colour the two most recent years present in the data: the previous year gets
# the pastel colour and the latest year the dark one. Deriving the keys from the
# data keeps the mapping valid when the compared years change; a hard-coded
# year that is absent from the data would silently fall back to Plotly's
# default colours.
year_palette = (qualitative.Pastel2[0], qualitative.Dark2[0])
recent_years = sorted(mean_score_df["year"].astype(str).unique())[-2:]
# Align from the end so that a single available year gets the "latest" colour.
year_colors = dict(zip(recent_years, year_palette[-len(recent_years):]))

for metric in [
    "CER",
    "WER",
    "semantic distance (sBERT)",
    "semantic distance",
    "aligned semantic distance",
]:
    for language in ["nno", "nob"]:
        fig = px.bar(
            # Only the rows for this language, ordered so the bars come out
            # year by year and model by model.
            mean_score_df[mean_score_df.språk == language].sort_values(
                by=["year", "modell", metric], ascending=True
            ),
            x="modell",
            y=metric,
            color="year",
            color_discrete_map=year_colors,
            barmode="group",
            labels={
                "modell": "Modell",
                metric: "Gjennomsnittscore",
                "språk": "Språk",
                "year": "År",
            },
            title=f"Gjennomsnittlig {metric} for {lang_map[language]}",
        )
        # File name: spaces in the metric name are replaced by hyphens.
        fig.write_image(
            imagedir / f"barchart_year_{'-'.join(metric.split())}_{language}.png"
        )