# Compare results for different dialects

In [None]:
from pathlib import Path
from notebook_utils import load_files_to_df, expand_abbreviations, make_plot
from asr_eval import utils
import pandas as pd


# Turn on pandas copy-on-write mode for every cell that follows.
pd.set_option("mode.copy_on_write", True)

In [None]:
# Directory that holds the 2024 output files.
output_dir = Path("../data/output/2024")

# Read the files under output_dir into one frame and pass it straight
# through expand_abbreviations.
df = expand_abbreviations(load_files_to_df(output_dir))

In [None]:
# Columns retained for the dialect comparison; the order here is the column
# order of the resulting frame.
columns_to_keep = [
    # score columns
    "cer",
    "wer",
    "sbert_semdist",
    "semdist",
    "aligned_semdist",
    # metadata columns
    "date",
    "model_name",
    "language_code",
    "prediction_langcode",
    "year",
    "dialect",
    "gender",
    # text columns
    "standardized_text",
    "standardized_text_nn",
    "standardized_prediction",
]

# Keep only the listed columns (raises KeyError if any of them is missing).
df = df.loc[:, columns_to_keep]

In [None]:
# The per-dialect mean scores are cached as a CSV file: reuse the file when it
# is already there, otherwise compute the scores and write the cache.
mean_score_file = Path("../data/output/2024/mean_scores_dialect.csv")
if mean_score_file.exists():
    mean_score_df = pd.read_csv(mean_score_file)
else:
    # One row per (modell, språk, dialect) combination is kept.
    mean_score_df = utils.calculate_mean_scores(df, "dialect").drop_duplicates(
        subset=["modell", "språk", "dialect"]
    )
    mean_score_df.to_csv(mean_score_file, index=False)

In [None]:
# Folder passed to make_plot as save_to_dir; created up front, and left
# untouched when it already exists.
imagedir = Path("images")
imagedir.mkdir(parents=True, exist_ok=True)

## Bokmål

In [None]:
# Heatmap of WER by dialect for Bokmål ("nob"); imagedir is handed to
# make_plot as the save directory.
nob_heatmap_options = {
    "plot_type": "heatmap",
    "feature": "dialect",
    "metric": "WER",
    "language": "nob",
    "figsize": (12, 8),
    "save_to_dir": imagedir,
}
make_plot(mean_score_df, **nob_heatmap_options)

## Nynorsk

In [None]:
# Heatmap of WER by dialect for Nynorsk ("nno"); imagedir is handed to
# make_plot as the save directory.
nno_heatmap_options = {
    "plot_type": "heatmap",
    "feature": "dialect",
    "metric": "WER",
    "language": "nno",
    "figsize": (12, 6),
    "save_to_dir": imagedir,
}
make_plot(mean_score_df, **nno_heatmap_options)