# Overlapping speech 

In [None]:
from notebook_utils import (
    load_files_to_df,
    expand_abbreviations,
    make_plot,
)
from asr_eval import utils
from pathlib import Path
import pandas as pd


# Turn on pandas' copy-on-write option; the setting is global, so it applies
# to every DataFrame operation in the cells below.
pd.options.mode.copy_on_write = True

In [None]:
# Location of the 2024 output files that this notebook analyses.
p_current = Path("../data/output/2024")

# Read the files into one DataFrame and pass the result straight through
# expand_abbreviations (presumably replaces abbreviated values with their
# long forms -- see notebook_utils for the exact mapping).
df = expand_abbreviations(load_files_to_df(p_current))

In [None]:
def recreate_audiofilepath(segmented_audio_path: str) -> str:
    """Return the path with its last two "_"-separated fields removed.

    Segment paths are assumed to end in ``_<start>_<end>`` (TODO confirm
    against the segmentation step); stripping those two fields leaves a
    key that is shared by all segments cut from the same recording.

    Args:
        segmented_audio_path: Path of a single audio segment.

    Returns:
        Everything before the second-to-last underscore. If the path
        contains fewer than two underscores the result is an empty string.
    """
    # rpartition splits on the LAST underscore, so underscores that are part
    # of the recording name itself are left untouched.
    without_end, _, _ = segmented_audio_path.rpartition("_")
    audiofile, _, _ = without_end.rpartition("_")
    return audiofile


# Key every segment by the recording it was cut from, so neighbouring
# segments of the same recording can be compared.
df["audiofile"] = df["segmented_audio"].apply(recreate_audiofilepath)

# The neighbour comparison below relies on this ordering: rows are grouped by
# model, then by recording, and sorted by start time inside each recording.
df = df.sort_values(["model_name", "audiofile", "start_ms"])

# Compare at whole-second resolution (millisecond values rounded to seconds).
df["start_s"] = (df["start_ms"] / 1000).round(0)
df["end_s"] = (df["end_ms"] / 1000).round(0)

# A neighbouring row is only a valid comparison partner when it belongs to
# the same recording AND the same model. Checking the recording alone is not
# enough: rows are sorted by model first, so the last segment of one model
# and the first segment of the next model can be adjacent while sharing the
# same audiofile, which would wrongly flag both as overlapping.
same_source_as_prev = (df["audiofile"] == df["audiofile"].shift(1)) & (
    df["model_name"] == df["model_name"].shift(1)
)
same_source_as_next = (df["audiofile"] == df["audiofile"].shift(-1)) & (
    df["model_name"] == df["model_name"].shift(-1)
)

# Two segments overlap if the current segment starts before the previous one
# has ended, or ends after the next one has started. Comparisons against the
# NaN produced by shift() at the first/last row evaluate to False.
starts_before_prev_ends = df["start_s"] < df["end_s"].shift(1)
ends_after_next_starts = df["end_s"] > df["start_s"].shift(-1)

df["overlapping"] = (starts_before_prev_ends & same_source_as_prev) | (
    ends_after_next_starts & same_source_as_next
)

# Optional filter, disabled by default: keep only rows where "shared" is False.
# df = df[df["shared"] == False]

# Percentage of rows flagged / not flagged as overlapping (cell output).
df["overlapping"].value_counts() / len(df) * 100

In [None]:
# Mean scores per "overlapping" value are cached as a CSV file.
# NOTE: when the file already exists it is read back as-is, so it has to be
# deleted by hand to force a recomputation after df or the overlap logic
# changes.
mean_score_file = Path("../data/output/2024/mean_scores_overlapping.csv")
if mean_score_file.exists():
    mean_score_df = pd.read_csv(mean_score_file)
else:
    mean_score_df = utils.calculate_mean_scores(df, "overlapping")
    # Keep a single row per (modell, språk, overlapping) combination before
    # writing the cache file.
    mean_score_df.drop_duplicates(
        subset=["modell", "språk", "overlapping"],
        inplace=True,
    )
    mean_score_df.to_csv(mean_score_file, index=False)

In [None]:
# Directory passed to make_plot as save_to_dir in the cells below; it is
# created relative to the current working directory, and exist_ok=True makes
# re-running this cell harmless when the directory is already there.
imagedir = Path("images")
imagedir.mkdir(exist_ok=True)

## Heatmap

In [None]:
# Heatmap of WER split by the "overlapping" feature, language code "nob"
# (presumably Norwegian Bokmål). save_to_dir presumably makes make_plot write
# the figure into imagedir -- confirm in notebook_utils.
make_plot(
    mean_score_df,
    plot_type="heatmap",
    feature="overlapping",
    metric="WER",
    language="nob",
    save_to_dir=imagedir,
)

In [None]:
# Heatmap of WER split by the "overlapping" feature, language code "nno"
# (presumably Norwegian Nynorsk). save_to_dir presumably makes make_plot
# write the figure into imagedir -- confirm in notebook_utils.
make_plot(
    mean_score_df,
    plot_type="heatmap",
    feature="overlapping",
    metric="WER",
    language="nno",
    save_to_dir=imagedir,
)

## Barchart

In [None]:
# Bar chart of WER split by the "overlapping" feature, language code "nob"
# (presumably Norwegian Bokmål). save_to_dir presumably makes make_plot write
# the figure into imagedir -- confirm in notebook_utils.
make_plot(
    mean_score_df,
    plot_type="barchart",
    feature="overlapping",
    metric="WER",
    language="nob",
    save_to_dir=imagedir,
)

In [None]:
# Bar chart of WER split by the "overlapping" feature, language code "nno"
# (presumably Norwegian Nynorsk). save_to_dir presumably makes make_plot
# write the figure into imagedir -- confirm in notebook_utils.
make_plot(
    mean_score_df,
    plot_type="barchart",
    feature="overlapping",
    metric="WER",
    language="nno",
    save_to_dir=imagedir,
)