In [1]:
import pandas as pd
from pathlib import Path

p = Path(".")
files = list(p.resolve().rglob("0*.jsonl"))
print(files)

df = pd.concat([pd.read_json(f, orient="records", lines=True) for f in files], ignore_index=True)

from sklearn.metrics import accuracy_score, f1_score

df["accuracy"] = df.apply(
    lambda row: accuracy_score(row["y_true"], row["y_pred"]), axis=1
)
df["macroF1"] = df.apply(
    lambda row: f1_score(row["y_true"], row["y_pred"], average="macro"), axis=1
)

def assign_model(row):
    try:
        return row["train_config"]["model_name_or_path"]
    except:
        return "/".join(row["model_name_or_path"].split("/")[-2].split("_")[0:2])

df["model"] = df.apply(assign_model, axis=1)
df.shape
df.output_column.unique()

[PosixPath('/home/peterr/macocu/task11/010_results.jsonl'), PosixPath('/home/peterr/macocu/task11/011_results.jsonl'), PosixPath('/home/peterr/macocu/task11/013_results_english.jsonl'), PosixPath('/home/peterr/macocu/task11/008_results.jsonl'), PosixPath('/home/peterr/macocu/task11/012_results_nonslavic.jsonl'), PosixPath('/home/peterr/macocu/task11/012_results.jsonl'), PosixPath('/home/peterr/macocu/task11/013_results_slavic_asr.jsonl')]


array(['Speaker_gender', 'Speaker_name', 'Speaker_age_group',
       'Party_status'], dtype=object)

In [2]:
print(df.loc[(df.output_column == "Party_status") & df.eval_file.str.contains("test"), "model eval_file accuracy macroF1".split()].to_latex(index=False))

\begin{tabular}{llrr}
\toprule
                                        model &     eval\_file &  accuracy &   macroF1 \\
\midrule
       facebook/wav2vec2-large-960h-lv60-self &  012\_test.csv &  0.548889 &  0.531373 \\
       facebook/wav2vec2-large-960h-lv60-self &  012\_test.csv &  0.590000 &  0.587285 \\
  facebook/wav2vec2-large-slavic-voxpopuli-v2 &  012\_test.csv &  0.590000 &  0.587285 \\
 classla/wav2vec2-large-slavic-parlaspeech-hr &  012\_test.csv &  0.590000 &  0.587285 \\
 classla/wav2vec2-large-slavic-parlaspeech-hr &  012\_test.csv &  0.626667 &  0.625928 \\
\bottomrule
\end{tabular}



Let us only keep rows that were calculated on test split:

In [None]:
# df = df[df.eval_file.str.contains("test")]
# df.shape

In [None]:
df.columns

In [None]:
gb = df.groupby([
"output_column", 
"model", 
"eval_file", 
"clip_seconds"
]).agg(
    {"macroF1": "mean", "accuracy": "mean"}
).reset_index()

print(gb.to_markdown())

gb

Only `Party_status` experiments

In [None]:
df[
    (df.output_column == "Party_status") &
    df.eval_file.str.contains("test")
    ]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("science no-latex".split())
fig, ax = plt.subplots(figsize=(10,5))
sns.barplot(x="output_column", y="accuracy",ci="sd", data=df[df.eval_file.str.contains("test")], hue="clip_seconds", ax=ax)
plt.ylim((0.5, 1))
plt.show()

In [20]:
df.drop_duplicates(subset=["accuracy", "model_name_or_path",'eval_file', "clip_seconds"],inplace=True)
c1 = df.output_column == "Speaker_name"
c2 = df.clip_seconds == -1
c3 = [True if 0.8 < i < 1.1 else False for i in df.accuracy.values]
df.loc[c1, ["model_name_or_path", "clip_seconds", "eval_file", "accuracy", "macroF1", 'model']].sort_values(by="accuracy")

Unnamed: 0,model_name_or_path,clip_seconds,eval_file,accuracy,macroF1,model
5,models/facebook_wav2vec2-large-960h-lv60-self_...,2,003_speaker_id_test_for_datasets.csv,0.106,0.047947,facebook/wav2vec2-large-960h-lv60-self
4,models/facebook_wav2vec2-large-960h-lv60-self_...,2,003_speaker_id_dev_for_datasets.csv,0.14,0.079944,facebook/wav2vec2-large-960h-lv60-self
6,models/facebook_wav2vec2-large-960h-lv60-self_...,-1,003_speaker_id_dev_for_datasets.csv,0.316,0.255417,facebook/wav2vec2-large-960h-lv60-self
7,models/facebook_wav2vec2-large-960h-lv60-self_...,-1,003_speaker_id_test_for_datasets.csv,0.334,0.274969,facebook/wav2vec2-large-960h-lv60-self
25,models/facebook_wav2vec2-large-slavic-voxpopul...,2,003_speaker_id_dev_for_datasets.csv,0.738,0.703877,facebook/wav2vec2-large-slavic-voxpopuli-v2
26,models/facebook_wav2vec2-large-slavic-voxpopul...,2,003_speaker_id_test_for_datasets.csv,0.806,0.784407,facebook/wav2vec2-large-slavic-voxpopuli-v2
27,models/facebook_wav2vec2-large-slavic-voxpopul...,-1,003_speaker_id_dev_for_datasets.csv,0.992,0.991667,facebook/wav2vec2-large-slavic-voxpopuli-v2
28,models/facebook_wav2vec2-large-slavic-voxpopul...,-1,003_speaker_id_test_for_datasets.csv,0.998,0.997995,facebook/wav2vec2-large-slavic-voxpopuli-v2
42,/home/peterr/macocu/task11/models/classla_wav2...,-1,003_speaker_id_dev_for_datasets.csv,0.998,0.997995,classla/wav2vec2-large-slavic-parlaspeech-hr
44,/home/peterr/macocu/task11/models/classla_wav2...,2,003_speaker_id_dev_for_datasets.csv,0.998,0.997995,classla/wav2vec2-large-slavic-parlaspeech-hr
