# 04 — Batch Results + Per-Class Analysis (Primary DP)

This notebook runs the **primary DP/beam decoder** over the 28-item test set,
optionally performs **EN TTS → HU ASR**, and computes **WER / CER / PER**.


In [3]:

from pathlib import Path
import pandas as pd

# Root of all notebook inputs and outputs, relative to the working directory.
DATA = Path("./data")

# Directories created up front so that a fresh checkout does not fail on the
# first write: DataFrame.to_csv does not create missing parent directories,
# and every aggregation cell below writes into DATA / "aggregation".
AUDIO_DIR = DATA / "audio_run"
AUDIO_DIR.mkdir(exist_ok=True, parents=True)
AGGREGATION_DIR = DATA / "aggregation"
AGGREGATION_DIR.mkdir(exist_ok=True, parents=True)

# Per-item results of the primary run; read by the per-class analysis.
RESULTS_CSV = DATA / "results/results_morph_ngram_dp_v2.csv"
# NOTE(review): OUT is not referenced by any visible cell; presumably the
# target for the scored results — confirm before removing.
OUT = DATA / "results/results_morph_ngram_dp_scored_v2.csv"

#import matplotlib
#matplotlib.use("Qt5Agg")

import matplotlib.pyplot as plt
%matplotlib inline

print(RESULTS_CSV)

data/results/results_morph_ngram_dp_v2.csv


## Per-class analysis (tags for Appendix A)

In [4]:
# Rough phonetic-class tags for each test item, keyed by the item's 1-based row
# position in the results CSV (explode_classes looks tags up with `i + 1`).
# An item without an entry contributes no rows to the per-class tables.
# The trailing comment on each entry is the Hungarian source sentence
# (truncated where it ends in "...").
ITEM_TAGS = {
 1: ["front_rounded","sibilants","affricate_cs","clusters","vowel_length"],                 # Mit sütsz kis szűcs
 2: ["front_rounded","vowel_length","plosives","rhythm"],                                    # Sárga bögre görbe bögre
 3: ["plosives","front_rounded","vowel_length","rhythm"],                                    # Fekete bikapata...
 4: ["vowel_length","sibilants","plosives","rhythm"],                                        # Két pék két szép...
 5: ["hiatus","final_long_e","vowel_length"],                                                # Jamaika a jamaikaiaké
 6: ["sibilant_clusters","affricates","affricate_cs","long_i","vowel_length"],               # A szecsuáni síncsiszoló...
 7: ["palatal_ty","palatal_ny","geminate","vowel_length"],                                   # Jobb egy lúdnyak...
 8: ["vowel_length","laterals"],                                                             # Lali a lila ló elalél
 9: ["vowel_length","vowel_harmony"],                                                        # Nem lehet a Márta...
10: ["palatal_gy","front_rounded","vowel_length"],                                          # Gyűrűt újra fűz
11: ["palatal_ny","vowel_length"],                                                           # Hány nyár
12: ["palatal_ty","affricate_cs","vowel_length"],                                           # Tyúk és csibe
13: ["affricate_dzs","word_initial","front_rounded","vowel_length"],                        # Dzsinn ül a dzsámi előtt
14: ["s_vs_sh","vowel_length","sibilants"],                                                  # Sárban szárad (s vs sz)
15: ["z_vs_zh","vowel_length","sibilants"],                                                  # Zsákban a zsemle (zs vs z)
16: ["voicing_assimilation_target","plosives"],                                             # Egy nagy kertben (t+b → d)
17: ["front_rounded_long","front_rounded_short","vowel_length"],                            # Őr űzi az ürgét
18: ["front_rounded","vowel_length"],                                                       # Két bögre teát kér
19: ["long_o","front_rounded_long","affricate_cs","vowel_length"],                          # A kórus a kocsma előtt áll
20: ["front_rounded_short","front_rounded_long","sibilants","clusters","vowel_length"],     # Szép zöld fű
21: ["front_rounded_short","front_rounded_long","sibilants","vowel_length"],                # Sütőtökből sütit süt
22: ["i_vs_long_i","geminate_dd","vowel_length"],                                           # Hidd el itt írok
23: ["long_a","i_vs_long_i_rhyme","vowel_length"],                                          # Vár a víz
24: ["front_rounded_long","sibilants","vowel_length"],                                      # Füstöl a fűrész
25: ["palatal_gy","palatal_gy_clusters","long_u","vowel_length","clusters"],                # Gyúl a Gyertya
26: ["long_vs_short_vowels","vowel_length"],                                                # Ég a lámpa
27: ["affricate_cs_onset","front_rounded_long","voiced_coda_g","vowel_length"],             # Csőből csöpög
28: ["front_rounded_long","clusters","sibilants","vowel_length"],                           # Kő őrzi őzt
}

def explode_classes(df, item_tags=None):
    """Expand per-item results to one row per (item, class tag) pair.

    Items are identified by their 1-based row position in ``df``; the frame's
    own index is ignored. Each output row copies all columns of the source row
    and adds ``item`` (the 1-based position) and ``class`` (one tag).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per test item, in test-set order.
    item_tags : dict[int, list[str]] | None
        Mapping from 1-based item position to its tags. Defaults to the
        notebook-level ``ITEM_TAGS``. Items missing from the mapping produce
        no output rows.

    Returns
    -------
    pandas.DataFrame
        Columns of ``df`` followed by ``item`` and ``class``. The columns are
        present even when no row matches, so a later ``groupby("class")``
        does not fail on an empty result.
    """
    tags_by_item = ITEM_TAGS if item_tags is None else item_tags

    # to_dict("records") keeps each column's own values; iterrows() would first
    # coerce every row into a single-dtype Series.
    records = []
    for position, row in enumerate(df.to_dict("records"), start=1):
        for tag in tags_by_item.get(position, []):
            records.append({**row, "item": position, "class": tag})

    # Pin the column layout explicitly so an empty result still has a schema.
    columns = list(df.columns)
    columns += [name for name in ("item", "class") if name not in columns]
    return pd.DataFrame(records, columns=columns)

def mean_or_none(series):
    """Return the mean of the non-missing values in ``series`` as a float.

    Returns None when ``series`` has no non-missing values (empty or all-NaN),
    instead of the NaN that ``Series.mean`` would produce.
    """
    present = series.dropna()
    if present.empty:
        return None
    return float(present.mean())


def agg_metric(s):
    """Per-group aggregator for ``groupby(...).agg``; same contract as mean_or_none.

    Kept as a separate name because the aggregation cells pass it by name; it
    delegates so the two helpers cannot drift apart.
    """
    return mean_or_none(s)


In [5]:
# Load the primary run's per-item results and expand them to one row per
# (item, class tag) pair; `df` and `dfc` are reused by the summary cells below.
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)
dfc


Unnamed: 0,hu_text,en_text_morph,morph_cost,time,hu_hyp_morph,morph_wer,morph_cer,morph_per,item,class
0,Mit sütsz kis szűcs,meet shoot speaking,3.361667,5.422086,Mégsőt szépen!,1.0,0.736842,1.250000,1,front_rounded
1,Mit sütsz kis szűcs,meet shoot speaking,3.361667,5.422086,Mégsőt szépen!,1.0,0.736842,1.250000,1,sibilants
2,Mit sütsz kis szűcs,meet shoot speaking,3.361667,5.422086,Mégsőt szépen!,1.0,0.736842,1.250000,1,affricate_cs
3,Mit sütsz kis szűcs,meet shoot speaking,3.361667,5.422086,Mégsőt szépen!,1.0,0.736842,1.250000,1,clusters
4,Mit sütsz kis szűcs,meet shoot speaking,3.361667,5.422086,Mégsőt szépen!,1.0,0.736842,1.250000,1,vowel_length
...,...,...,...,...,...,...,...,...,...,...
90,Csőből csöpög,transformed,3.785000,2.907422,Transzformed.,1.0,1.000000,2.100000,27,vowel_length
91,Kő őrzi őzt,co is e east,4.040000,3.245720,Kóissi East!,1.0,0.818182,1.444444,28,front_rounded_long
92,Kő őrzi őzt,co is e east,4.040000,3.245720,Kóissi East!,1.0,0.818182,1.444444,28,clusters
93,Kő őrzi őzt,co is e east,4.040000,3.245720,Kóissi East!,1.0,0.818182,1.444444,28,sibilants


## Summaries

In [6]:
# Per-class means of the primary run: one row per class tag, with the mean
# search cost exposed under the shared `avg_search_cost` column name.
class_aggregators = {
    "morph_wer": agg_metric,
    "morph_cer": agg_metric,
    "morph_per": agg_metric,
    "morph_cost": "mean",
}
summary = (
    dfc.groupby("class")
    .agg(class_aggregators)
    .rename(columns={"morph_cost":"avg_search_cost"})
    .reset_index()
    .sort_values("class")
)

# Persist the per-class table, then show it as the cell output.
AGGREGATION_CSV = DATA / "aggregation/morph_dp_aggregation_v2.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,morph_wer,morph_cer,morph_per,avg_search_cost
0,affricate_cs,0.958333,0.780765,1.316585,6.911478
1,affricate_cs_onset,1.0,1.0,2.1,3.785
2,affricate_dzs,0.8,0.916667,2.214286,6.241389
3,affricates,1.0,0.770833,1.294118,12.942857
4,clusters,1.0,0.850066,1.5625,3.64375
5,final_long_e,1.0,0.809524,1.304348,8.42
6,front_rounded,0.998413,0.821773,1.436189,7.261713
7,front_rounded_long,0.97619,0.813953,1.514286,4.702602
8,front_rounded_short,1.0,0.722222,1.324074,4.392183
9,geminate,1.0,0.8125,1.5,8.019167


In [7]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"morph_{tag}_avg", mean_or_none(df[f"morph_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
morph_wer_avg,1.022279
morph_cer_avg,0.883285
morph_per_avg,1.494531


In [None]:
# Per-class aggregation for results_morph_ngram_time.csv.
# NOTE(review): this cell reads the same input and writes the same output as
# the cell that follows it, and carries no execution count — it looks like a
# leftover duplicate; consider deleting it.
RESULTS_CSV = Path("./data/results/results_morph_ngram_time.csv")
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

# The cost column aggregated here is `morph_cost`, so that is the key to
# rename. Renaming `dp_cost` matched nothing and silently left the column as
# `morph_cost`, unlike the other aggregation tables (`avg_search_cost`).
summary = dfc.groupby("class").agg({
    "morph_wer": agg_metric,
    "morph_cer": agg_metric,
    "morph_per": agg_metric,
    "morph_cost": "mean"
}).rename(columns={"morph_cost":"avg_search_cost"}).reset_index().sort_values("class")

AGGREGATION_CSV = DATA / "aggregation/morph_ngram_time_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

In [8]:
# Per-class aggregation for results_morph_ngram_time.csv: load the per-item
# results, expand to one row per (item, class tag), then average per class.
RESULTS_CSV = DATA / "results/results_morph_ngram_time.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

# The cost column aggregated here is `morph_cost`, so that is the key to
# rename. Renaming `dp_cost` matched nothing and silently left the column as
# `morph_cost`, unlike the other aggregation tables (`avg_search_cost`).
summary = dfc.groupby("class").agg({
    "morph_wer": agg_metric,
    "morph_cer": agg_metric,
    "morph_per": agg_metric,
    "morph_cost": "mean"
}).rename(columns={"morph_cost":"avg_search_cost"}).reset_index().sort_values("class")

AGGREGATION_CSV = DATA / "aggregation/morph_ngram_time_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,morph_wer,morph_cer,morph_per,morph_cost
0,affricate_cs,1.0,0.91344,1.597222,7.130004
1,affricate_cs_onset,1.0,1.0,2.1,3.785
2,affricate_dzs,1.0,0.791667,1.5,6.241389
3,affricates,1.0,0.854167,1.5,13.218056
4,clusters,1.0,0.859635,1.611111,3.880083
5,final_long_e,1.0,0.904762,1.521739,9.022778
6,front_rounded,1.0,0.838307,1.457317,7.737046
7,front_rounded_long,1.047619,0.9502,1.764286,5.13285
8,front_rounded_short,1.111111,0.938889,1.712963,4.984905
9,geminate,1.0,0.875,1.545455,8.437917


In [9]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"morph_{tag}_avg", mean_or_none(df[f"morph_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
morph_wer_avg,1.0
morph_cer_avg,0.841333
morph_per_avg,1.46496


In [None]:
# Per-class aggregation for results_dp_vs_gemini_8k.csv: load the per-item
# results, expand to one row per (item, class tag), then average per class.
RESULTS_CSV = DATA / "results/results_dp_vs_gemini_8k.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

class_aggregators = {
    "dp_wer": agg_metric,
    "dp_cer": agg_metric,
    "dp_per": agg_metric,
    "dp_cost": "mean",
}
summary = (
    dfc.groupby("class")
    .agg(class_aggregators)
    .rename(columns={"dp_cost":"avg_search_cost"})
    .reset_index()
    .sort_values("class")
)

# Persist the per-class table, then show it as the cell output.
AGGREGATION_CSV = DATA / "aggregation/dp_8k_v1_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary


Unnamed: 0,class,dp_wer,dp_cer,dp_per,avg_search_cost
0,affricate_cs,1.0,0.70924,1.05433,10.460625
1,affricate_cs_onset,1.0,0.846154,1.2,7.7715
2,affricate_dzs,1.0,0.875,1.714286,10.76
3,affricates,1.0,0.604167,0.911765,15.411
4,clusters,1.0,0.887432,1.451389,6.024125
5,final_long_e,1.333333,0.809524,1.086957,13.6695
6,front_rounded,1.39881,1.203139,1.241676,11.27475
7,front_rounded_long,0.964286,0.834366,1.290476,8.676
8,front_rounded_short,0.916667,0.722222,1.314815,8.334333
9,geminate,1.4,0.75,1.318182,14.907


In [27]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"dp_{tag}_avg", mean_or_none(df[f"dp_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
dp_wer_avg,1.164626
dp_cer_avg,0.894636
dp_per_avg,1.204324


In [None]:
# Per-class aggregation for results_dp_vs_gemini_4_5zipf.csv (v1 output file):
# load the per-item results, expand per class tag, then average per class.
RESULTS_CSV = DATA / "results/results_dp_vs_gemini_4_5zipf.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

class_aggregators = {
    "dp_wer": agg_metric,
    "dp_cer": agg_metric,
    "dp_per": agg_metric,
    "dp_cost": "mean",
}
summary = (
    dfc.groupby("class")
    .agg(class_aggregators)
    .rename(columns={"dp_cost":"avg_search_cost"})
    .reset_index()
    .sort_values("class")
)

# Persist the per-class table, then show it as the cell output.
AGGREGATION_CSV = DATA / "aggregation/dp_4_5zipf_v1_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,dp_wer,dp_cer,dp_per,avg_search_cost
0,affricate_cs,1.083333,0.691612,0.984886,10.672
1,affricate_cs_onset,1.5,1.230769,2.1,8.4
2,affricate_dzs,1.0,0.75,1.214286,10.76
3,affricates,1.333333,0.6875,0.911765,16.075
4,clusters,1.083333,0.887432,1.451389,6.4805
5,final_long_e,1.666667,1.0,1.217391,15.029
6,front_rounded,1.190476,0.79314,1.293844,12.663
7,front_rounded_long,1.285714,1.151215,1.680952,9.208143
8,front_rounded_short,1.5,1.244444,1.916667,9.068
9,geminate,1.4,0.71875,1.318182,14.934


In [23]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"dp_{tag}_avg", mean_or_none(df[f"dp_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
dp_wer_avg,1.15034
dp_cer_avg,0.88609
dp_per_avg,1.275127


In [24]:
# Per-class aggregation for results_dp_vs_gemini_5zipf.csv (v1 output file):
# load the per-item results, expand per class tag, then average per class.
RESULTS_CSV = DATA / "results/results_dp_vs_gemini_5zipf.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

summary = dfc.groupby("class").agg({
    "dp_wer": agg_metric,
    "dp_cer": agg_metric,
    "dp_per": agg_metric,
    "dp_cost": "mean"
}).rename(columns={"dp_cost":"avg_search_cost"}).reset_index().sort_values("class")

# Write the per-class table, not the raw per-item frame: saving `df` here put
# a copy of the input results into the aggregation file.
AGGREGATION_CSV = DATA / "aggregation/dp_5zipf_v1_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,dp_wer,dp_cer,dp_per,avg_search_cost
0,affricate_cs,1.083333,0.717991,0.990196,9.39525
1,affricate_cs_onset,1.0,0.769231,1.2,7.823
2,affricate_dzs,1.0,0.791667,1.071429,10.76
3,affricates,1.333333,0.625,0.794118,12.976
4,clusters,1.0,0.91547,1.319444,5.70525
5,final_long_e,7.333333,5.142857,4.0,13.819
6,front_rounded,1.353175,0.946412,1.293273,10.756333
7,front_rounded_long,1.011905,0.862204,1.238889,8.635429
8,front_rounded_short,1.027778,0.777778,1.222222,8.196667
9,geminate,1.4,0.75,1.318182,14.934


In [25]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"dp_{tag}_avg", mean_or_none(df[f"dp_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
dp_wer_avg,1.380187
dp_cer_avg,1.15461
dp_per_avg,1.343032


In [11]:
# V2
# Marks the start of the V2 re-runs below. NOTE(review): this re-binds DATA to
# the same value the config cell already assigns, so it only matters if DATA
# was changed in between — confirm whether it is still needed.
DATA = Path("./data")

In [12]:
# Per-class aggregation for results_dp_vs_gemini_4_5zipf.csv (v2 output file).
# NOTE(review): this reads the same results path as the v1 cell; the v2 table
# differs only if that CSV was regenerated between the two runs — confirm.
RESULTS_CSV = DATA / "results/results_dp_vs_gemini_4_5zipf.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

class_aggregators = {
    "dp_wer": agg_metric,
    "dp_cer": agg_metric,
    "dp_per": agg_metric,
    "dp_cost": "mean",
}
summary = (
    dfc.groupby("class")
    .agg(class_aggregators)
    .rename(columns={"dp_cost":"avg_search_cost"})
    .reset_index()
    .sort_values("class")
)

# Persist the per-class table, then show it as the cell output.
AGGREGATION_CSV = DATA / "aggregation/dp_4_5zipf_v2_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,dp_wer,dp_cer,dp_per,avg_search_cost
0,affricate_cs,3.0625,2.948212,3.794118,9.2785
1,affricate_cs_onset,1.0,0.846154,1.3,8.0
2,affricate_dzs,1.0,0.75,0.714286,7.8
3,affricates,2.166667,0.833333,1.176471,14.082
4,clusters,2.8125,3.407482,4.444444,5.6515
5,final_long_e,2.0,1.190476,0.913043,5.556
6,front_rounded,2.315476,2.313723,2.873926,9.652333
7,front_rounded_long,1.166667,1.019347,1.372222,7.510286
8,front_rounded_short,1.444444,1.166667,1.5,7.035
9,geminate,1.4,0.75,1.363636,12.611


In [13]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"dp_{tag}_avg", mean_or_none(df[f"dp_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
dp_wer_avg,1.440221
dp_cer_avg,1.18083
dp_per_avg,1.550016


In [13]:
# Per-class aggregation for results_dp_vs_gemini_5zipf.csv (v2 output file):
# load the per-item results, expand per class tag, then average per class.
RESULTS_CSV = DATA / "results/results_dp_vs_gemini_5zipf.csv"
df = pd.read_csv(RESULTS_CSV)
dfc = explode_classes(df)

class_aggregators = {
    "dp_wer": agg_metric,
    "dp_cer": agg_metric,
    "dp_per": agg_metric,
    "dp_cost": "mean",
}
summary = (
    dfc.groupby("class")
    .agg(class_aggregators)
    .rename(columns={"dp_cost":"avg_search_cost"})
    .reset_index()
    .sort_values("class")
)

# Persist the per-class table, then show it as the cell output.
AGGREGATION_CSV = DATA / "aggregation/dp_5zipf_v2_aggregation.csv"
summary.to_csv(AGGREGATION_CSV, index=False)
summary

Unnamed: 0,class,dp_wer,dp_cer,dp_per,avg_search_cost
0,affricate_cs,1.083333,0.717991,0.990196,9.39525
1,affricate_cs_onset,1.0,0.769231,1.2,7.823
2,affricate_dzs,1.0,0.791667,1.071429,10.76
3,affricates,1.333333,0.625,0.794118,12.976
4,clusters,1.0,0.91547,1.319444,5.70525
5,final_long_e,7.333333,5.142857,4.0,13.819
6,front_rounded,1.353175,0.946412,1.293273,10.756333
7,front_rounded_long,1.011905,0.862204,1.238889,8.635429
8,front_rounded_short,1.027778,0.777778,1.222222,8.196667
9,geminate,1.4,0.75,1.318182,14.934


In [12]:
# Overall means over the non-missing per-item values of the run currently in
# `df`. Note that this re-binds `summary` from the per-class table to a dict.
summary = {"n_items": len(df)}
summary.update(
    (f"dp_{tag}_avg", mean_or_none(df[f"dp_{tag}"]))
    for tag in ["wer","cer","per"]
)

# One-row frame, transposed so each statistic is displayed on its own line.
pd.DataFrame([summary]).T

Unnamed: 0,0
n_items,28.0
dp_wer_avg,1.420323
dp_cer_avg,1.319986
dp_per_avg,1.574185
