# WER BUCKET ANALYSE PER ASR DOMAIN 

In [20]:
import pandas as pd

In [None]:
def save_wer_scores(ref_path, hyp_path, out_path, normalize=True):
    """
    Compute sentence-level WER scores and save them to a file. Optional normalization included.

    Parameters:
        ref_path (str): Path to the reference file (.txt)
        hyp_path (str): Path to the hypothesis file (.txt)
        out_path (str): Path to the output file for saving WER scores
        normalize (bool): Whether to apply normalization (default = True)
    """
    # Read input files
    with open(ref_path, "r", encoding="utf-8") as f:
        refs = [line.strip() for line in f if line.strip()]
    with open(hyp_path, "r", encoding="utf-8") as f:
        hyps = [line.strip() for line in f if line.strip()]

    # Check length consistency
    assert len(refs) == len(hyps), "Mismatch in number of lines between reference and hypothesis"

    # Optionally normalize text
    if normalize:
        refs = [normalize_text(line) for line in refs]
        hyps = [normalize_text(line) for line in hyps]

    # Compute sentence-level WER
    results = calculate_wer_per_sentence(refs, hyps)
    wers = [r["wer"] * 100 for r in results]  # convert to percentage (0–100%)

    # Write WER scores to file
    with open(out_path, "w", encoding="utf-8") as f:
        for w in wers:
            f.write(f"{w:.2f}\n")

    print(f"[✓] WER scores saved to {out_path}")

In [None]:
import os

# commonvoice
com_dir = "/Users/sedatgunay/Desktop/compare-mt-2/commonvoice/"
com_ref = os.path.join(com_dir, "common_test_ref_texts.txt")
com_knn = os.path.join(com_dir, "common_test_knn_texts.txt")
com_van = os.path.join(com_dir, "common_test_van_texts.txt")

#librispeech
lib_dir = "/Users/sedatgunay/Desktop/compare-mt-2/librispeech/"
lib_ref = os.path.join(lib_dir, "libri_test_ref_texts.txt")
lib_knn = os.path.join(lib_dir, "libri_test_knn_texts.txt")
lib_van = os.path.join(lib_dir, "libri_test_van_texts.txt")

#voxpopuli
vox_dir = "/Users/sedatgunay/Desktop/compare-mt-2/voxpopuli/"
vox_ref = os.path.join(vox_dir, "vox_test_ref.txt")
vox_knn = os.path.join(vox_dir, "vox_test_knn.txt")
vox_van = os.path.join(vox_dir, "vox_test_van.txt")

# numerical
num_dir = "/Users/sedatgunay/Desktop/num/commonvoice/"
num_ref = os.path.join(num_dir, "common_test_ref_texts.txt")
num_van = os.path.join(num_dir, "common_test_van_texts.txt")
num_knn = os.path.join(num_dir, "test_knn_0.1_normalized_texts.txt")


# Save scores per domian
save_wer_scores(com_ref, com_knn, os.path.join(com_dir, "common_wer_knn.txt"))
save_wer_scores(com_ref, com_van, os.path.join(com_dir, "common_wer_van.txt"))

save_wer_scores(lib_ref, lib_knn, os.path.join(lib_dir, "libri_wer_knn.txt"))
save_wer_scores(lib_ref, lib_van, os.path.join(lib_dir, "libri_wer_van.txt"))

save_wer_scores(vox_ref, vox_knn, os.path.join(vox_dir, "vox_wer_knn.txt"))
save_wer_scores(vox_ref, vox_van, os.path.join(vox_dir, "vox_wer_van.txt"))


save_wer_scores(
    ref_path=num_ref,
    hyp_path=num_van,
    out_path="results/wer_scores_van_com_num.txt",
    normalize=True
)

save_wer_scores(
    ref_path=num_ref,
    hyp_path=num_knn,
    out_path="results/wer_scores_knn_com_num.txt",
    normalize=True
)

In [21]:
def load_wer_scores(path):
    with open(path, "r", encoding="utf-8") as f:
        return [float(line.strip()) for line in f if line.strip()]

In [22]:
bucket_edges = list(range(0, 110, 10))  # [0, 10, 20, ..., 100]
bucket_labels = [f"{i}-{i+10}" for i in bucket_edges[:-1]]

In [23]:
def count_in_buckets(wers, bucket_edges, labels):
    df = pd.cut(wers, bins=bucket_edges, labels=labels, right=False)
    return df.value_counts().sort_index()

# COMMONVOICE

In [24]:
wers_knn_com = load_wer_scores("results/wer_scores_knn_com.txt")
wers_vanilla_com = load_wer_scores("results/wer_scores_van_com.txt")

count_knn_com = count_in_buckets(wers_knn_com, bucket_edges, bucket_labels)
count_vanilla_com = count_in_buckets(wers_vanilla_com, bucket_edges, bucket_labels)

df_com = pd.DataFrame({
    "WER-bucket": bucket_labels,
    "Number of sentences (kNN)": count_knn_com.values,
    "Number of sentences (Vanilla)": count_vanilla_com.values,
})
df_com["Difference (kNN - Vanilla)"] = df_com["Number of sentences (kNN)"] - df_com["Number of sentences (Vanilla)"]

df_com

Unnamed: 0,WER-bucket,Number of sentences (kNN),Number of sentences (Vanilla),Difference (kNN - Vanilla)
0,0-10,8685,8597,88
1,10-20,1497,1547,-50
2,20-30,619,608,11
3,30-40,205,246,-41
4,40-50,116,125,-9
5,50-60,75,74,1
6,60-70,42,37,5
7,70-80,19,18,1
8,80-90,15,20,-5
9,90-100,0,0,0


# LIBRISPEECH

In [17]:
wers_knn_libri = load_wer_scores("results/wer_scores_knn_libri.txt")
wers_vanilla_libri = load_wer_scores("results/wer_scores_van_libri.txt")

count_knn_libri = count_in_buckets(wers_knn_libri, bucket_edges, bucket_labels)
count_vanilla_libri = count_in_buckets(wers_vanilla_libri, bucket_edges, bucket_labels)
df_libri = pd.DataFrame({
    "WER-bucket": bucket_labels,
    "Number of sentences (kNN)": count_knn_libri.values,
    "Number of sentences (Vanilla)": count_vanilla_libri.values,
})
df_libri["Difference (kNN - Vanilla)"] = df_libri["Number of sentences (kNN)"] - df_libri["Number of sentences (Vanilla)"]

df_libri

Unnamed: 0,WER-bucket,Number of sentences (kNN),Number of sentences (Vanilla),Difference (kNN - Vanilla)
0,0-10,2370,2304,66
1,10-20,326,360,-34
2,20-30,132,150,-18
3,30-40,44,53,-9
4,40-50,17,21,-4
5,50-60,25,29,-4
6,60-70,11,7,4
7,70-80,7,6,1
8,80-90,1,3,-2
9,90-100,0,0,0


# VOXPOPULI

In [18]:
wers_knn_vox = load_wer_scores("results/wer_scores_knn_vox.txt")
wers_vanilla_vox = load_wer_scores("results/wer_scores_van_vox.txt")

count_knn_vox = count_in_buckets(wers_knn_vox, bucket_edges, bucket_labels)
count_vanilla_vox = count_in_buckets(wers_vanilla_vox, bucket_edges, bucket_labels)
df_vox = pd.DataFrame({
    "WER-bucket": bucket_labels,
    "Number of sentences (kNN)": count_knn_vox.values,
    "Number of sentences (Vanilla)": count_vanilla_vox.values,
})
df_vox["Differnce (kNN - Vanilla)"] = df_vox["Number of sentences (kNN)"] - df_vox["Number of sentences (Vanilla)"]

df_vox

Unnamed: 0,WER-bucket,Number of sentences (kNN),Number of sentences (Vanilla),Differnce (kNN - Vanilla)
0,0-10,1004,883,121
1,10-20,520,559,-39
2,20-30,214,258,-44
3,30-40,59,79,-20
4,40-50,17,22,-5
5,50-60,13,18,-5
6,60-70,2,3,-1
7,70-80,0,1,-1
8,80-90,0,0,0
9,90-100,3,1,2


# COMMONVOICE NUMERICAL

In [19]:
wers_knn_com_num = load_wer_scores("results/wer_scores_knn_com_num.txt")
wers_vanilla_com_num = load_wer_scores("results/wer_scores_van_com_num.txt")

count_knn_com_num = count_in_buckets(wers_knn_com_num, bucket_edges, bucket_labels)
count_vanilla_com_num = count_in_buckets(wers_vanilla_com_num, bucket_edges, bucket_labels)
df_num = pd.DataFrame({
    "WER-bucket": bucket_labels,
    "Number of sentences (kNN)": count_knn_com_num.values,
    "Number of sentences (Vanilla)": count_vanilla_com_num.values,
})
df_num["Difference (kNN - Vanilla)"] = df_num["Number of sentences (kNN)"] - df_num["Number of sentences (Vanilla)"]

df_num

Unnamed: 0,WER-bucket,Number of sentences (kNN),Number of sentences (Vanilla),Difference (kNN - Vanilla)
0,0-10,2131,8597,-6466
1,10-20,5541,1547,3994
2,20-30,2316,608,1708
3,30-40,660,246,414
4,40-50,265,125,140
5,50-60,209,74,135
6,60-70,89,37,52
7,70-80,22,18,4
8,80-90,35,20,15
9,90-100,0,0,0
