In [16]:
import pandas as pd
from pathlib import Path

In [17]:
base_dir = Path("../../analysis/")
clean_dir = base_dir / "fleurs"
noisy_dir = base_dir / "noisy_fleurs_ambient"

In [18]:
langs = [ 'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl',  'en_zh',     
                    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en',  'zh_en', ]

In [19]:
def normalize_score(df):
    """Normalize metric socre ranges"""
    for col in df.columns:
        if "metricx" in col.lower():
            df[col] = df[col].apply(lambda x: 100 - 4 * x)
        elif "qe" in col.lower():
            df[col] = df[col].apply(lambda x: 100 * x)
        elif "linguapy" in col.lower():
            df[col] = df[col].apply(lambda x: -100 * x)

In [20]:
def compute_diffs(clean_file, noisy_file, fill_missing_with_zero=False):
    """Compute differences between clean and noisy results: clean - noisy"""
    
    clean_df = pd.read_csv(clean_file)
    noisy_df = pd.read_csv(noisy_file)

    normalize_score(clean_df)

    # Merge with clear suffixes
    merged = clean_df.merge(
        noisy_df, on='system', how='outer', suffixes=('_clean', '_noisy'), indicator=True
    )

    # Matching column names
    clean_cols = list(merged.filter(like='_clean').columns)
    noisy_cols = [c.replace('_clean', '_noisy') for c in clean_cols]

    # Keep only columns that exist in both
    common_pairs = [(c_col, n_col) for c_col, n_col in zip(clean_cols, noisy_cols) if n_col in merged.columns]

    # Start output table
    out = merged[['system', '_merge']].copy()

    # Compute diff: clean - noisy
    for c_col, n_col in common_pairs:
        base = c_col[:-6]  # remove trailing '_clean'
        diff_col = f'{base}_diff'

        # Convert to numeric
        clean_vals = pd.to_numeric(merged[c_col], errors='coerce')
        noisy_vals = pd.to_numeric(merged[n_col], errors='coerce')

        if fill_missing_with_zero:
            clean_vals = clean_vals.fillna(0)
            noisy_vals = noisy_vals.fillna(0)
        out[diff_col] = (100*(clean_vals - noisy_vals)/clean_vals).round(2)

    return out

In [21]:
result = {}
for lang in langs:
    clean_file = clean_dir / f"fleurs_{lang}.csv"
    noisy_file = noisy_dir / f"noisy_fleurs_ambient_{lang}.csv"
    
    diff_df  = compute_diffs(clean_file, noisy_file)
    result[lang] = diff_df
    noisy_df = pd.read_csv(noisy_file)
    system_order = noisy_df['system'].tolist()
    diff_df = diff_df.set_index('system').reindex(system_order).reset_index()
    out_file = noisy_dir / f"diff_{lang}.csv"
    diff_df.to_csv(out_file, index=False)

In [22]:
for k, v in result.items():
    if k == "en_zh":
        print(k)
        print(v, '\n\n')

en_zh
               system      _merge  LinguaPy_diff  \
0       aya_canary-v2        both           -inf   
1     aya_owsm4.0-ctc        both           -inf   
2     aya_seamlessm4t        both            NaN   
3         aya_whisper        both          98.00   
4           canary-v2        both          99.00   
5           desta2-8b        both          98.66   
6     gemma_canary-v2        both            NaN   
7   gemma_owsm4.0-ctc        both            NaN   
8   gemma_seamlessm4t        both            NaN   
9       gemma_whisper        both            NaN   
10        owsm4.0-ctc        both           -inf   
11     phi4multimodal        both          98.57   
12      qwen2audio-7b        both          96.33   
13        seamlessm4t        both           -inf   
14            spirelm        both            NaN   
15    tower_canary-v2        both            NaN   
16  tower_owsm4.0-ctc        both            NaN   
17  tower_seamlessm4t        both            NaN   
18    

In [14]:
print(clean_df)

NameError: name 'clean_df' is not defined

In [15]:
print(noisy_df)

               system   LinguaPy  metricx_qe_score  \
0             whisper  -0.317460         69.915081   
1         seamlessm4t  -0.000000         81.624989   
2           canary-v2 -40.634921         22.256428   
3         owsm4.0-ctc  -3.068783         19.607881   
4       gemma_whisper  -0.634921         86.976577   
5       tower_whisper  -0.317460         88.138430   
6         aya_whisper  -0.105820         88.529266   
7     aya_seamlessm4t  -0.317460         89.142946   
8   gemma_seamlessm4t  -0.529101         87.743249   
9   tower_seamlessm4t  -0.105820         88.324344   
10      aya_canary-v2  -4.761905         29.731338   
11    gemma_canary-v2        NaN               NaN   
12    tower_canary-v2        NaN               NaN   
13    aya_owsm4.0-ctc  -0.211640         83.843237   
14  gemma_owsm4.0-ctc  -0.529101         80.882382   
15  tower_owsm4.0-ctc  -0.211640         82.599945   
16          desta2-8b  -0.105820         73.974610   
17      qwen2audio-7b  -5.07