In [None]:
import pandas as pd
from pathlib import Path

In [None]:
# Root folder holding the analysis CSVs, relative to the notebook's working directory.
base_dir = Path("../../analysis/")

# Sub-folders read by the cells below: one for the clean results, one for the noisy ones.
clean_dir = base_dir.joinpath("fleurs")
noisy_dir = base_dir.joinpath("noisy_fleurs_ambient")

In [None]:
# Direction codes used to build the per-language CSV file names below.
# NOTE(review): 'en_nl' has no matching 'nl_en' entry -- confirm this is intentional.
langs = [
    # codes starting with 'en_'
    'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl', 'en_zh',
    # codes ending with '_en'
    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en', 'zh_en',
]

In [None]:
def compute_diffs(clean_file, noisy_file, fill_missing_with_zero=False):
    """Compute per-system differences between clean and noisy results (clean - noisy).

    Both inputs are read with ``pd.read_csv`` and outer-merged on the ``system``
    column. For every other column that appears in *both* files, the result gets
    a ``<column>_diff`` column holding ``clean - noisy`` rounded to 4 decimals.

    Parameters
    ----------
    clean_file, noisy_file : path or file-like
        Anything ``pd.read_csv`` accepts. Each file must contain a ``system`` column.
    fill_missing_with_zero : bool, default False
        When True, values that are missing or cannot be parsed as numbers are
        replaced by 0 before subtracting. When False they yield NaN in the diff.

    Returns
    -------
    pandas.DataFrame
        Columns ``system``, ``_merge`` (the merge indicator: ``both``,
        ``left_only`` for clean-only rows, ``right_only`` for noisy-only rows)
        and one ``*_diff`` column per shared column, in the clean file's column order.
    """
    clean_suffix, noisy_suffix = '_clean', '_noisy'

    clean_df = pd.read_csv(clean_file)
    noisy_df = pd.read_csv(noisy_file)

    # Columns present in both inputs (besides the join key) are exactly the ones
    # merge() will suffix. Deriving the pairs from the inputs, instead of
    # substring-matching '_clean' on the merged names, keeps columns whose own
    # name happens to contain '_clean' from being silently dropped.
    noisy_columns = set(noisy_df.columns)
    shared_cols = [
        col for col in clean_df.columns
        if col != 'system' and col in noisy_columns
    ]

    merged = clean_df.merge(
        noisy_df,
        on='system',
        how='outer',
        suffixes=(clean_suffix, noisy_suffix),
        indicator=True,
    )

    # Start the output with the identifying columns only.
    out = merged[['system', '_merge']].copy()

    for col in shared_cols:
        # Non-numeric cells become NaN rather than raising.
        clean_vals = pd.to_numeric(merged[col + clean_suffix], errors='coerce')
        noisy_vals = pd.to_numeric(merged[col + noisy_suffix], errors='coerce')

        if fill_missing_with_zero:
            clean_vals = clean_vals.fillna(0)
            noisy_vals = noisy_vals.fillna(0)

        # Adding 0.0 turns the -0.0 that rounding tiny negative differences
        # produces into a plain 0.0 (NaN stays NaN).
        out[f'{col}_diff'] = (clean_vals - noisy_vals).round(4) + 0.0

    return out

In [None]:
# Build one diff table per direction code: keep the full outer-merged table in
# `result`, and write a copy ordered like the noisy CSV next to the noisy inputs.
result = {}
for lang in langs:
    clean_file = clean_dir / f"fleurs_{lang}.csv"
    noisy_file = noisy_dir / f"noisy_fleurs_ambient_{lang}.csv"

    result[lang] = compute_diffs(clean_file, noisy_file)

    # Re-read the noisy file only to recover its row order.
    noisy_df = pd.read_csv(noisy_file)
    system_order = noisy_df['system'].tolist()

    # Reindexing on the noisy order also drops systems that appear only in the
    # clean file from the written CSV (they remain in `result[lang]`).
    diff_df = (
        result[lang]
        .set_index('system')
        .reindex(system_order)
        .reset_index()
    )

    out_file = noisy_dir / f"diff_{lang}.csv"
    diff_df.to_csv(out_file, index=False)

In [None]:
# Print every stored diff table, preceded by its direction code.
for k in result:
    v = result[k]
    print(k)
    print(v, '\n\n')

en_es
               system      _merge  LinguaPy_diff  \
0       aya_canary-v2        both         0.0000   
1     aya_owsm4.0-ctc        both        -0.9332   
2     aya_seamlessm4t        both        -0.1555   
3         aya_whisper        both        -0.0000   
4           canary-v2        both         0.1555   
5           desta2-8b        both        -0.6221   
6     gemma_canary-v2        both        -0.0000   
7   gemma_owsm4.0-ctc        both        -0.7776   
8   gemma_seamlessm4t        both        -0.1556   
9       gemma_whisper        both        -0.3111   
10        owsm4.0-ctc        both        -0.1555   
11     phi4multimodal        both         0.3111   
12      qwen2audio-7b        both        -0.3110   
13        seamlessm4t        both         0.0000   
14            spirelm        both        -0.4665   
15    tower_canary-v2        both        -0.0000   
16  tower_owsm4.0-ctc        both         0.3110   
17  tower_seamlessm4t        both        -0.1556   
18    

In [None]:
# `clean_df` is otherwise only a local variable inside compute_diffs, so on a
# fresh kernel this cell would raise NameError. Load the clean results for the
# last direction in `langs` explicitly before printing them.
clean_df = pd.read_csv(clean_dir / f"fleurs_{langs[-1]}.csv")
print(clean_df)

   direction             system  LinguaPy  QEMetricX_24-Strict-linguapy  \
0      zh_en          canary-v2   65.6085                       24.3884   
1      zh_en    tower_canary-v2   43.9153                       19.0280   
2      zh_en      aya_canary-v2    9.8413                       18.8023   
3      zh_en        owsm4.0-ctc    3.4921                       19.5131   
4      zh_en          desta2-8b    0.6349                        4.3109   
5      zh_en            whisper    0.2116                        5.9495   
6      zh_en      qwen2audio-7b    5.0794                        3.9521   
7      zh_en     phi4multimodal    5.7143                        3.7811   
8      zh_en  voxtral-small-24b    0.2116                        3.0309   
9      zh_en  gemma_owsm4.0-ctc    0.5291                        3.1692   
10     zh_en        seamlessm4t    0.0000                        3.4874   
11     zh_en  tower_owsm4.0-ctc    0.0000                        2.6813   
12     zh_en    aya_owsm4

In [None]:
# Load the noisy results for the last direction in `langs` explicitly instead of
# relying on whatever `noisy_df` was left behind by the final loop iteration.
noisy_df = pd.read_csv(noisy_dir / f"noisy_fleurs_ambient_{langs[-1]}.csv")
print(noisy_df)

               system   LinguaPy  metricx_qe_score  \
0             whisper   0.317460          7.521230   
1         seamlessm4t   0.000000          4.593753   
2           canary-v2  40.634921         19.435893   
3         owsm4.0-ctc   3.068783         20.098030   
4       gemma_whisper   0.634921          3.255856   
5       tower_whisper   0.317460          2.965392   
6         aya_whisper   0.105820          2.867684   
7     aya_seamlessm4t   0.317460          2.714263   
8   gemma_seamlessm4t   0.529101          3.064188   
9   tower_seamlessm4t   0.105820          2.918914   
10      aya_canary-v2   4.761905         17.567165   
11    gemma_canary-v2        NaN               NaN   
12    tower_canary-v2        NaN               NaN   
13    aya_owsm4.0-ctc   0.211640          4.039191   
14  gemma_owsm4.0-ctc   0.529101          4.779405   
15  tower_owsm4.0-ctc   0.211640          4.350014   
16          desta2-8b   0.105820          6.506348   
17      qwen2audio-7b   5.07