In [1]:
import pandas as pd
import os
from pathlib import Path

In [2]:
# Root folder of the analysis outputs, relative to this notebook.
base_dir = Path("../../analysis/")

# Sub-folders holding the per-language CSVs for the "short" and "long" runs.
short_dir = base_dir.joinpath("acl6060-short")
long_dir = base_dir.joinpath("acl6060-long")

In [3]:
# Language-pair codes to process; each one selects an `acl6060_<pair>.csv` file.
langs = [
    "en_de",
    "en_zh",
    "en_fr",
    "en_pt",
]

In [4]:
# Show a few sample rows from one table in `short_dir` to illustrate the column layout.
# The path is joined with pathlib's `/` operator, like the other cells in this notebook.
df = pd.read_csv(short_dir / "acl6060_en_de.csv")
df.head(3)

Unnamed: 0,system,LinguaPy,metricx_qe_score,QEMetricX_24-Strict-linguapy,xcomet_qe_score,XCOMET-QE-Strict-linguapy
0,aya_canary-v2,-0.240385,85.00252,84.96687,87.002978,86.983776
1,aya_owsm4.0-ctc,-0.480769,84.001397,83.83228,86.416648,86.34858
2,aya_seamlessm4t,-0.480769,84.424167,84.206799,86.577077,86.5106


# Compute diffs as the relative short-to-long change (in percent)

$$\Delta_{\text{length}} = 100 \cdot \left( Q_t^{\text{short}} - Q_t^{\text{long}} \right) \,/\, Q_t^{\text{short}}$$

In [5]:
def compute_diffs(short_file, long_file, fill_missing_with_zero=False):
    """Compute the relative short-vs-long score change, in percent, per system.

    For every metric column listed in ``cols`` the result is
    ``100 * (short - long) / short``, rounded to two decimals. Rows are
    matched on the ``system`` column with an outer join, so systems present
    in only one of the two files are kept and flagged in ``_merge``.

    Parameters
    ----------
    short_file, long_file : str or path-like
        CSV files that each contain a ``system`` column and the metric
        columns listed in ``cols``.
    fill_missing_with_zero : bool, default False
        When True, scores that are missing (or not parseable as numbers) are
        replaced by 0 before the difference is taken. A system missing from
        the long file then gets a diff of 100; a system missing from the
        short file has a zero baseline and therefore an undefined (NaN) diff.

    Returns
    -------
    pandas.DataFrame
        Columns ``system``, ``_merge`` (pandas merge indicator) and one
        ``<metric>_diff`` column per metric, in the order of ``cols``.
        Entries are NaN wherever the relative change is undefined (missing
        score or a short score of 0).
    """
    # Metric columns to compare
    cols = ["QEMetricX_24-Strict-linguapy", "XCOMET-QE-Strict-linguapy"]

    short_df = pd.read_csv(short_file)[["system"] + cols]
    long_df = pd.read_csv(long_file)[["system"] + cols]

    merged = short_df.merge(
        long_df, on="system", how="outer", suffixes=("_sh", "_lg"), indicator=True
    )

    # Result table: one row per system plus where the row came from
    out = merged[["system", "_merge"]].copy()

    # Address the suffixed columns directly by name. Every metric exists in
    # both inputs, so the merge always suffixes them; substring matching on
    # '_sh' could mis-pair a metric whose own name contains that text.
    for col in cols:
        s_vals = pd.to_numeric(merged[f"{col}_sh"], errors="coerce")
        l_vals = pd.to_numeric(merged[f"{col}_lg"], errors="coerce")

        if fill_missing_with_zero:
            s_vals = s_vals.fillna(0)
            l_vals = l_vals.fillna(0)

        # A zero baseline makes the relative change undefined; mask it to NaN
        # so the division cannot produce +/-inf.
        baseline = s_vals.where(s_vals != 0)

        # expected higher (short) minus expected lower (long), relative to short
        out[f"{col}_diff"] = ((s_vals - l_vals) / baseline * 100).round(2)

    return out

In [6]:
# Build one diff table per language pair, keyed by the pair code.
result = {}
for lang in langs:
    # The short and long runs use the same file name in their own folders.
    csv_name = f"acl6060_{lang}.csv"
    short_file, long_file = short_dir / csv_name, long_dir / csv_name

    diff = compute_diffs(short_file, long_file)
    result[lang] = diff
print(result)

{'en_de':                system     _merge  QEMetricX_24-Strict-linguapy_diff  \
0       aya_canary-v2       both                              -2.56   
1     aya_owsm4.0-ctc       both                              -3.67   
2     aya_seamlessm4t  left_only                                NaN   
3         aya_whisper       both                               2.66   
4           canary-v2       both                              10.92   
5           desta2-8b       both                              78.61   
6     gemma_canary-v2       both                              -0.60   
7   gemma_owsm4.0-ctc       both                              -2.05   
8   gemma_seamlessm4t  left_only                                NaN   
9       gemma_whisper       both                               3.50   
10        owsm4.0-ctc       both                               0.39   
11     phi4multimodal       both                               1.36   
12      qwen2audio-7b       both                              94.85

In [7]:
# Write each language pair's diff table to `diff_<pair>.csv` in the working
# directory, ordered by system name. `index=False` (the documented boolean
# form) keeps the row index out of the file.
for lang_pair, diff_table in result.items():
    diff_table.sort_values("system").to_csv(f"diff_{lang_pair}.csv", index=False)