In [4]:
import json
import itertools
from pathlib import Path
import pandas as pd

In [5]:
def load_results_summaries(base_dir, direction_pairs, system_names):
    """
    Load every ``results_summary.jsonl`` file for the given directions and systems.

    Each file is looked up at
    ``<base_dir>/<system>/<direction>/results_summary.jsonl`` and parsed as
    JSON Lines (one JSON document per non-blank line).

    Args:
        base_dir (str or Path): The base directory for the evaluation outputs.
        direction_pairs (list): A list of language direction strings (e.g., 'en_de').
        system_names (list): A list of system name strings.

    Returns:
        dict: A nested dictionary structured as {direction: {system: records}},
              where ``records`` is the list of parsed JSON objects, or ``None``
              when the file is missing or contains invalid JSON (a message is
              printed in both cases).
    """
    base_path = Path(base_dir)
    all_results = {}

    for direction, system in itertools.product(direction_pairs, system_names):
        summary_path = base_path / system / direction / 'results_summary.jsonl'

        # Create the per-direction mapping on first use.
        per_system = all_results.setdefault(direction, {})

        try:
            with summary_path.open('r', encoding='utf-8') as f:
                # Skip blank lines (e.g. an empty line at the end of the file):
                # json.loads on an empty string raises JSONDecodeError, which
                # would otherwise discard the whole, otherwise valid, file.
                per_system[system] = [json.loads(line) for line in f if line.strip()]

        except FileNotFoundError:
            print(f"Warning: File not found, skipping: {summary_path}")
            per_system[system] = None
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in {summary_path}: {e}")
            per_system[system] = None

    return all_results

In [6]:
def convert_results_to_dataframe(results_data):
    """
    Flatten the nested results dictionary into one pandas DataFrame.

    Every entry of every loaded .jsonl file becomes one row, tagged with the
    'direction' and 'system' it was loaded for. Systems whose value is ``None``
    (file missing or unreadable) contribute no rows.

    Args:
        results_data (dict): The {direction: {system: records}} mapping produced
                             by the load_results_summaries function.

    Returns:
        pandas.DataFrame: One row per record, with 'direction' and 'system' as
                          the first two columns. An empty DataFrame when there
                          are no records at all.
    """
    id_columns = ['direction', 'system']

    flat_rows = []
    for direction, systems in results_data.items():
        for system, records in systems.items():
            if records is None:
                # Nothing was loaded for this (direction, system) pair.
                continue
            for record in records:
                origin = {'direction': direction, 'system': system}
                # Keys from the record are merged last, as in a dict literal.
                flat_rows.append({**origin, **record})

    if not flat_rows:
        print("No records were found to create a DataFrame.")
        return pd.DataFrame()

    frame = pd.DataFrame(flat_rows)

    # Identifying columns first, followed by the metric columns in their
    # order of first appearance.
    metric_columns = [name for name in frame.columns if name not in id_columns]
    return frame[id_columns + metric_columns]

In [7]:
# Root directory that holds the per-system, per-direction evaluation outputs.
BASE_DIR = '/hearing2translate/evaluation/output_evals/fleurs'

# Language directions to load, written as '<source>_<target>'.
DIRECTION_PAIRS = [
    'en_de', 'de_en',
    'en_es', 'es_en',
    'en_fr', 'fr_en',
    'en_it', 'it_en',
    'en_nl',
    'en_pt', 'pt_en',
    'en_zh', 'zh_en',
]

# Alternative system lists, left commented out:
#SYSTEM_NAMES = ['canary-v2',  'gemma_canary-v2',  'gemma_seamlessm4t',  'owsm4.0-ctc', 'qwen2audio-7b',  'spirelm', 'tower_owsm4.0-ctc',  'tower_whisper',  'whisper',
#                'desta2-8b',  'gemma_owsm4.0-ctc',  'gemma_whisper',    'phi4multimodal',  'seamlessm4t',   'tower_canary-v2',  'tower_seamlessm4t',  'voxtral-small-24b']

#SYSTEM_NAMES = ['canary-v2',  'gemma_canary-v2',  'gemma_seamlessm4t',  'owsm4.0-ctc', 'qwen2audio-7b',  'spirelm',    'whisper',
#                'desta2-8b',  'gemma_owsm4.0-ctc',  'gemma_whisper',    'phi4multimodal',  'seamlessm4t',  'voxtral-small-24b']

# Systems to load; the order here is the order rows appear in the loaded table.
SYSTEM_NAMES = [
    'seamlessm4t', 'canary-v2', 'owsm4.0-ctc',
    'aya_whisper', 'gemma_whisper', 'tower_whisper',
    'aya_seamlessm4t', 'gemma_seamlessm4t', 'tower_seamlessm4t',
    'aya_canary-v2', 'gemma_canary-v2', 'tower_canary-v2',
    'aya_owsm4.0-ctc', 'gemma_owsm4.0-ctc', 'tower_owsm4.0-ctc',
    'desta2-8b', 'qwen2audio-7b', 'phi4multimodal', 'voxtral-small-24b', 'spirelm',
]

# Identifier columns plus the metric columns kept from the summaries.
selected_cols = [
    'direction', 'system',
    'SacreBLEU', 'chrF', 'LinguaPy',
    'RefMetricX_24-Strict-linguapy', 'QEMetricX_24-Strict-linguapy',
    'XCOMET-Strict-linguapy', 'XCOMET-QE-Strict-linguapy',
]

# Load every summary file, flatten to a DataFrame and keep the selected columns.
results_data = load_results_summaries(BASE_DIR, DIRECTION_PAIRS, SYSTEM_NAMES)
results_df = convert_results_to_dataframe(results_data)[selected_cols]



In [8]:
# Narrow the table further: identifier columns plus four metric columns.
# Note that this overwrites results_df with the narrower view.
selected_cols = [
    'direction', 'system',
    'LinguaPy',
    'QEMetricX_24-Strict-linguapy',
    'XCOMET-QE-Strict-linguapy',
    'chrF',
]
results_df = results_df[selected_cols]

In [9]:
# Show the narrowed results table as this cell's output.
results_df

Unnamed: 0,direction,system,LinguaPy,QEMetricX_24-Strict-linguapy,XCOMET-QE-Strict-linguapy,chrF
0,en_de,seamlessm4t,0.1560,2.4148,0.9403,61.3662
1,en_de,canary-v2,0.0000,2.3163,0.9447,61.9113
2,en_de,owsm4.0-ctc,0.1560,9.4253,0.7766,54.9522
3,en_de,aya_whisper,0.0000,1.3047,0.9666,63.3557
4,en_de,gemma_whisper,0.0000,1.3646,0.9659,62.0023
...,...,...,...,...,...,...
248,zh_en,tower_owsm4.0-ctc,0.0000,2.6813,0.8854,55.5565
249,zh_en,desta2-8b,0.6349,4.3109,0.7453,46.5472
250,zh_en,qwen2audio-7b,5.0794,3.9521,0.8176,48.0031
251,zh_en,phi4multimodal,5.7143,3.7811,0.8353,51.5074


In [10]:
# Column order used for the exported tables: directions out of English first,
# then directions into English.
lang_pairs_order = [
    'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl', 'en_zh',
    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en', 'zh_en',
]

In [11]:
# System x direction table of XCOMET-QE scores, columns in lang_pairs_order.
pivoted_xcomet_qe = (
    results_df
    .pivot(index='system', columns='direction', values='XCOMET-QE-Strict-linguapy')[lang_pairs_order]
)

In [12]:
# Write the XCOMET-QE pivot table to a CSV file (path relative to the working directory).
pivoted_xcomet_qe.to_csv('pivoted_xcomet_qe.csv')

In [13]:
# System x direction table of QE MetricX scores, columns in lang_pairs_order.
pivoted_metricx_qe = (
    results_df
    .pivot(index='system', columns='direction', values='QEMetricX_24-Strict-linguapy')[lang_pairs_order]
)

In [14]:
# Write the QE MetricX pivot table to a CSV file (path relative to the working directory).
pivoted_metricx_qe.to_csv('pivoted_metricx_qe.csv')

In [15]:
# System x direction table of LinguaPy values, columns in lang_pairs_order.
pivoted_linguapy = (
    results_df
    .pivot(index='system', columns='direction', values='LinguaPy')[lang_pairs_order]
)

In [16]:
# Write the LinguaPy pivot table to a CSV file (path relative to the working directory).
pivoted_linguapy.to_csv('pivoted_linguapy.csv')

#### Results by language pair

In [17]:
# Export one CSV per language pair, rows sorted in ascending order of XCOMET-QE.
# The result of to_csv() is deliberately not stored: when given a path it
# writes the file and returns None.
for lang_pair in lang_pairs_order:
    (
        results_df[results_df['direction'] == lang_pair]
        .sort_values(by='XCOMET-QE-Strict-linguapy')
        .to_csv(f'fleurs_{lang_pair}.csv', index=False)
    )

### Gender analysis on FLEURS

In [56]:
def load_all_jsons(base_dir, manifests_dir, direction_pairs, system_names):
    """
    Load per-sample results and join them, by position, with the manifest rows.

    For every (direction, system) combination the per-sample file
    ``<base_dir>/<system>/<direction>/results.jsonl`` is read and paired
    line by line with ``<manifests_dir>/<direction with '_' -> '-'>.jsonl``.
    Each result record is augmented with:

    * ``linguapy_score``: first element of ``metrics['linguapy_score']``;
    * ``xcomet_qe_score``: ``metrics['xcomet_qe_score']`` when
      ``linguapy_score`` is 0, otherwise 0;
    * every top-level key of the matching manifest row (manifest values win
      on key clashes);
    * ``gender`` and ``sample_id`` taken from the manifest row's
      ``benchmark_metadata``;
    * ``direction`` and ``system``.

    A (direction, system) pair is skipped, with a printed warning, when its
    results file or the direction's manifest is missing or is not valid JSON
    Lines. When the two files differ in length a warning is printed and only
    the common prefix is paired; unpaired records are dropped because they
    carry no manifest metadata. Pairing is purely positional, so both files
    are assumed to list samples in the same order -- TODO confirm upstream.

    Args:
        base_dir (str or Path): Root directory of the evaluation outputs.
        manifests_dir (str or Path): Directory containing the manifest files.
        direction_pairs (list): Language direction strings (e.g., 'en_de').
        system_names (list): System name strings.

    Returns:
        pandas.DataFrame: One row per paired sample.
    """
    base_path = Path(base_dir)
    manifests_path = Path(manifests_dir)

    def _read_jsonl(path):
        """Parse one JSON document per non-blank line of ``path``."""
        with path.open('r', encoding='utf-8') as f:
            return [json.loads(line) for line in f if line.strip()]

    # The manifest depends only on the direction, so read it once per direction
    # instead of once per system. None marks a manifest that could not be loaded.
    manifest_cache = {}
    all_results = {}

    for direction, system in itertools.product(direction_pairs, system_names):
        per_system = all_results.setdefault(direction, {})

        if direction not in manifest_cache:
            manifest_path = manifests_path / '{direction}.jsonl'.format(
                direction=direction.replace('_', '-')
            )
            try:
                manifest_cache[direction] = _read_jsonl(manifest_path)
            except FileNotFoundError:
                print(f"Warning: Manifest not found, skipping direction '{direction}': {manifest_path}")
                manifest_cache[direction] = None
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON in {manifest_path}: {e}")
                manifest_cache[direction] = None

        manifest_rows = manifest_cache[direction]
        if manifest_rows is None:
            continue

        results_path = base_path / system / direction / 'results.jsonl'
        try:
            records = _read_jsonl(results_path)
        except FileNotFoundError:
            print(f"Warning: File not found, skipping: {results_path}")
            continue
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in {results_path}: {e}")
            continue

        if len(records) != len(manifest_rows):
            print(
                f"Warning: {results_path} has {len(records)} records but the manifest for "
                f"'{direction}' has {len(manifest_rows)}; pairing only the first "
                f"{min(len(records), len(manifest_rows))} by position."
            )

        paired = []
        for record, manifest_row in zip(records, manifest_rows):
            record['linguapy_score'] = record['metrics']['linguapy_score'][0]
            # A non-zero linguapy_score zeroes the quality score.
            record['xcomet_qe_score'] = (
                record['metrics']['xcomet_qe_score'] if record['linguapy_score'] == 0 else 0
            )
            # The cached manifest row is only read, never mutated, because it
            # is shared by every system of this direction.
            record.update(manifest_row)
            record['gender'] = manifest_row['benchmark_metadata']['gender']
            paired.append(record)
        per_system[system] = paired

    results = []
    for direction, systems in all_results.items():
        for system, items in systems.items():
            for item in items:
                item['direction'] = direction
                item['system'] = system
                item['sample_id'] = item['benchmark_metadata']['sample_id']
                results.append(item)

    results_df = pd.DataFrame(results)

    return results_df

In [57]:
# Input locations for the gender analysis: evaluation outputs and manifests.
BASE_DIR = '/hearing2translate/evaluation/output_evals/fleurs'
MANIFESTS_DIR = '/hearing2translate/manifests/fleurs'

In [58]:
# Language directions to load, written as '<source>_<target>'.
DIRECTION_PAIRS = [
    'en_de', 'de_en',
    'en_es', 'es_en',
    'en_fr', 'fr_en',
    'en_it', 'it_en',
    'en_nl',
    'en_pt', 'pt_en',
    'en_zh', 'zh_en',
]

# Systems to load.
SYSTEM_NAMES = [
    'seamlessm4t', 'canary-v2', 'owsm4.0-ctc',
    'aya_whisper', 'gemma_whisper', 'tower_whisper',
    'aya_seamlessm4t', 'gemma_seamlessm4t', 'tower_seamlessm4t',
    'aya_canary-v2', 'gemma_canary-v2', 'tower_canary-v2',
    'aya_owsm4.0-ctc', 'gemma_owsm4.0-ctc', 'tower_owsm4.0-ctc',
    'desta2-8b', 'qwen2audio-7b', 'phi4multimodal', 'voxtral-small-24b', 'spirelm',
]

# Columns retained from the joined results/manifest table.
columns_to_keep = [
    'dataset_id', 'sample_id', 'src_lang', 'tgt_lang',
    'xcomet_qe_score', 'linguapy_score', 'gender',
    'direction', 'system',
]

# Load per-sample results joined with manifest metadata, then keep those columns.
results_and_manifests = load_all_jsons(
    BASE_DIR, MANIFESTS_DIR, DIRECTION_PAIRS, SYSTEM_NAMES
)[columns_to_keep]

In [59]:
# Preview the first rows of the joined results/manifest table.
results_and_manifests.head()

Unnamed: 0,dataset_id,sample_id,src_lang,tgt_lang,xcomet_qe_score,linguapy_score,gender,direction,system
0,fleurs,1904,en,de,1.0,0,0,en_de,seamlessm4t
1,fleurs,1675,en,de,0.945453,0,0,en_de,seamlessm4t
2,fleurs,1950,en,de,0.876322,0,1,en_de,seamlessm4t
3,fleurs,1728,en,de,0.981796,0,1,en_de,seamlessm4t
4,fleurs,1972,en,de,0.995062,0,1,en_de,seamlessm4t


In [60]:
import io

# gender 1 --> female
# gender 0 --> male

def analyze_gender_diff_by_system(df, target_direction, target_system, score_col='xcomet_qe_score'):
    """
    Measure the score gap between gender=1 and gender=0 samples for one
    direction and system.

    Rows are filtered to the requested direction and system, split by gender,
    and inner-joined on ``sample_id`` so that only sample ids having both a
    gender=0 and a gender=1 entry contribute. If a sample id occurs several
    times for a gender, every gender-0/gender-1 combination counts as a pair.

    Args:
        df (pd.DataFrame): Input with 'direction', 'system', 'sample_id',
            'gender' and ``score_col`` columns.
        target_direction (str): The direction to filter by (e.g., 'en_de').
        target_system (str): The system to filter by (e.g., 'canary-v2').
        score_col (str): Name of the score column to compare. Defaults to
            'xcomet_qe_score'.

    Returns:
        tuple or None: ``(avg_diff, abs_diff, E_quality)`` where

        * ``avg_diff`` is the mean of (gender-1 score - gender-0 score) over
          pairs; negative means gender 0 scored higher;
        * ``abs_diff`` is the mean absolute value of that difference;
        * ``E_quality`` is ``100 * (mean_g1 - mean_g0) / mean_g0`` over the
          paired rows.

        All three are NaN when rows exist but no gender pair is found.
        ``None`` is returned when there is no row at all for the requested
        direction and system, or when an error occurs (a message is printed).
    """
    # Keeps the relative gap finite when the gender-0 mean is exactly 0.
    eps = 1e-8

    try:
        # Filter for the specified direction and system
        in_scope = (df['direction'] == target_direction) & (df['system'] == target_system)
        df_filtered = df[in_scope].copy()

        if df_filtered.empty:
            print(f"No data found for direction='{target_direction}' and system='{target_system}'.")
            return None

        # Coerce 'gender' to numeric so the comparisons with 0 and 1 below
        # work even if it was loaded as strings; unparseable values become
        # NaN and match neither group.
        df_filtered['gender'] = pd.to_numeric(df_filtered['gender'], errors='coerce')

        cols_to_keep = ['sample_id', score_col]

        # Inner join: keeps only sample ids present for both genders.
        df_merged = pd.merge(
            df_filtered.loc[df_filtered['gender'] == 0, cols_to_keep],
            df_filtered.loc[df_filtered['gender'] == 1, cols_to_keep],
            on='sample_id',
            suffixes=('_g0', '_g1')
        )

        if df_merged.empty:
            print(f"No matching pairs (gender 0 & 1 for the same sample_id) found for direction='{target_direction}' and system='{target_system}'.")
            # Return NaNs rather than None so the caller still records a row
            # for this direction/system.
            nan = float('nan')
            return (nan, nan, nan)

        scores_g0 = df_merged[f'{score_col}_g0']
        scores_g1 = df_merged[f'{score_col}_g1']

        # Per-pair difference (gender 1 score - gender 0 score).
        score_diff = scores_g1 - scores_g0
        avg_diff = score_diff.mean()
        abs_diff = score_diff.abs().mean()

        # Disparity score as defined in:
        # Giuseppe Attanasio, Beatrice Savoldi, Dennis Fucci, and Dirk Hovy. 2024.
        # Twists, Humps, and Pebbles: Multilingual Speech Recognition Models Exhibit
        # Gender Performance Gaps. In Proceedings of the 2024 Conference on Empirical
        # Methods in Natural Language Processing, pages 21318–21340, Miami, Florida,
        # USA. Association for Computational Linguistics.
        phi_g0 = scores_g0.mean()
        phi_g1 = scores_g1.mean()
        E_quality = 100.0 * (phi_g1 - phi_g0) / (phi_g0 + eps)

        return (avg_diff, abs_diff, E_quality)

    except Exception as e:
        # Best-effort: report the problem and let the caller skip this pair.
        print(f"An error occurred during analysis for {target_direction}, {target_system}: {e}")
        return None

In [61]:
# Language directions to analyse, written as '<source>_<target>'.
DIRECTION_PAIRS = [
    'en_de', 'de_en',
    'en_es', 'es_en',
    'en_fr', 'fr_en',
    'en_it', 'it_en',
    'en_nl',
    'en_pt', 'pt_en',
    'en_zh', 'zh_en',
]

# Systems to analyse.
SYSTEM_NAMES = [
    'seamlessm4t', 'canary-v2', 'owsm4.0-ctc',
    'aya_whisper', 'gemma_whisper', 'tower_whisper',
    'aya_seamlessm4t', 'gemma_seamlessm4t', 'tower_seamlessm4t',
    'aya_canary-v2', 'gemma_canary-v2', 'tower_canary-v2',
    'aya_owsm4.0-ctc', 'gemma_owsm4.0-ctc', 'tower_owsm4.0-ctc',
    'desta2-8b', 'qwen2audio-7b', 'phi4multimodal', 'voxtral-small-24b', 'spirelm',
]

# Collect one row of gender-gap metrics per (direction, system) pair.
results_diffs = []
for direction_pair in DIRECTION_PAIRS:
    # The loop variable is not called `sys`, to avoid shadowing the stdlib module name.
    for system_name in SYSTEM_NAMES:
        diff_metrics = analyze_gender_diff_by_system(results_and_manifests, direction_pair, system_name)
        if diff_metrics is None:
            # Nothing to record for this pair; the function has printed the reason.
            continue
        diff_score, abs_diff_score, e_quality = diff_metrics
        results_diffs.append({
            'system': system_name,
            'direction': direction_pair,
            'diff_score': diff_score,
            'abs_diff_score': abs_diff_score,
            'E_quality': e_quality,
        })

df_diffs_scores = pd.DataFrame(results_diffs)

No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='seamlessm4t'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='canary-v2'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='owsm4.0-ctc'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='aya_whisper'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='gemma_whisper'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='tower_whisper'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='aya_seamlessm4t'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='gemma_seamlessm4t'.
No matching pairs (gender 0 & 1 for the same sample_id) found for direction='de_en' and system='tower_seamle

In [63]:
# Column order for the exported table.
lang_pairs_order = [
    'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl', 'en_zh',
    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en', 'zh_en',
]

# System x direction table of the mean signed score difference.
df_diffs_scores_pivoted = (
    df_diffs_scores
    .pivot(index='system', columns='direction', values='diff_score')[lang_pairs_order]
)

In [64]:
# Round to 4 decimals and write the signed-difference table to CSV.
df_diffs_scores_pivoted.round(4).to_csv('pivoted_diff_scores.csv')

In [65]:
# Show the (unrounded) signed-difference table as this cell's output.
df_diffs_scores_pivoted

direction,en_es,en_fr,en_pt,en_it,en_de,en_nl,en_zh,es_en,fr_en,pt_en,it_en,de_en,zh_en
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
aya_canary-v2,0.013286,0.011394,0.02414,0.01171,0.008458,0.010485,0.008919,-0.012082,-0.004309,,0.011583,,-0.006729
aya_owsm4.0-ctc,0.025788,0.029694,0.014285,0.021269,0.010302,0.01058,0.020007,0.008963,-0.005228,,0.006074,,0.011208
aya_seamlessm4t,0.021659,0.018127,0.029858,0.016112,0.010069,-0.00061,0.015763,0.007717,-0.003629,,-0.000411,,0.008609
aya_whisper,0.017659,0.006221,0.007645,0.010297,0.009773,0.006217,0.0122,-0.005516,0.000144,,0.022112,,-0.002349
canary-v2,0.015879,0.033749,0.009841,0.011951,0.003265,0.003918,0.0,-0.030723,0.029803,,0.023716,,-0.005073
desta2-8b,0.014417,0.056142,0.01244,0.031035,-0.004199,0.014174,-0.072205,-0.01837,-0.026449,,0.076392,,0.010992
gemma_canary-v2,0.009023,0.002264,0.007321,0.01787,0.006104,0.010356,0.004819,-0.003135,-0.005797,,-0.002573,,
gemma_owsm4.0-ctc,-0.006907,-0.005178,7.9e-05,-0.010728,0.000651,-0.001167,0.006825,-0.001728,0.006568,,-0.005137,,-4.1e-05
gemma_seamlessm4t,0.02008,0.027173,0.023453,0.023105,0.014483,0.012974,0.020918,0.003178,0.013328,,-0.032046,,-0.003415
gemma_whisper,0.022427,0.021969,0.008473,0.015727,0.008363,0.020218,0.031775,-0.005824,0.005826,,0.009994,,-0.019699


In [66]:
# Column order for the exported table.
lang_pairs_order = [
    'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl', 'en_zh',
    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en', 'zh_en',
]

# System x direction table of the mean absolute score difference.
df_abs_diffs_scores_pivoted = (
    df_diffs_scores
    .pivot(index='system', columns='direction', values='abs_diff_score')[lang_pairs_order]
)

In [67]:
# Round to 4 decimals and write the absolute-difference table to CSV.
df_abs_diffs_scores_pivoted.round(4).to_csv('pivoted_abs_diff_scores.csv')

In [68]:
# Show the (unrounded) absolute-difference table as this cell's output.
df_abs_diffs_scores_pivoted

direction,en_es,en_fr,en_pt,en_it,en_de,en_nl,en_zh,es_en,fr_en,pt_en,it_en,de_en,zh_en
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
aya_canary-v2,0.055156,0.06065,0.064922,0.058662,0.020731,0.039742,0.05323,0.037653,0.082032,,0.033767,,0.07116
aya_owsm4.0-ctc,0.090868,0.093508,0.091863,0.092386,0.031145,0.057422,0.08705,0.055714,0.127136,,0.072312,,0.083054
aya_seamlessm4t,0.054847,0.074678,0.065108,0.063696,0.028131,0.033203,0.058997,0.046144,0.087282,,0.037781,,0.049637
aya_whisper,0.059054,0.073434,0.065007,0.054869,0.026768,0.037611,0.069445,0.044726,0.080604,,0.044644,,0.051136
canary-v2,0.069597,0.101733,0.073402,0.084761,0.030401,0.042526,0.0,0.100852,0.111725,,0.19313,,0.037625
desta2-8b,0.151148,0.205906,0.114585,0.156621,0.073496,0.107222,0.280136,0.1255,0.226413,,0.20731,,0.174504
gemma_canary-v2,0.051917,0.076287,0.052015,0.056136,0.022506,0.03981,0.048266,0.043527,0.087751,,0.054429,,
gemma_owsm4.0-ctc,0.026386,0.031,0.020046,0.034995,0.01166,0.016913,0.046435,0.021671,0.024873,,0.030634,,0.040611
gemma_seamlessm4t,0.064996,0.088517,0.062488,0.073984,0.036085,0.043941,0.074762,0.051956,0.109751,,0.068613,,0.029649
gemma_whisper,0.069975,0.09203,0.063581,0.068747,0.035279,0.040751,0.090585,0.055498,0.082797,,0.058884,,0.069474


In [69]:
# Column order for the exported table.
lang_pairs_order = [
    'en_es', 'en_fr', 'en_pt', 'en_it', 'en_de', 'en_nl', 'en_zh',
    'es_en', 'fr_en', 'pt_en', 'it_en', 'de_en', 'zh_en',
]

# System x direction table of the E_quality disparity score.
df_E_quality_scores_pivoted = (
    df_diffs_scores
    .pivot(index='system', columns='direction', values='E_quality')[lang_pairs_order]
)

In [70]:
# Round to 4 decimals and write the E_quality table to CSV.
df_E_quality_scores_pivoted.round(4).to_csv('pivoted_E_quality_scores.csv')

In [71]:
# Show the (unrounded) E_quality table as this cell's output.
df_E_quality_scores_pivoted

direction,en_es,en_fr,en_pt,en_it,en_de,en_nl,en_zh,es_en,fr_en,pt_en,it_en,de_en,zh_en
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
aya_canary-v2,1.4418,1.237563,2.64366,1.264241,0.877657,1.101046,1.005485,-1.284794,-0.477483,,1.277053,,-4.751622
aya_owsm4.0-ctc,2.897011,3.354017,1.60522,2.382816,1.081623,1.128259,2.323119,0.978102,-0.598706,,0.672152,,1.265402
aya_seamlessm4t,2.359691,1.986938,3.280215,1.755011,1.049414,-0.063792,1.802263,0.841723,-0.405376,,-0.044526,,0.927415
aya_whisper,1.921454,0.682012,0.830943,1.120184,1.019653,0.652602,1.395751,-0.591104,0.015968,,2.439355,,-0.252609
canary-v2,1.776991,3.902252,1.107797,1.344291,0.345558,0.422662,0.0,-3.634491,3.546237,,2.940353,,-11.229317
desta2-8b,1.751809,7.799422,1.512309,4.067025,-0.458246,1.641556,-13.868881,-2.186813,-3.655034,,10.709736,,1.476061
gemma_canary-v2,0.976047,0.246822,0.787099,1.929527,0.633162,1.08737,0.553883,-0.336617,-0.647017,,-0.277962,,
gemma_owsm4.0-ctc,-0.757743,-0.577957,0.008718,-1.174015,0.068178,-0.122545,0.790309,-0.19214,0.765932,,-0.58182,,-0.004727
gemma_seamlessm4t,2.196339,3.001033,2.532121,2.535608,1.51679,1.36303,2.390627,0.347114,1.52483,,-3.44684,,-0.375169
gemma_whisper,2.458755,2.436957,0.909871,1.70138,0.872292,2.133136,3.736805,-0.630129,0.650276,,1.084434,,-2.137496
