# Translations metrics analysis

## Setup

In [8]:
from pathlib import Path

import pandas as pd

## Prepare data

In [15]:
# Reads all TSV files in the current directory
def summarize_metric(directory_path: str, metric_name: str) -> pd.DataFrame:
    """
    Summarizes a specific metric from all TSV files in a directory using pandas.Series.describe().

    Args:
        directory_path (str): Path to the directory containing TSV files.
        metric_name (str): The name of the column (metric) to summarize.

    Returns:
        pd.DataFrame: A DataFrame where each row corresponds to a TSV file and each column is an index from describe().
    """
    directory = Path(directory_path)
    if not directory.is_dir():
        raise ValueError(f"The provided path '{directory_path}' is not a valid directory.")

    summary_data = []

    for tsv_file in directory.glob("*.tsv"):
        try:
            df = pd.read_csv(tsv_file, sep='\t')
            if metric_name not in df.columns:
                print(f"Metric '{metric_name}' not found in file '{tsv_file.name}'. Skipping.")
                continue

            # Apply describe() to the specified metric column
            metric_summary = df[metric_name].describe()

            # Append the summary as a row with the file name as the index
            summary_data.append(pd.DataFrame(metric_summary).T.assign(file_name=tsv_file.name))
        except Exception as e:
            print(f"Error processing file '{tsv_file.name}': {e}")

    # Combine all summaries into a single DataFrame
    if summary_data:
        result_df = pd.concat(summary_data, ignore_index=True)
        result_df.set_index('file_name', inplace=True)
        result_df.reset_index(inplace=True)
        return result_df
    else:
        print("No valid TSV files processed.")
        return pd.DataFrame()

In [16]:
df = summarize_metric('results/en2es/full_results', 'comet_ref')

## Analysis

In [17]:
df

Unnamed: 0,file_name,count,mean,std,min,25%,50%,75%,max
0,meta-llama_llama-4-scout-17b-16e-instruct_r7m_...,1000.0,0.956281,0.080451,0.226214,0.952224,0.988632,1.0,1.0
1,gpt-4.1-nano_r3_spt4m_spt4l_upt1_meta4xnli_tra...,1000.0,0.959605,0.070247,0.49332,0.956625,0.98943,1.0,1.0
2,meta-llama_llama-4-maverick-17b-128e-instruct_...,1000.0,0.968827,0.059328,0.493226,0.966798,0.9912,1.0,1.0
3,mistral-saba-24b_r3_spt4m_spt4l_upt1_meta4xnli...,1000.0,0.966101,0.05967,0.608032,0.965112,0.990979,1.0,1.0
4,llama-3.1-8b-instant_r3_spt4m_spt4l_upt1_meta4...,1000.0,0.957577,0.080903,0.26402,0.957427,0.989978,1.0,1.0
5,meta-llama_llama-4-maverick-17b-128e-instruct_...,1000.0,0.957881,0.072383,0.492707,0.951127,0.990218,1.0,1.0
6,llama-3.3-70b-versatile_r3_spt4m_spt4l_upt1_me...,1000.0,0.966617,0.062684,0.491158,0.968793,0.9912,1.0,1.0
7,gemma2-9b-it_r3_spt4m_spt4l_upt1_meta4xnli_tra...,1000.0,0.913171,0.189142,0.200866,0.947344,0.990435,1.0,1.0
8,meta-llama_llama-4-maverick-17b-128e-instruct_...,1000.0,0.96869,0.059469,0.493226,0.965951,0.9912,1.0,1.0
9,meta-llama_llama-4-scout-17b-16e-instruct_r5l_...,1000.0,0.969661,0.059987,0.373493,0.969706,0.9912,1.0,1.0
