In [6]:
import pandas as pd

# Text spellings of a failed attempt, compared after strip() and lower().
_FALSE_TOKENS = frozenset({"", "false", "0", "no"})


def _is_success(value):
    """Return True when a raw 'result' cell denotes a successful attempt."""
    if isinstance(value, str):
        # bool("False") is True, so text has to be compared by spelling.
        return value.strip().lower() not in _FALSE_TOKENS
    if pd.isna(value):
        # bool(float("nan")) is True; a missing result is not a success.
        return False
    return bool(value)


def calculate_task_statistics(file_path) -> pd.DataFrame:
    """
    Read a tab-separated file of task results and compute per-task statistics.

    The file must have a header row containing at least the columns
    'task name' and 'result'. Each row is one evaluation attempt.

    The 'result' column is normalised to booleans cell by cell:
    - real booleans and numbers keep their usual truthiness (0 is a failure);
    - text is a failure when, ignoring case and surrounding whitespace, it is
      one of '', 'false', '0' or 'no'; any other text counts as a success;
    - a missing value counts as a failure, but the row is still included in
      the task's total.

    Args:
        file_path (str): Path to the input file (anything accepted by
            ``pandas.read_csv``).

    Returns:
        pd.DataFrame: One row per task, ordered by task name, with the columns
            'task name', 'total_evaluations', 'success_count' and
            'success_rate' (successes divided by total evaluations).

    Raises:
        KeyError: If the 'task name' or 'result' column is absent.
    """
    # Load the data
    df = pd.read_csv(file_path, sep="\t")

    # A plain astype(bool) would turn the text "False" and NaN into True,
    # so each cell is interpreted explicitly before the final cast.
    df["result"] = df["result"].map(_is_success).astype(bool)

    # Named aggregation already yields the final column names, so no rename
    # is needed afterwards.
    task_stats = df.groupby("task name").agg(
        total_evaluations=("result", "count"),
        success_count=("result", "sum"),
        success_rate=("result", "mean"),
    ).reset_index()

    return task_stats

# Example usage: compute per-task statistics for the file "eval_qwen-turbo.txt".
file_path = "eval_qwen-turbo.txt"  # Relative path to the tab-separated results file; change this to your actual file path
task_stats_df = calculate_task_statistics(file_path)

# Display the resulting per-task table (one row per task name)
print(task_stats_df)


        task name  total_evaluations  success_count  success_rate
0  crafting_table                 68             68      1.000000
1    iron_pickaxe                 68             12      0.176471
2   stone_pickaxe                 68             56      0.823529
3  wooden_pickaxe                 62             57      0.919355


In [None]:
# Example usage: compute per-task statistics for the file "eval_qwen-max.txt".
file_path = "eval_qwen-max.txt"  # Relative path to the tab-separated results file; change this to your actual file path
task_stats_df = calculate_task_statistics(file_path)  # Rebinds the module-level task_stats_df name

# Display the resulting per-task table (one row per task name)
print(task_stats_df)