This notebook requires pre-computed data. You can get this data by running:

`python3 -m analysis download`

and then

`python3 -m analysis compute-localization`

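This will build the `data.json` and `localization.csv` files in the parent directory of this notebook, which is where the cells below load them from (`../data.json` and `../localization.csv`).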

In [1]:
from analysis.models.data import Data

# Read the pre-computed JSON dump, then validate it into a `Data` model instance.
with open("../data.json", "r") as data_file:
    raw_json = data_file.read()

data = Data.model_validate_json(raw_json)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd

# Load the localization results written by `python3 -m analysis compute-localization`.
df = pd.read_csv(filepath_or_buffer="../localization.csv")

In [3]:
# Denominator used for the resolution rate.
# NOTE(review): presumably the number of benchmark instances every system was
# evaluated on -- confirm, and update if the benchmark split changes.
TOTAL_INSTANCES = 500

# Per-system summary: the total of `resolved` plus the mean of every
# localization metric, with a derived `resolution_rate`
# (resolved total / TOTAL_INSTANCES). Rows are ordered from the highest
# resolution rate to the lowest.
agg = (
    df.groupby("system")
    .agg({
        "resolved": "sum",
        "missing_files": "mean",
        "file_match": "mean",
        "file_precision": "mean",
        "function_match": "mean",
        "function_precision": "mean",
        "class_match": "mean",
        "class_precision": "mean",
    })
    # Appended as the last column, exactly where the item assignment put it.
    .assign(resolution_rate=lambda summary: summary["resolved"] / TOTAL_INSTANCES)
    .sort_values("resolution_rate", ascending=False)
)

In [4]:
from analysis.utility import set_column_awards

# Decorate a copy so the numeric `agg` frame itself is not modified here.
styled_df = agg.copy()

# Column -> `descending` flag handed to `set_column_awards`.
# NOTE(review): presumably True means "higher is better" and False means
# "lower is better" (only `missing_files` uses False) -- confirm against
# analysis.utility.set_column_awards.
award_directions = {
    "missing_files": False,
    "file_match": True,
    "file_precision": True,
    "function_match": True,
    "function_precision": True,
    "class_match": True,
    "class_precision": True,
    "resolution_rate": True,
}

# The return value is ignored; the helper appears to update `styled_df` in place.
for column, descending in award_directions.items():
    set_column_awards(styled_df, column, descending)

styled_df

Unnamed: 0_level_0,resolved,missing_files,file_match,file_precision,function_match,function_precision,class_match,class_precision,resolution_rate
system,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
20250206_agentscope,316,🥇 0.00,0.77,🥇 0.82,0.66,🥇 0.68,0.80,🥈 0.71,🥇 0.63
20250203_openhands_4x_scaled,303,5.80,🥈 0.84,0.78,🥉 0.72,0.65,🥈 0.84,0.69,🥈 0.61
20250110_learn_by_interact_claude3.5,301,1.15,🥇 0.87,0.63,🥈 0.73,0.53,🥇 0.86,0.56,🥉 0.60
20241208_gru,285,🥇 0.00,0.67,0.77,0.58,0.62,0.69,0.66,0.57
20241213_devlo,283,0.02,0.79,0.77,0.68,0.65,0.81,0.68,0.57
20241212_epam-ai-run-claude-3-5-sonnet,277,🥇 0.00,0.78,0.75,0.67,0.64,0.79,0.67,0.55
20241202_amazon-q-developer-agent-20241202-dev,275,🥇 0.00,0.80,🥉 0.78,0.68,0.64,0.80,0.68,0.55
20250120_Bracket,265,🥇 0.00,0.80,0.73,0.68,0.63,0.81,0.65,0.53
20241108_devlo,264,0.34,0.77,0.78,0.63,🥉 0.66,0.80,0.69,0.53
20241029_OpenHands-CodeAct-2.1-sonnet-20241022,263,4.54,0.78,🥈 0.81,0.65,🥈 0.66,0.79,🥇 0.73,0.53
