In [21]:
import pandas as pd

# Load the aggregated mutation records (JSON) and the test-accuracy table (CSV).
mutations_df = pd.read_json("results/aggregated_mutations.json")
training_df = pd.read_csv("../train_mutations/analysis_outputs/all_test_accuracies.csv")

# Inner join on job_id: only jobs present in both tables are kept.
df = mutations_df.merge(training_df, how="inner", on="job_id")

In [26]:
# Create summary table by operator (grouping by main operator, before any '_').
# For every main operator the table reports the number of exploded rows and the
# mean / median accuracy over the runs whose execution status is 'BUGGY_MODEL'.
# Operator names are in the "operators" column as a list of strings.

# First, explode the DataFrame so each operator in the list gets its own row
df_exploded = df.explode("operators").copy()
df_exploded["main_operator"] = df_exploded["operators"].apply(lambda x: x.split('_')[0] if isinstance(x, str) else "UNKNOWN")

# Declared up front so the summary frame keeps its columns even when there are
# no rows at all; otherwise sort_values('Total') below raises KeyError.
summary_columns = ['Operator', 'Total', 'Mean Accuracy', 'Median Accuracy']

operator_summary = []
total_mutations = 0  # running count of exploded rows over all operators
# One pass over the frame; sort=False yields groups in first-appearance order,
# which is the same order as iterating over .unique().
for main_operator, op_data in df_exploded.groupby('main_operator', sort=False):
    total = len(op_data)
    total_mutations += total

    # Accuracy statistics only consider runs flagged as a buggy model
    buggy_models = op_data[op_data['execution_result'].apply(lambda x: x['status'] == 'BUGGY_MODEL')]
    # Pull the accuracies out once and reuse them for both statistics
    accuracies = buggy_models['execution_result'].apply(lambda x: x['accuracy'])
    mean_accuracy = accuracies.mean()
    median_accuracy = accuracies.median()

    operator_summary.append({
        'Operator': main_operator,
        'Total': total,
        # NaN (no buggy runs for this operator) is rendered as "N/A"
        'Mean Accuracy': f"{mean_accuracy:.2f}" if not pd.isna(mean_accuracy) else "N/A",
        'Median Accuracy': f"{median_accuracy:.2f}" if not pd.isna(median_accuracy) else "N/A",
    })

# Create DataFrame and display, most frequent operators first
operator_df = pd.DataFrame(operator_summary, columns=summary_columns)
operator_df = operator_df.sort_values('Total', ascending=False)
# Lift pandas' display truncation so the whole table is rendered
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
display(operator_df)

Unnamed: 0,Operator,Total,Mean Accuracy,Median Accuracy
4,core/ReplaceBinaryOperator,1055,0.17,0.0
0,operators/replace-rasp-comparison,896,0.26,0.0
12,core/ReplaceComparisonOperator,330,0.84,0.96
1,operators/negate-rasp-sop-select,288,0.19,0.0
6,core/NumberReplacer,283,0.64,0.9
3,operators/negate-rasp-sop-constructor,160,0.24,0.0
8,operators/decrement-integer,140,0.57,0.82
7,operators/increment-integer,131,0.69,0.96
10,operators/decrement-rasp-indices,130,0.22,0.0
9,operators/increment-rasp-indices,127,0.25,0.0


In [28]:
# Print one example bug (its diff) for every mutation operator, following the
# row order of the summary table.

# Explode so each operator in the "operators" list gets its own row, then tag
# each row with its main operator: the text before the first '_', or "UNKNOWN"
# when the entry is not a string.
df_exploded = df.explode("operators").copy()
df_exploded["main_operator"] = df_exploded["operators"].map(
    lambda x: x.partition('_')[0] if isinstance(x, str) else "UNKNOWN"
)

# Only the first buggy row of each main operator is shown
for main_operator in operator_df['Operator']:
    op_data = df_exploded[df_exploded['main_operator'] == main_operator]
    buggy_examples = op_data[
        op_data['execution_result'].apply(lambda x: x['status'] == 'BUGGY_MODEL')
    ]

    print(f"Operator: {main_operator}")
    if buggy_examples.empty:
        print("No buggy example found.")
    else:
        example = buggy_examples.iloc[0]
        print("Example bug:")
        print(example['diff'])
    print("-" * 60)


Operator: core/ReplaceBinaryOperator
Example bug:
--- mutation diff ---
--- asource/most_freq.py
+++ bsource/most_freq.py
@@ -65,7 +65,7 @@
       Output SOp of sort program.
     """
     keys = rasp.SequenceMap(
-        lambda x, i: x + min_key * i / max_seq_len, keys, rasp.indices
+        lambda x, i: x - min_key * i / max_seq_len, keys, rasp.indices
     )
     return make_sort_unique(vals, keys)
 
------------------------------------------------------------
Operator: operators/replace-rasp-comparison
Example bug:
--- mutation diff ---
--- asource/hist.py
+++ bsource/hist.py
@@ -11,7 +11,7 @@
       hist("abac")
       >> [2, 1, 2, 1]
     """
-    same_tok = rasp.Select(rasp.tokens, rasp.tokens, rasp.Comparison.EQ).named(
+    same_tok = rasp.Select(rasp.tokens, rasp.tokens, rasp.Comparison.LT).named(
         "same_tok"
     )
     return rasp.SelectorWidth(same_tok).named("hist")
------------------------------------------------------------
Operator: core/ReplaceComparisonOpera