# Experiment results

In [None]:
import os 
import ast
import sys
import json
import torch
import numpy as np
import pandas as pd

In [None]:
# Add the parent and grand-parent directories to the import path.
# NOTE(review): presumably one of them contains the `src` package imported
# in the next cell — confirm against the repository layout.
sys.path.append('../')
sys.path.append('../../')

In [None]:
# Looking at the best configs 
from src.utils.files import json2data
from src.utils.code import clean_code
from src.utils.distance import seq_dist

In [None]:
from datasets import load_dataset

## Computation functions 

In [None]:
def compute_success_rate(dataframe, group=""):
    """
    Fraction of buggy submissions that ended up with a repair.

    A submission counts as repaired when its ``repair`` entry is not the
    empty string. With a non-empty ``group`` the fraction is computed per
    group of ``dataframe.groupby(group)``; otherwise a single value is
    returned for the whole frame.
    """
    def _rate(frame):
        repaired = frame.repair != ""
        return repaired.sum() / len(frame)

    if not group:
        return _rate(dataframe)
    return dataframe.groupby(group).apply(_rate)

def compute_seq_distance(dataframe, group=""):
    """
    Mean ``seq_dist`` over the rows that have a repair.

    Only rows whose ``repair`` value is truthy (i.e. a non-empty string)
    contribute to the mean. With a non-empty ``group`` the mean is computed
    per group of ``dataframe.groupby(group)``; otherwise a single value is
    returned for the whole frame.
    """
    def _mean_distance(frame):
        has_repair = frame.repair.astype(bool)
        return frame.loc[has_repair, "seq_dist"].mean()

    if not group:
        return _mean_distance(dataframe)
    return dataframe.groupby(group).apply(_mean_distance)


def number_of_solutions(dataframe, group=""):
    """
    Count the buggy submissions that ended up with a repair.

    A submission counts as repaired when its ``repair`` entry is not the
    empty string. With a non-empty ``group`` the count is computed per group
    of ``dataframe.groupby(group)``; otherwise a single count is returned
    for the whole frame.
    """
    def _count(frame):
        repaired = frame.repair != ""
        return repaired.sum()

    if not group:
        return _count(dataframe)
    return dataframe.groupby(group).apply(_count)

In [None]:
def get_results(df, tool_name):
    """
    Build the per-assignment summary table for one repair tool.

    Adds a ``seq_dist`` column to ``df`` in place (``seq_dist`` applied to
    each ``func_code`` / ``repair`` pair), prints the overall counts, success
    rate and average distance, and returns a frame with one row per
    ``assignment_id`` holding ``assignment_id``, ``{tool_name}_SR``,
    ``{tool_name}_SD`` and ``description``.
    """
    df["seq_dist"] = [
        seq_dist(buggy, fixed)
        for buggy, fixed in zip(df["func_code"], df["repair"])
    ]

    success_rate = (
        compute_success_rate(df, "assignment_id")
        .to_frame(f"{tool_name}_SR")
        .reset_index()
    )
    distance = (
        compute_seq_distance(df, "assignment_id")
        .to_frame(f"{tool_name}_SD")
        .reset_index(drop=True)
    )

    print("Number of buggy programs", len(df.repair))
    print("Number of repairs found", df.repair.astype(bool).sum())
    print("Total success rate", compute_success_rate(df), tool_name)
    print("Total average distance", compute_seq_distance(df), tool_name)

    # Description of the first row of every assignment.
    first_rows = df.groupby('assignment_id', as_index=False).first()

    # The three pieces are glued side by side on their 0..n-1 positional
    # index; this relies on every groupby above yielding the assignments in
    # the same order.
    return pd.concat([success_rate, distance, first_rows.description], axis=1)

In [None]:
from tokenize_rt import src_to_tokens

def doesnt_compiles(code):
    """
    Tell whether ``code`` fails to parse or to tokenize.

    Returns ``False`` when both ``ast.parse`` and ``src_to_tokens`` accept
    ``code`` and ``True`` as soon as either of them raises.
    """
    try:
        ast.parse(code)
        src_to_tokens(code)
    except Exception:
        # Any parsing/tokenizing failure marks the code as unusable. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate, so a long-running cell can still be interrupted.
        return True
    return False
    
def process_generic(df, ds_name, model_name):
    """
    Clean one tool's raw results and reduce them to per-assignment metrics.

    Steps:
      1. missing repairs become the empty string;
      2. buggy programs and repairs rejected by ``doesnt_compiles`` are
         blanked out;
      3. rows whose buggy program is now empty are dropped;
      4. ``clean_code`` is applied to the remaining programs and repairs;
      5. ``get_results`` computes the per-assignment table, to which a
         ``dataset`` column set to ``ds_name`` is added.

    Steps 1 and 2 modify the caller's ``df`` in place (later notebook cells
    read the blanked ``repair`` column); everything from step 3 on works on a
    private copy.

    Returns the per-assignment frame produced by ``get_results``.
    """
    print("Dataset", ds_name)
    print("Tool", model_name)

    # In-place normalisation of the caller's frame.
    df.loc[pd.isnull(df.repair), "repair"] = ""
    df.loc[list(map(doesnt_compiles, df.func_code)), "func_code"] = ""
    df.loc[list(map(doesnt_compiles, df.repair)), "repair"] = ""

    # Explicit copy: assigning columns on a boolean-filtered slice is chained
    # assignment and triggers pandas' SettingWithCopyWarning.
    df = df[df.func_code.astype(bool)].copy()
    df["repair"] = df.repair.apply(clean_code)
    df["func_code"] = df.func_code.apply(clean_code)

    df = get_results(df, model_name)
    df["dataset"] = ds_name
    return df

#### Loading the results from Refactory

In [None]:
def load_refactory_results(accepted_submission_ids=None):
    """
    Load Refactory's evaluation results for the three datasets.

    Parameters
    ----------
    accepted_submission_ids : mapping or None
        When given, maps each dataset name to the collection of
        ``submission_id`` values to keep; other rows are discarded.

    Returns
    -------
    pandas.DataFrame
        The per-assignment tables returned by ``process_generic`` (tool name
        ``"RF"``) for every dataset, concatenated with a fresh index.
    """
    # TODO: here it should be Refactory dublin_testing_results.csv
    mapping = {
        "dublin": "./data/refactory/dublin_evaluation_results.csv",
        "newcaledonia": "./data/refactory/newcaledonia_evaluation_results.csv",
        "singapore": "./data/refactory/singapore_evaluation_results.csv",
    }
    per_dataset = []
    for dataset_name, csv_path in mapping.items():
        dataframe = pd.read_csv(csv_path)

        if accepted_submission_ids is not None:
            asids = accepted_submission_ids[dataset_name]
            # Explicit copy so the ``.loc`` assignment below writes to an
            # independent frame instead of a filtered slice (avoids pandas'
            # SettingWithCopyWarning).
            dataframe = dataframe[dataframe.submission_id.isin(asids)].copy()

        # Important: here we need to set to the empty string the repairs
        # found by Refactory which did not pass all the tests
        dataframe.loc[~dataframe.repair_correctness, "repair"] = ""
        per_dataset.append(process_generic(dataframe, dataset_name, "RF"))

    return pd.concat(per_dataset, axis=0, ignore_index=True)

#### Merging with the other dataframe

In [None]:
def merge_results(model_dataframe, asids):
    """
    Attach Refactory's per-assignment metrics to another tool's table.

    Refactory's results are loaded for the submissions listed in ``asids``
    and printed, their ``assignment_id`` is cast to ``str``, and the columns
    not already present in ``model_dataframe`` are joined onto it using the
    (``dataset``, ``assignment_id``) pair as key.
    """
    refactory_dataframe = load_refactory_results(asids)
    print("rf", refactory_dataframe)
    refactory_dataframe["assignment_id"] = (
        refactory_dataframe["assignment_id"].astype(str)
    )

    keys = ["dataset", "assignment_id"]
    indexed = refactory_dataframe.set_index(keys)
    # Only bring over what the model table does not already have.
    extra_columns = [
        name for name in indexed.columns if name not in model_dataframe
    ]
    return model_dataframe.join(indexed[extra_columns], on=keys, rsuffix="r_")

In [None]:
# Names of assignments that can be filtered out of the data.
# NOTE(review): the only visible use in this notebook is a commented-out
# filter on the training data — confirm whether the list is still needed.
remove_assignments = [
    "bsearch", "factorial", "remove_zeros", 
    "swap_keys_values", "swap_unique_keys_values", "selection_sort"
]

## Loading the results from the Neural Model

### Data analysis: number of available correct solutions

In [None]:
from datasets import load_dataset

# Load the "train" split of the Dublin configuration as a pandas DataFrame.
train_data = load_dataset("koutch/intro_prog", "dublin_data")["train"].to_pandas()
# Optional filter, currently disabled:
# train_data = train_data[~train_data.assignment_id.isin(remove_assignments)]
train_data

In [None]:
# Correct submissions are counted by summing the `correct` column; the
# incorrect ones are the remainder.
# NOTE(review): assumes `correct` is boolean / 0-1 — confirm.
print("number of correct solutions in the trainnig set", train_data.correct.sum())
print("number of incorrect solutions in the training set", len(train_data) - train_data.correct.sum())

#### Number of solutions that Refactory managed to repair in the training set

In [None]:
# Refactory's results on the training set.
path = "./data/refactory/training_results.csv"
df = pd.read_csv(path)
# Blank out the repairs that are not flagged as correct.
df.loc[~df.repair_correctness, "repair"] = ""
# process_generic also edits `df` in place (missing / non-compiling repairs
# become ""), which the count in the next cell relies on.
results = process_generic(df, "training", "RF")
results

In [None]:
# Every non-empty `repair` string counts as one repaired program.
print("Number of programs repaired by refactory", df.repair.astype(bool).sum())

### Experiment results

In [None]:
def obtain_repairs(df, details, dist_f):
    """
    Pick, for every buggy program, the closest generation that passed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``func_code`` column (the buggy program) and a
        ``generations`` column (an iterable of candidate repairs per row).
    details : mapping
        ``details[str(i)][j][1]["passed"]`` tells whether candidate ``j`` of
        the ``i``-th row (by position, not by index label) passed.
    dist_f : callable
        ``dist_f(buggy, candidate)`` returns a comparable distance.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with a ``repair`` column added in place. Each
        entry is the passing candidate with the smallest distance to the
        buggy program (the earliest one on ties), or ``""`` when no
        candidate passed.
    """
    repairs = []
    rows = zip(df["func_code"], df["generations"])
    for i, (buggy, predictions) in enumerate(rows):
        repair, min_dist = "", np.inf
        for j, prediction in enumerate(predictions):
            if not details[str(i)][j][1]["passed"]:
                continue
            dist = dist_f(buggy, prediction)
            if dist < min_dist:
                # Track the best distance so far; without this update every
                # passing candidate would overwrite the previous one and the
                # last, not the closest, would be kept.
                repair, min_dist = prediction, dist
        repairs.append(repair)

    df["repair"] = repairs
    return df

In [None]:
def get_seq2seq_results():
    """
    Load and summarise the seq2seq model's results on the three datasets.

    For each dataset the evaluation JSON is read, a repair is selected for
    every buggy program with ``obtain_repairs`` (using ``seq_dist``), and the
    outcome is reduced to per-assignment metrics by ``process_generic`` under
    the tool name ``"LLM"``.

    Returns a pair: the concatenated per-assignment frame, and a dict mapping
    each dataset name to the set of submission ids that were evaluated.
    """
    results_folder = './data/seq2seq/results/'
    accepted_submission_ids = {}
    per_dataset = []

    for name in ("dublin", "newcaledonia", "singapore"):
        data = json2data(
            os.path.join(results_folder, f"{name}_evaluation_results.json")
        )
        model_dataframe = obtain_repairs(
            pd.DataFrame(data['eval_ds']), data['details'], seq_dist
        )

        submission_ids = set(model_dataframe.submission_id)
        accepted_submission_ids[name] = submission_ids
        print("dataset", name, "number of solutions to repair", len(submission_ids))

        # TODO: average number of
        per_dataset.append(process_generic(model_dataframe, name, "LLM"))

    merged = pd.concat(per_dataset, axis=0, ignore_index=True)
    return merged, accepted_submission_ids

In [None]:
seq2seq_res, asids = get_seq2seq_results()

# Columns of the final table, in display order ("description" left out).
columns = ["dataset", "assignment_id",
           "RF_SR", "LLM_SR",
           "RF_SD", "LLM_SD"]

# Short dataset labels, plus readable names for the numeric assignment ids
# used in the Singapore dataset.
data_renaming = {
    "dublin": "DB",
    "newcaledonia": "NC",
    "singapore": "SP",
    "1": "remove_extras",
    "3": "search",
    "4": "sort_age",
    "5": "top_k",
}

results = (
    merge_results(seq2seq_res, asids)[columns]
    .round(2)
    .replace(data_renaming)
    .rename(columns={"assignment_id": "assignment"})
)
results

In [None]:
# Print the comparison table as LaTeX source, without the index column.
print(results.to_latex(index=False))

## Loading the results for Generative Models for Code Infilling

In [None]:
def read_jsonlines(path):
    """
    Read a JSON Lines file into a list of decoded objects.

    Each non-blank line of ``path`` is parsed with ``json.loads``; blank or
    whitespace-only lines (e.g. a trailing empty line) are skipped instead of
    raising. The file is read as UTF-8, the encoding JSON text is expected to
    use, rather than the platform default.

    Raises ``json.JSONDecodeError`` when a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as fp:
        return [json.loads(line) for line in fp if line.strip()]

In [None]:
def get_gmci_results():
    """
    Load and summarise the GMCI model's results on the three datasets.

    Every results file is read with ``read_jsonlines`` and reduced to
    per-assignment metrics by ``process_generic`` under the tool name
    ``"GMCI"``.

    Returns a pair: the concatenated per-assignment frame, and a dict mapping
    each dataset name to the set of submission ids found in its file.
    """
    results_files = {
        'dublin': './data/gmci/results/dublin_evaluation_results.json',
        'newcaledonia': './data/gmci/results/newcaledonia_evaluation_results.json',
        'singapore': './data/gmci/results/singapore_evaluation_results.json',
    }

    accepted_submission_ids = {}
    per_dataset = []
    for dataset_name, results_path in results_files.items():
        raw = pd.DataFrame(read_jsonlines(results_path))
        accepted_submission_ids[dataset_name] = set(raw.submission_id)

        summary = process_generic(raw, dataset_name, "GMCI")
        summary['dataset'] = dataset_name
        per_dataset.append(summary)

    merged = pd.concat(per_dataset, axis=0, ignore_index=True)
    return merged, accepted_submission_ids

In [None]:
gmci_res, asids = get_gmci_results()

# Columns of the final table, in display order ("description" left out).
columns = ["dataset", "assignment_id",
           "RF_SR", "GMCI_SR",
           "RF_SD", "GMCI_SD"]

# Short dataset labels, plus readable names for the numeric assignment ids.
data_renaming = {
    "dublin": "DB",
    "newcaledonia": "NC",
    "singapore": "SP",
    "1": "remove_extras",
    "3": "search",
    "4": "sort_age",
    "5": "top_k",
}

gmci_results = (
    merge_results(gmci_res, asids)[columns]
    .round(2)
    .replace(data_renaming)
)
gmci_results

In [None]:
# Print the GMCI comparison table as LaTeX source, without the index column.
print(gmci_results.to_latex(index=False, multicolumn=True, multirow=True))

#### Loading the results from the GMCI model on QuixBugs

In [None]:
# Raw GMCI results on QuixBugs, one JSON object per line of the file.
path = './data/gmci/results/quixbugs_evaluation_results.json'
dataframe = pd.DataFrame(read_jsonlines(path))
dataframe

In [None]:
# Every truthy `repair` value counts as one repaired program.
# NOTE(review): assumes failed repairs are stored as "" and not as NaN
# (NaN is truthy under astype(bool)) — confirm against the results file.
print("Total number of programs repaired", dataframe.repair.astype(bool).sum())

In [None]:
# Number of rows in the QuixBugs results frame.
print(len(dataframe))