In [2]:
import os
import json
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm import tqdm

## AutoFL

In [6]:
def extract_length(trajectory):
    """Return the number of entries in ``trajectory``."""
    n_entries = len(trajectory)
    return n_entries

def extract_function_call_counts(trajectory):
    """Count the entries of ``trajectory`` that contain a ``"function_call"`` key.

    Only the presence of the key is checked; its value is not inspected.
    """
    n_calls = 0
    for message in trajectory:
        if "function_call" in message:
            n_calls += 1
    return n_calls

def extract_valid_function_call_counts(trajectory):
    """Count ``"function"``-role messages whose content shows no failure marker.

    A message is counted when its ``"role"`` is ``"function"`` and its
    ``"content"`` contains neither ``"error_message"`` nor
    ``"required positional argument"``.
    """
    failure_markers = ("error_message", "required positional argument")
    n_valid = 0
    for message in trajectory:
        if message["role"] != "function":
            continue
        body = message["content"]
        if any(marker in body for marker in failure_markers):
            continue
        n_valid += 1
    return n_valid

def extract_repeated_call_counts(trajectory, only_func_name=True):
    """Count assistant function calls that repeat the immediately preceding call.

    Only messages with ``"role" == "assistant"`` and a truthy ``"function_call"``
    entry are considered; other messages are skipped and do not reset the
    "previous call" state.

    Args:
        trajectory: iterable of message dicts; each must have a ``"role"`` key.
        only_func_name: if True, a call is a repetition when its function name
            equals the previous call's name. If False, the ``"arguments"`` value
            must be equal as well.

    Returns:
        The number of calls that repeat the call right before them.
    """
    # None sentinels can never compare equal to a real name, so the first call
    # is never counted as a repetition.
    prev_name = None
    prev_arguments = None
    repeat_count = 0
    for m in trajectory:
        if m["role"] != "assistant" or not m.get("function_call"):
            continue
        call = m["function_call"]
        name = call["name"]
        arguments = call["arguments"]
        if prev_name == name and (only_func_name or prev_arguments == arguments):
            repeat_count += 1
        # Always remember the latest call. Previously the arguments were only
        # refreshed when the name changed, so same-name calls were compared
        # against the first call of the run instead of the preceding one.
        prev_name = name
        prev_arguments = arguments
    return repeat_count

def extract_content_length(trajectory):
    """Return the total character length of all non-empty assistant ``"content"`` values."""
    assistant_texts = []
    for message in trajectory:
        if message['role'] == 'assistant' and message['content']:
            assistant_texts.append(message['content'])
    return len(''.join(assistant_texts))

def is_found(buggy_methods):
    """Return True if any value in ``buggy_methods`` has a truthy ``'is_found'`` entry."""
    found_flags = [info['is_found'] for info in buggy_methods.values()]
    return any(found_flags)

In [7]:
AUTOFL_DIR = Path('raw_data/autofl')

# Walk AUTOFL_DIR/<run_dir>/<model_dir>/<file> and build one summary row per
# trajectory file. Directory listings are sorted because os.listdir returns
# entries in arbitrary order, which would make the CSV row order vary between runs.
results = list()
for run_dir in tqdm(sorted(os.listdir(AUTOFL_DIR))):
    if not os.path.isdir(AUTOFL_DIR / run_dir):
        continue
    # The run id is the integer after the last underscore of the directory name.
    run_id = int(run_dir.split('_')[-1])
    for model_dir in sorted(os.listdir(AUTOFL_DIR / run_dir)):
        assert os.path.isdir(AUTOFL_DIR / run_dir / model_dir)
        for file in sorted(os.listdir(AUTOFL_DIR / run_dir / model_dir)):
            assert 'json' in file
            # The bug id is the text between the first '-' and the next '.' of the file name.
            bug_id = file.split('-')[1].split('.')[0]
            # Only hold the file open while parsing it.
            with open(AUTOFL_DIR / run_dir / model_dir / file) as f:
                data = json.load(f)
            # Skip files lacking a message list or a buggy-method mapping.
            if 'messages' not in data or not isinstance(data['messages'], list):
                continue
            if 'buggy_methods' not in data or not isinstance(data['buggy_methods'], dict):
                continue
            trajectory = data['messages']
            results.append({
                'bug_id': bug_id,
                'run_id': run_id,
                'model': model_dir,
                'total_messages': extract_length(trajectory),
                'function_calls': extract_function_call_counts(trajectory),
                'valid_function_calls': extract_valid_function_call_counts(trajectory),
                'function_repetitions': extract_repeated_call_counts(trajectory),
                'function_and_args_repetitions': extract_repeated_call_counts(trajectory, only_func_name=False),
                'content_length': extract_content_length(trajectory),
                'success': is_found(data['buggy_methods']),
            })

print(len(results))
df = pd.DataFrame(results)
# Create the output directory up front so the scan is not wasted on a missing folder.
Path('data').mkdir(parents=True, exist_ok=True)
df.to_csv('data/autofl_base.csv',index=False)

100%|██████████| 21/21 [00:18<00:00,  1.13it/s]

32308





## Devign

In [34]:
def extract_length(trajectory):
    """Return the number of entries in ``trajectory["state"]["processed_actions"]``."""
    processed_actions = trajectory["state"]["processed_actions"]
    return len(processed_actions)

def extract_function_call_counts(trajectory):
    """Return ``trajectory["t"]``, asserting that it is exactly an ``int``."""
    step_count = trajectory["t"]
    assert type(step_count) is int
    return step_count

def extract_valid_function_call_counts(trajectory):
    """Count processed actions whose second element equals ``"VALID"``.

    When the first element of the last processed action contains
    ``'positive_alarm'``, that last action is left out of the count.
    """
    actions = trajectory["state"]["processed_actions"]
    ends_with_positive_alarm = 'positive_alarm' in actions[-1][0]
    counted = actions[:-1] if ends_with_positive_alarm else actions
    return sum(1 for action in counted if action[1] == "VALID")

def extract_repeated_call_counts(trajectory, only_func_name=True):
    """Count history entries that repeat the immediately preceding parsed call.

    Each entry of ``trajectory["state"]["history"]`` that contains both ``'('``
    and ``')'`` is split into a name (text before the first ``'('``) and an
    argument string (text between the first ``'('`` and the first ``')'``).
    Entries without both parentheses are skipped and do not reset the
    "previous call" state.

    Args:
        trajectory: dict holding the history list under ``["state"]["history"]``.
        only_func_name: if True, an entry is a repetition when its name equals
            the previous parsed entry's name. If False, the argument string
            must be equal as well.

    Returns:
        The number of entries that repeat the parsed entry right before them.
    """
    # None sentinels can never compare equal to a parsed name, so the first
    # entry is never counted as a repetition (even if its name is empty).
    prev_name = None
    prev_arguments = None
    repeat_count = 0
    for a in trajectory["state"]["history"]:
        if '(' not in a or ')' not in a:
            continue
        open_idx = a.find('(')
        name = a[:open_idx]
        args = a[open_idx + 1:a.find(')')]
        # TODO: some entries include natural-language descriptions.
        # The text is currently split into name / arguments purely on parentheses.
        # Such entries do not seem to be treated as invalid (they appear to be
        # discarded), so the processing is left this way for now.
        if prev_name == name and (only_func_name or prev_arguments == args):
            repeat_count += 1
        # Always remember the latest call. Previously the arguments were only
        # refreshed when the name changed, so same-name calls were compared
        # against the first call of the run instead of the preceding one.
        prev_name = name
        prev_arguments = args
    return repeat_count

def extract_content_length(trajectory):
    """Return the constant ``-1``; ``trajectory`` is not inspected."""
    placeholder_length = -1
    return placeholder_length

def is_correct_alarm(trajectory):
    """Return whether ``trajectory["state"]["reward"]`` equals 1.

    Returns False when the state has no ``"reward"`` key.
    """
    state = trajectory["state"]
    return "reward" in state and state["reward"] == 1

def is_vulnerability(trajectory):
    """Return whether ``trajectory["state"]["vulnerable"]`` equals 1."""
    vulnerable_flag = trajectory["state"]["vulnerable"]
    return vulnerable_flag == 1

In [None]:
DEVIGN_DIR = Path('raw_data/devign')

# Walk DEVIGN_DIR/<model_dir>/<split_dir>/<file> and build one summary row per
# episode file. Directory listings are sorted because os.listdir returns
# entries in arbitrary order, which would make the CSV row order vary between runs.
results = list()
count = 0  # not used in this cell; kept in case other cells read it
for model_dir in tqdm(sorted(os.listdir(DEVIGN_DIR))):
    if not os.path.isdir(DEVIGN_DIR / model_dir):
        continue
    # The model id is the part of the directory name before the first '-'.
    model_id = model_dir.split('-')[0]
    for split_dir in sorted(os.listdir(DEVIGN_DIR / model_dir)):
        if not os.path.isdir(DEVIGN_DIR / model_dir / split_dir):
            continue
        for file in sorted(os.listdir(DEVIGN_DIR / model_dir / split_dir)):
            assert 'json' in file
            # The episode id is the integer between the first '_' and the next '.' of the file name.
            episode_id = int(file.split('_')[1].split('.')[0])
            # Only hold the file open while parsing it.
            with open(DEVIGN_DIR / model_dir / split_dir / file) as f:
                data = json.load(f)
            if 'trajectory' not in data or not isinstance(data['trajectory'], dict):
                continue # zero occurrence
            trajectory = data['trajectory']
            if 'done' not in trajectory or not trajectory['done']:
                continue # zero occurrence

            results.append({
                'split': split_dir,
                'bug_id': episode_id,
                'run_id': 0,
                'model': model_id,
                'total_messages': extract_length(trajectory),
                'function_calls': extract_function_call_counts(trajectory),
                'valid_function_calls': extract_valid_function_call_counts(trajectory),
                'function_repetitions': extract_repeated_call_counts(trajectory),
                'function_and_args_repetitions': extract_repeated_call_counts(trajectory, only_func_name=False),
                'content_length': extract_content_length(trajectory),
                'success': is_correct_alarm(trajectory),
                'is_vulnerable': is_vulnerability(trajectory),
            })
# Note: total_messages == function_calls + 1 always holds!
print(len(results))
df = pd.DataFrame(results)
# Create the output directory up front so the scan is not wasted on a missing folder.
Path('data').mkdir(parents=True, exist_ok=True)
df.to_csv('data/devign_base.csv',index=False)

100%|██████████| 5/5 [00:04<00:00,  1.18it/s]

32784



