In [15]:
from typing import Iterable, List, NamedTuple, Tuple
import pandas as pd
import os
from common import subdirs_of

In [24]:
def convert_percentage_to_float(s: str) -> float:
    """Convert a percentage string such as ``'50%'`` into a fraction (``0.5``).

    Any ``%`` characters at either end of ``s`` are removed, the remainder is
    parsed with ``float`` (which itself tolerates surrounding whitespace), and
    the parsed number is divided by 100.

    Raises:
        ValueError: if what is left after removing ``%`` is not a valid number.
    """
    number_text = s.strip('%')
    percent_value = float(number_text)
    return percent_value / 100


def read_experiment_data(path: str) -> pd.DataFrame:
    """Load a ``plot_data`` CSV file into a DataFrame.

    The ``bit_cvg`` and ``shw_cvg`` columns are parsed from percentage strings
    into fractions via ``convert_percentage_to_float``. Column labels are
    returned with surrounding whitespace stripped.

    Args:
        path: location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        The parsed table with cleaned column labels.
    """
    # The converter keys must match the raw header text (leading space
    # included) because the labels are only cleaned up after parsing.
    percentage_columns = (' bit_cvg', ' shw_cvg')
    raw_table = pd.read_csv(
        path,
        converters={label: convert_percentage_to_float for label in percentage_columns},
    )

    # Strip surrounding whitespace from every column label.
    return raw_table.rename(columns=lambda label: label.strip())


In [43]:
# Sanity check: load the plot data of a single replicate and look at its final rows.
df = read_experiment_data(
    '/home/peter/archives/combined/libfuzzer/dagisel/aarch64/0/default/plot_data'
)
df.tail(n=5)

Unnamed: 0,# relative_time,cycles_done,cur_item,corpus_count,pending_total,pending_favs,bit_cvg,shw_cvg,saved_crashes,saved_hangs,max_depth,execs_per_sec,total_execs,edges_found
50725,259178,0,21928,23992,21872,324,0.79321,0.02574,0,0,27,67.21,34779453,51984
50726,259184,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,79.58,34779798,51985
50727,259189,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,76.73,34780168,51985
50728,259194,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,69.27,34780512,51985
50729,259196,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,67.23,34780677,51985


In [28]:
# One replicate of one fuzzing configuration together with its loaded plot data.
# Field order matters: instances are built positionally.
Experiment = NamedTuple(
    'Experiment',
    [
        ('fuzzer', str),          # name of the first-level directory
        ('isel', str),            # name of the second-level directory
        ('arch', str),            # name of the third-level directory
        ('replicate_id', int),    # fourth-level directory name parsed as an integer
        ('data', pd.DataFrame),   # table loaded from the replicate's plot_data file
    ],
)

def iterate_over_all_experiments(dir: str, allow_missing_data: bool = False) -> Iterable[Experiment]:
    """Lazily yield one ``Experiment`` per replicate directory found under ``dir``.

    The expected layout is
    ``<dir>/<fuzzer>/<isel>/<arch>/<replicate>/default/plot_data``.
    Each level is listed with ``subdirs_of`` (imported from ``common``), whose
    entries are assumed to expose ``.name`` and ``.path`` — TODO confirm.

    Args:
        dir: root directory containing one sub-directory per fuzzer.
        allow_missing_data: when true, a replicate whose ``plot_data`` file
            does not exist is skipped instead of aborting the iteration.

    Raises:
        FileNotFoundError: a ``plot_data`` file is missing and
            ``allow_missing_data`` is false.
        ValueError: a replicate directory name is not an integer.
    """
    # Flatten the four directory levels into a single lazy stream of replicates.
    replicate_entries = (
        (fuzzer_entry.name, isel_entry.name, arch_entry.name, replicate_entry)
        for fuzzer_entry in subdirs_of(dir)
        for isel_entry in subdirs_of(fuzzer_entry.path)
        for arch_entry in subdirs_of(isel_entry.path)
        for replicate_entry in subdirs_of(arch_entry.path)
    )

    for fuzzer, isel, arch, replicate_entry in replicate_entries:
        replicate_id = int(replicate_entry.name)
        plot_data_path = os.path.join(replicate_entry.path, 'default', 'plot_data')
        try:
            yield Experiment(fuzzer, isel, arch, replicate_id, read_experiment_data(plot_data_path))
        except FileNotFoundError:
            if allow_missing_data:
                continue
            raise


In [44]:
def combine_last_row_of_each_experiment_data(experiments: Iterable['Experiment'], columns: List[str]) -> pd.DataFrame:
    """Build a summary table holding the final row of every experiment.

    Args:
        experiments: the experiments to summarise; any iterable (including a
            generator) is accepted and is consumed exactly once.
        columns: labels of the columns of ``exp.data`` to copy into the summary.

    Returns:
        A DataFrame with one row per experiment and the columns
        ``fuzzer``, ``isel``, ``arch``, ``replicate`` followed by ``columns``.
        The copied values come from the last row of ``exp.data``. If an
        experiment has no data rows at all, its value cells are NaN.

    Raises:
        KeyError: if a requested column is absent from an experiment's data.
    """
    def last_row_values(data: pd.DataFrame) -> list:
        # Selecting first keeps the KeyError for unknown columns even when the
        # table has no rows.
        selected = data[columns]
        if len(selected.index) == 0:
            # No rows recorded: emit explicit NaNs rather than a short row, so
            # the result does not depend on how pandas pads ragged input.
            return [float('nan')] * selected.shape[1]
        # Read each cell on its own. Going through `.values` would coerce all
        # selected columns to one common dtype (e.g. integer counters would
        # turn into floats when selected next to a float column).
        return [selected.iloc[-1, position] for position in range(selected.shape[1])]

    summary_rows = [
        [exp.fuzzer, exp.isel, exp.arch, exp.replicate_id, *last_row_values(exp.data)]
        for exp in experiments
    ]
    return pd.DataFrame(
        columns=['fuzzer', 'isel', 'arch', 'replicate', *columns],
        data=summary_rows,
    )

In [45]:
# Summarise every experiment under the archive root by the final row of its plot data.
# Replicates without a plot_data file are skipped (allow_missing_data=True).
df = combine_last_row_of_each_experiment_data(
    experiments=iterate_over_all_experiments('/home/peter/archives/combined', allow_missing_data=True),
    columns=['# relative_time', 'total_execs'],
)

df

Unnamed: 0,fuzzer,isel,arch,replicate,# relative_time,total_execs
0,libfuzzer,dagisel,aarch64,0,259196.0,3.478068e+07
1,libfuzzer,dagisel,aarch64,1,259196.0,4.188732e+07
2,libfuzzer,dagisel,aarch64,2,259196.0,3.677318e+07
3,libfuzzer,dagisel,aarch64_32,0,259196.0,3.486497e+07
4,libfuzzer,dagisel,aarch64_32,1,259196.0,4.159030e+07
...,...,...,...,...,...,...
233,aflplusplus,dagisel,ve,4,16.0,1.304326e+09
234,aflplusplus,dagisel,ve,1,18.0,1.246483e+09
235,aflplusplus,dagisel,ve,3,26.0,1.729388e+09
236,aflplusplus,dagisel,ve,0,17.0,1.132901e+09
