In [21]:
from typing import Iterable, List, NamedTuple, Tuple
import pandas as pd
import os
from common import subdirs_of


In [22]:
def convert_percentage_to_float(s: str) -> float:
    return float(s.strip('%')) / 100


def read_experiment_data(path: str) -> pd.DataFrame:
    # the table header is not consistent, so we don't want pandas to detect and process the header (1st row of csv)
    # but let it use the hard-coded column names below.
    return pd.read_csv(
        path,
        index_col=False,
        header=None,
        skiprows=1,
        names=[
            '# relative_time', 'cycles_done', 'cur_item', 'corpus_count', 'pending_total', 'pending_favs', 'bit_cvg',
            'shw_cvg', 'saved_crashes', 'saved_hangs', 'max_depth', 'execs_per_sec', 'total_execs', 'edges_found'
        ],
        converters={
            'bit_cvg': convert_percentage_to_float,
            'shw_cvg': convert_percentage_to_float,
        }
    )


In [23]:
# for preview purpose only
read_experiment_data(
    '/home/peter/archives/combined/libfuzzer/dagisel/aarch64/0/default/plot_data'
)


Unnamed: 0,# relative_time,cycles_done,cur_item,corpus_count,pending_total,pending_favs,bit_cvg,shw_cvg,saved_crashes,saved_hangs,max_depth,execs_per_sec,total_execs,edges_found
0,61,0,0,9,9,1,0.29233,0.00319,0,0,2,225.67,20791,19158
1,66,0,0,9,9,1,0.29233,0.00319,0,0,2,223.81,21919,19158
2,71,0,0,9,9,1,0.29233,0.00319,0,0,2,220.54,23039,19158
3,76,0,0,9,9,1,0.29233,0.00319,0,0,2,220.66,24167,19158
4,81,0,0,9,9,1,0.29233,0.00319,0,0,2,220.60,25287,19158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50725,259178,0,21928,23992,21872,324,0.79321,0.02574,0,0,27,67.21,34779453,51984
50726,259184,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,79.58,34779798,51985
50727,259189,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,76.73,34780168,51985
50728,259194,0,21928,23993,21873,324,0.79323,0.02574,0,0,27,69.27,34780512,51985


In [24]:
# for preview purpose only
read_experiment_data(
    '/home/peter/archives/combined/aflplusplus/dagisel/ve/0/default/plot_data'
)


Unnamed: 0,# relative_time,cycles_done,cur_item,corpus_count,pending_total,pending_favs,bit_cvg,shw_cvg,saved_crashes,saved_hangs,max_depth,execs_per_sec,total_execs,edges_found
0,65,0,643,703,680,247,0.1707,0.0,7,19,4,5149.52,165453,11188
1,70,0,609,729,705,245,0.1717,0.0,12,19,4,5859.50,195079,11255
2,75,0,377,750,723,255,0.1731,0.0,12,19,4,4322.63,217609,11347
3,80,0,377,768,741,255,0.1745,0.0,12,19,4,1648.54,234407,11438
4,85,0,719,790,758,263,0.1757,0.0,13,19,5,5245.60,264431,11514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50743,259182,17,6403,12376,167,2,0.4087,0.0,1614,177,61,5568.61,1132816346,26784
50744,259187,17,7964,12376,167,2,0.4087,0.0,1614,177,61,5592.57,1132844290,26784
50745,259192,17,11735,12376,167,2,0.4087,0.0,1614,177,61,5099.69,1132870860,26784
50746,259197,17,9455,12376,167,2,0.4087,0.0,1614,177,61,5438.35,1132894801,26784


In [25]:
class Experiment(NamedTuple):
    fuzzer: str
    isel: str
    arch: str
    replicate_id: int
    data: pd.DataFrame


def iterate_over_all_experiments(dir: str, allow_missing_data: bool = False) -> Iterable[Experiment]:
    for fuzzer_dir in subdirs_of(dir):
        fuzzer = fuzzer_dir.name
        for isel_dir in subdirs_of(fuzzer_dir.path):
            isel = isel_dir.name
            for arch_dir in subdirs_of(isel_dir.path):
                arch = arch_dir.name
                for replicate_dir in subdirs_of(arch_dir.path):
                    replicate_id = int(replicate_dir.name)
                    plot_data_path = os.path.join(
                        replicate_dir.path, 'default', 'plot_data'
                    )
                    try:
                        yield Experiment(
                            fuzzer, isel, arch, replicate_id,
                            read_experiment_data(plot_data_path)
                        )
                    except FileNotFoundError:
                        if not allow_missing_data:
                            raise


In [26]:
def combine_last_row_of_each_experiment_data(experiments: Iterable[Experiment], columns: List[str]) -> pd.DataFrame:
    return pd.DataFrame(
        columns=['fuzzer', 'isel', 'arch', 'replicate', *columns],
        data=(
            [
                exp.fuzzer,
                exp.isel,
                exp.arch,
                exp.replicate_id,
                *exp.data.tail(1)[columns].values.flatten().tolist()
            ]
            for exp in experiments
        )
    )


In [27]:
df = combine_last_row_of_each_experiment_data(
    iterate_over_all_experiments(
        '/home/peter/archives/combined',
        allow_missing_data=True
    ),
    columns=[
        '# relative_time', 'total_execs',
        'bit_cvg', 'shw_cvg', 'corpus_count'
    ]
)

df


Unnamed: 0,fuzzer,isel,arch,replicate,# relative_time,total_execs,bit_cvg,shw_cvg,corpus_count
0,libfuzzer,dagisel,aarch64,0,259196.0,3.478068e+07,0.79323,0.02574,23993.0
1,libfuzzer,dagisel,aarch64,1,259196.0,4.188732e+07,0.80125,0.02539,26353.0
2,libfuzzer,dagisel,aarch64,2,259196.0,3.677318e+07,0.79765,0.02545,24856.0
3,libfuzzer,dagisel,aarch64_32,0,259196.0,3.486497e+07,0.78960,0.02536,23822.0
4,libfuzzer,dagisel,aarch64_32,1,259196.0,4.159030e+07,0.79477,0.02534,26335.0
...,...,...,...,...,...,...,...,...,...
233,aflplusplus,dagisel,ve,4,259198.0,1.304326e+09,0.50230,0.00000,14804.0
234,aflplusplus,dagisel,ve,1,259198.0,1.246483e+09,0.42570,0.00000,13496.0
235,aflplusplus,dagisel,ve,3,259198.0,1.729388e+09,0.50070,0.00000,14037.0
236,aflplusplus,dagisel,ve,0,259198.0,1.132901e+09,0.40870,0.00000,12376.0


In [28]:
df.to_csv('last_row_of_each_experiment.csv', index=False)


In [4]:
df = pd.read_csv('last_row_of_each_experiment.csv')
df


Unnamed: 0,fuzzer,isel,arch,replicate,# relative_time,total_execs,bit_cvg,shw_cvg
0,libfuzzer,dagisel,aarch64,0,259196.0,3.478068e+07,0.79323,0.02574
1,libfuzzer,dagisel,aarch64,1,259196.0,4.188732e+07,0.80125,0.02539
2,libfuzzer,dagisel,aarch64,2,259196.0,3.677318e+07,0.79765,0.02545
3,libfuzzer,dagisel,aarch64_32,0,259196.0,3.486497e+07,0.78960,0.02536
4,libfuzzer,dagisel,aarch64_32,1,259196.0,4.159030e+07,0.79477,0.02534
...,...,...,...,...,...,...,...,...
233,aflplusplus,dagisel,ve,4,259198.0,1.304326e+09,0.50230,0.00000
234,aflplusplus,dagisel,ve,1,259198.0,1.246483e+09,0.42570,0.00000
235,aflplusplus,dagisel,ve,3,259198.0,1.729388e+09,0.50070,0.00000
236,aflplusplus,dagisel,ve,0,259198.0,1.132901e+09,0.40870,0.00000


In [30]:
df_summary = df \
    .drop(columns=['replicate']) \
    .groupby(['fuzzer', 'isel', 'arch']) \
    .agg(['min', 'max', 'count', 'mean', 'std']) \

df_summary


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# relative_time,# relative_time,# relative_time,# relative_time,# relative_time,total_execs,total_execs,total_execs,total_execs,total_execs,...,shw_cvg,shw_cvg,shw_cvg,shw_cvg,shw_cvg,corpus_count,corpus_count,corpus_count,corpus_count,corpus_count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,min,max,count,mean,std,min,max,count,mean,std,...,min,max,count,mean,std,min,max,count,mean,std
fuzzer,isel,arch,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
aflisel,dagisel,aarch64,26772.0,61111.0,5,52532.0,14859.079766,3073735.0,7926029.0,5,6.540121e+06,1.998673e+06,...,0.05847,0.06435,5,0.062336,0.002435,11134.0,19194.0,5,16908.400000,3266.088379
aflisel,dagisel,aarch64_32,53239.0,61107.0,5,59507.4,3504.236907,6508428.0,7936047.0,5,7.398488e+06,5.729805e+05,...,0.06296,0.06480,5,0.064038,0.000772,17049.0,18839.0,5,18058.800000,647.938037
aflisel,dagisel,aarch64_be,61438.0,61463.0,2,61450.5,17.677670,4586576.0,5630423.0,2,5.108500e+06,7.381113e+05,...,0.06240,0.06340,2,0.062900,0.000707,14830.0,18658.0,2,16744.000000,2706.804758
aflisel,dagisel,amdgcn,61041.0,61106.0,5,61073.4,25.774018,3434365.0,3643096.0,5,3.507550e+06,8.775521e+04,...,0.00656,0.01665,5,0.011034,0.003969,10814.0,12338.0,5,11371.200000,624.142372
aflisel,dagisel,arm,61437.0,61462.0,2,61449.5,17.677670,3417376.0,3572457.0,2,3.494916e+06,1.096588e+05,...,0.04050,0.04210,2,0.041300,0.001131,13129.0,13234.0,2,13181.500000,74.246212
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
libfuzzer,dagisel,systemz,259196.0,259196.0,3,259196.0,0.000000,36585664.0,41392664.0,3,3.978549e+07,2.771136e+06,...,0.25041,0.25526,3,0.252780,0.002427,27890.0,29323.0,3,28430.666667,778.539873
libfuzzer,dagisel,thumb,259196.0,259196.0,3,259196.0,0.000000,28201702.0,33038491.0,3,3.118221e+07,2.607017e+06,...,0.03762,0.03858,3,0.038063,0.000484,23595.0,25008.0,3,24305.333333,706.531198
libfuzzer,dagisel,ve,259196.0,259196.0,3,259196.0,0.000000,36203932.0,41075193.0,3,3.939305e+07,2.763242e+06,...,0.10338,0.10539,3,0.104633,0.001093,21003.0,22191.0,3,21750.666667,650.904243
libfuzzer,dagisel,wasm64,259196.0,259196.0,3,259196.0,0.000000,41261771.0,46116651.0,3,4.433396e+07,2.671995e+06,...,0.05078,0.05202,3,0.051423,0.000621,20121.0,21289.0,3,20872.000000,651.707757


In [18]:
df_summary.to_csv('summary.csv')
