In [10]:
import pandas as pd
import json
import os
import re
from pathlib import Path
from datetime import datetime

# Locate the project root from the current working directory. The notebook is
# expected to be started either from the project directory itself (a path
# ending in "Reflecta") or from its "scripts" subdirectory.
PROJECT_ROOT = None
pwd = os.getcwd()
if pwd.endswith("scripts"):
    # Running from <project>/scripts: the project root is the parent directory.
    PROJECT_ROOT = os.path.dirname(pwd)
elif pwd.endswith("Reflecta"):
    PROJECT_ROOT = pwd
else:
    # Raise instead of calling exit(1): inside an IPython/Jupyter kernel `exit`
    # is not the plain interpreter exit, and the rest of the cell must never
    # run with PROJECT_ROOT still unset.
    raise RuntimeError("Please run the script within the project")

print(f"PROJECT_ROOT={PROJECT_ROOT}")

def get_metadata(full_path):
    """Extract run metadata encoded in the name of a file's grandparent directory.

    The directory name is expected to look like ``<prefix>:<key>:<value>,<key>:<value>,...``.
    Everything up to and including the first ``:`` is skipped (the original
    comment names this the 'bench:' part); the remainder is split on commas
    into ``key:value`` fields.

    Special handling:
      * ``fuzzer`` -- only the text before the first ``-`` is kept.
      * ``date``   -- parsed with ``datetime.fromisoformat``.

    Args:
        full_path: Path (string or path-like) of a file located two levels
            below the directory whose name carries the metadata.

    Returns:
        dict mapping field names to values; empty when the directory name has
        no ``<prefix>:`` part. Fields without a ``:`` are ignored.

    Raises:
        ValueError: if a ``date`` field is not a valid ISO-format timestamp.
    """
    # The metadata lives in the name of the directory two levels above the file.
    run_dir_name = Path(full_path).parent.parent.name

    prefix_match = re.match(r'[^:]+:(.+)', run_dir_name)
    if not prefix_match:
        return {}

    parsed = {}
    for item in prefix_match.group(1).split(','):
        # Split on the first ':' only, so values may themselves contain ':'
        # (as ISO timestamps do).
        name, separator, value = item.partition(':')
        if not separator:
            continue
        if name == "fuzzer":
            value = value.split('-')[0]
        if name == "date":
            value = datetime.fromisoformat(value)
        parsed[name] = value
    return parsed



all_stats = []     # one record per JSON file: directory metadata merged with the file's contents
all_coverage = []  # one record per coverage sample, timestamps relative to each file's first sample

# Walk the bench directory and collect every JSON file and coverage CSV.
for root, _dirs, files in os.walk(os.path.join(PROJECT_ROOT, "bench")):
    for file in files:
        full_path = os.path.join(root, file)

        if file.endswith('.json'):
            # Keys from the JSON file override same-named directory metadata.
            stats_record = get_metadata(full_path)
            with open(full_path, 'r') as f:
                stats_record.update(json.load(f))
            all_stats.append(stats_record)

        if file.endswith('coverage_over_time.csv'):
            run_metadata = get_metadata(full_path)
            target = run_metadata.get('target')
            fuzzer = run_metadata.get('fuzzer')
            start = None  # timestamp of the first sample in this file
            with open(full_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    # Skip the header row, and blank lines (e.g. a trailing
                    # newline) that would otherwise break the unpacking below.
                    if not line or line.startswith('timestamp'):
                        continue
                    timestamp, coverage = line.split(',')
                    timestamp = float(timestamp)
                    if start is None:
                        start = timestamp
                    all_coverage.append({
                        # Rebase so every file's series starts at 0.
                        'timestamp': timestamp - start,
                        'coverage': int(coverage),
                        'target': target,
                        'fuzzer': fuzzer
                    })

df_stats = pd.DataFrame(all_stats)
display(df_stats.head(1))

# Naming the columns explicitly keeps the frame well-formed (and sortable)
# even when no coverage file was found.
df_coverage = pd.DataFrame(all_coverage, columns=['timestamp', 'coverage', 'target', 'fuzzer'])
df_coverage = df_coverage.sort_values(by='timestamp', ascending=True)
display(df_coverage.head(1))

PROJECT_ROOT=/home/liuqiang/projects/Reflecta


Unnamed: 0,fuzzer,target,date,totalSamples,validSamples,interestingSamples,timedOutSamples,totalExecs,avgProgramSize,avgCorpusProgramSize,execsPerSecond,fuzzerOverhead,coverage,correctnessRate,timeoutRate,crashingSamples
0,reflecta,v8,2025-04-22 11:36:56+00:00,4459,3869,350,20,16622,9.915,3.7,26.463345,0.010184,0.033091,0.781,0.003,


Unnamed: 0,timestamp,coverage,target,fuzzer
0,0.0,23879,v8,reflecta


In [11]:
# Correctness rate laid out as one row per target and one column per fuzzer
pivot_table = pd.pivot(
    df_stats,
    index='target',
    columns='fuzzer',
    values='correctnessRate',
)
display(pivot_table)

fuzzer,fuzzilli,reflecta
target,Unnamed: 1_level_1,Unnamed: 2_level_1
micropython,,0.492
mruby,,0.782
php,,
ruby,,0.819
v8,0.631,0.781


In [13]:
# Plot coverage over time: one facet per target, one line per fuzzer
import seaborn as sns
import matplotlib.pyplot as plt

# Fixed colour per fuzzer so a fuzzer looks the same in every facet
fuzzer_palette = { 'nautilus': 'orange', 'polyglot': 'green', 'fuzzilli': 'red', 'reflecta': 'blue'}
# Axes are deliberately not shared between facets (sharex/sharey=False)
g = sns.FacetGrid(df_coverage, col="target", col_wrap=3, height=4, sharey=False, sharex=False)
g.map_dataframe(sns.lineplot, x="timestamp", y="coverage", hue="fuzzer", palette=fuzzer_palette)
g.add_legend()
g.set_axis_labels("Time (s)", "Coverage")
g.set_titles(col_template="{col_name}")
# Use the grid's own tight_layout rather than plt.tight_layout(): it keeps the
# area reserved by add_legend() free, so the facets do not slide under the legend.
g.tight_layout()
plt.show()

SyntaxError: unmatched '}' (145282730.py, line 6)