# Evaluation

In [None]:
from dataclasses import dataclass
import copy
import dateutil
import glob
import pandas as pd

@dataclass
class FileSize:
    """Size of one benchmark file before reduction.

    Instances are built from the rows of ``file_sizes.csv``; both fields
    default to -1 when no value is supplied.
    """

    # Filled from the second column of file_sizes.csv.
    num_bytes: int = -1
    # Filled from the third column of file_sizes.csv
    # (presumably the Tree-Sitter node count -- TODO confirm).
    nodes: int = -1

@dataclass
class ReducedFile:
    """Summary of one reducer run on one benchmark.

    Every field defaults to -1 and is filled in while a ``*_output.csv``
    file is processed.
    """

    # size_bytes of the last predicate invocation that returned 0.
    num_bytes: int = -1
    # Total number of predicate invocations (rows in the csv file).
    predicate_calls: int = -1
    # Number of predicate invocations with a non-zero return code.
    predicate_fails: int = -1
    # Elapsed time between the first and the last predicate invocation.
    # Assigned from timedelta.total_seconds(), which is a float.
    time_seconds: float = -1.0

# Load the benchmark file sizes before they are reduced.
# Columns are read by position: 0 = key used in file_sizes, 1 = size in bytes,
# 2 = node count, 3 = number of error nodes (must be 0 for every file).
df = pd.read_csv("file_sizes.csv")
file_sizes = {}
# itertuples(name=None) yields plain tuples, so the integer subscripts below
# are real positions. Subscripting an iterrows() Series with integers relies
# on a positional fallback that pandas has deprecated.
for row in df.itertuples(index=False, name=None):
    err_nodes = row[3]
    assert err_nodes == 0, "Tree-Sitter could not parse one of the benchmark files"
    file_sizes[row[0]] = FileSize(row[1], row[2])

for key, size in file_sizes.items():
    print(key, size)

In [None]:
# Load the csv files generated by run_benchmarks.py
# Each generated csv file has 4 columns:
# time_before, time_after, size_bytes, return_code
REDUCERS = ["creduce", "perses", "bric-ddmin", "bric-hdd", "bric-br", "bric-gbr"]
reducers_results = {}
for benchmark_name in glob.glob("**/*_output.csv"):
    # The files have no header row, so the columns are labelled 0..3.
    benchmark = pd.read_csv(benchmark_name, header=None)
    # One boolean per predicate invocation: True when the return code was 0.
    succeeded = benchmark[3] == 0

    reduced_file = ReducedFile()
    reduced_file.predicate_calls = len(benchmark.index)
    reduced_file.predicate_fails = int((~succeeded).sum())

    # Elapsed time: time_before of the first row to time_after of the last.
    time_start = dateutil.parser.parse(benchmark[0].iloc[0])
    time_end = dateutil.parser.parse(benchmark[1].iloc[-1])
    reduced_file.time_seconds = (time_end - time_start).total_seconds()

    # The reduced size is the size_bytes of the last successful invocation;
    # a run without any successful invocation is reported but not recorded.
    successful_runs = benchmark.loc[succeeded]
    if successful_runs.empty:
        print(f"{benchmark_name} None")
        continue

    reduced_file.num_bytes = successful_runs.iloc[-1][2]
    print(
        f"{benchmark_name:<40} {reduced_file.num_bytes:<10} {reduced_file.time_seconds}s {reduced_file.predicate_calls}/{reduced_file.predicate_fails}"
    )
    # A fresh ReducedFile is created on every iteration, so it can be stored
    # directly without copying.
    reducers_results[benchmark_name] = reduced_file