# Evaluation

In [3]:
import copy
import os
from dataclasses import dataclass, field

import dateutil
import dateutil.parser
import pandas as pd

@dataclass
class FileSize:
    """Size of a file expressed in bytes and in nodes.

    Both fields default to -1, which serves as a "not measured yet" sentinel.
    """
    num_bytes: int = -1  # size in bytes
    nodes: int = -1  # node count (presumably Tree-Sitter syntax-tree nodes; confirm)

@dataclass
class ReducedFile:
    """Summary of a single reduction run.

    Every numeric field defaults to -1 (unknown) until it is filled in.
    """
    # default_factory gives each instance its own FileSize. A plain
    # `FileSize()` default is either rejected by the dataclass machinery
    # (unhashable default on recent Python versions) or silently shared by
    # every ReducedFile, so mutating one result would change all of them.
    best_size: FileSize = field(default_factory=FileSize)
    predicate_calls: int = -1  # total number of predicate invocations
    predicate_fails: int = -1  # invocations whose return code was non-zero
    time_seconds: float = -1.0  # wall-clock duration; fractional seconds are kept

# Load the size of every benchmark file before reduction.
# Columns are consumed by position rather than by header name:
#   0 = key identifying the file, 1 = size in bytes, 2 = node count,
#   3 = number of error nodes (must be 0, otherwise the file did not parse).
df = pd.read_csv("file_sizes.csv")
file_sizes = {}
# itertuples(index=False, name=None) yields plain positional tuples, so no
# integer key is ever looked up on a string-labelled Series (deprecated in
# pandas 2.x). Any extra trailing columns are ignored.
for name, num_bytes, nodes, err_nodes, *_ in df.itertuples(index=False, name=None):
    assert err_nodes == 0, "Tree-Sitter could not parse one of the benchmark files"
    file_sizes[name] = FileSize(num_bytes, nodes)

for key, size in file_sizes.items():
    print(key, size)

Unnamed: 0,test,tool,bytes_before,bytes_after,nodes_before,nodes_after,err_nodes_before,err_nodes_after,time_sec,predicate_calls,failed_compiles,failed_runs,other
0,test01,creduce,93,27,48,15,0,1,6.67,812,0,62,
1,test01,perses,93,33,48,22,0,0,1.45,19,0,7,
2,test01,bric-ddmin,93,93,48,48,0,0,0.05,2,0,0,
3,test01,bric-hdd,93,52,48,1,0,1,3.66,373,0,21,
4,test01,bric-br,93,93,48,48,0,0,0.03,1,0,0,
5,test01,bric-gbr,93,93,48,48,0,0,0.03,1,0,0,
6,test02,creduce,160,30,72,15,0,1,8.62,991,0,70,
7,test02,perses,160,72,72,39,0,0,1.8,34,0,13,
8,test02,bric-ddmin,160,160,72,72,0,0,0.08,3,0,0,
9,test02,bric-hdd,160,92,72,37,0,2,6.57,662,0,38,


In [None]:
REDUCERS = ["creduce", "perses", "bric-ddmin", "bric-hdd", "bric-br", "bric-gbr"]


def _summarise_reducer_log(csv_path):
    """Condense one reducer's predicate log into a ``ReducedFile``.

    The log is read without a header row and its columns are used by position:
    0 = start timestamp of a predicate call, 1 = end timestamp,
    2 = file size in bytes, 3 = predicate return code (0 means success).

    Args:
        csv_path: Path to a ``<reducer>_output.csv`` log.

    Returns:
        A ``ReducedFile`` whose ``best_size.num_bytes`` is the size recorded
        by the last successful predicate call; ``best_size.nodes`` keeps its
        default because the log is not used to derive it.

    Raises:
        ValueError: If no predicate call in the log succeeded.
    """
    benchmark = pd.read_csv(csv_path, header=None)
    return_codes = benchmark[3]

    successful = benchmark.loc[return_codes == 0]
    if successful.empty:
        # Previously this surfaced as an opaque "NoneType is not subscriptable".
        raise ValueError(f"{csv_path}: no predicate call succeeded, cannot determine the reduced size")

    # The run lasts from the start of the first call to the end of the last one.
    time_start = dateutil.parser.parse(benchmark.iloc[0, 0])
    time_end = dateutil.parser.parse(benchmark.iloc[-1, 1])

    # A fresh FileSize per result keeps the results independent of each other
    # regardless of how ReducedFile's default for best_size is defined.
    return ReducedFile(
        best_size=FileSize(num_bytes=successful.iloc[-1][2]),
        predicate_calls=len(benchmark.index),
        predicate_fails=int((return_codes != 0).sum()),
        time_seconds=(time_end - time_start).total_seconds(),
    )


reducers_results = {}
# is_dir must be *called*; the bare bound method is always truthy and would
# let plain files whose name starts with "clang" through.
subdirs = sorted(d.path for d in os.scandir(".") if d.is_dir() and d.name.startswith("clang"))
for subdir in subdirs:
    subdir_name = os.path.basename(subdir)
    for reducer_name in REDUCERS:
        # Build the path instead of chdir-ing into the directory, so a failed
        # read cannot leave the kernel stranded in the wrong working directory.
        log_path = os.path.join(subdir, f"{reducer_name}_output.csv")
        reducers_results[(reducer_name, subdir_name)] = _summarise_reducer_log(log_path)

for key, result in reducers_results.items():
    print(key)
    print(result)