In [7]:
import os
import re
import pandas as pd
from IPython.display import display

## folder containing analysis files
RANGE_RESULTS_DIR = "Range_Results"

## regular expressions for extracting data
## (the first two are applied with .match, i.e. anchored at the start of a line;
##  the last two are applied with .search, i.e. anywhere in a line)
benchmark_pattern = re.compile(r"### Benchmark: (.+)")
restricted_pattern = re.compile(r"Restricted variables: (\d+)/(\d+)")
timeout_pattern = re.compile(r"TIME OUT")
error_pattern = re.compile(r"ERROR")

## metrics (initialised in this cell, so re-running the cell starts from zero)
benchmarks = []
total_restricted = 0
total_variables = 0
timeout_count = 0   # number of lines containing "TIME OUT"
error_count = 0     # number of lines containing "ERROR"
benchmark_data = [] # data for each individual benchmark, containing (benchmark_name, restricted, total)

## processing file by file, where each file contains multiple benchmarks
## os.listdir returns entries in arbitrary order; sorting keeps the row order of
## the resulting DataFrame (and every table displayed from it) reproducible
for filename in sorted(os.listdir(RANGE_RESULTS_DIR)):
    file_path = os.path.join(RANGE_RESULTS_DIR, filename)
    if not filename.endswith(".txt") or not os.path.isfile(file_path):
        continue

    ## a "Restricted variables" line is attributed to the most recent
    ## "### Benchmark:" header seen in the same file (None if there was none)
    current_benchmark = None

    ## explicit encoding so parsing does not depend on the platform locale;
    ## undecodable bytes are replaced rather than aborting the whole run
    ## (all patterns above are plain ASCII, so matching is unaffected)
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            bench_match = benchmark_pattern.match(line)
            if bench_match:
                current_benchmark = bench_match.group(1)
                continue

            restricted_match = restricted_pattern.match(line)
            if restricted_match:
                restricted = int(restricted_match.group(1))
                total = int(restricted_match.group(2))
                total_restricted += restricted
                total_variables += total
                benchmarks.append(current_benchmark)
                benchmark_data.append((current_benchmark, restricted, total))
                continue

            ## a line is counted at most once: as a timeout if it contains
            ## "TIME OUT", otherwise as an error if it contains "ERROR"
            if timeout_pattern.search(line):
                timeout_count += 1
                continue

            if error_pattern.search(line):
                error_count += 1
                continue

## converting all benchmark data to DataFrame
df = pd.DataFrame(benchmark_data, columns=["Benchmark", "Restricted", "Total Variables"])
## adding 'Restricted Percentage' column
## (a benchmark reporting 0/0 gets NaN here, which pandas skips in .mean()
##  and treats as False in the threshold comparisons)
df["Restricted Percentage"] = df["Restricted"] / df["Total Variables"] * 100

In [8]:
## some more metrics
## num_benchmarks is the number of rows in df, i.e. one per parsed
## "Restricted variables" line
num_benchmarks = len(df)
avg_restricted_per_benchmark = df["Restricted"].mean()
## mean of the per-benchmark percentages (every benchmark weighted equally)
avg_restricted_percentage = df["Restricted Percentage"].mean()

## displaying results
print(f"Number of benchmarks: {num_benchmarks}")
print(f"Total restricted variables: {total_restricted}/{total_variables}")
## pooled ratio over all variables of all benchmarks; guarded so that an empty
## result set reports "n/a" instead of raising ZeroDivisionError
if total_variables:
    print(f"Percentage of restricted variables: {(total_restricted / total_variables) * 100:.2f}%")
else:
    print("Percentage of restricted variables: n/a (no variables found)")

Number of benchmarks: 559
Total restricted variables: 152776/4504380
Percentage of restricted variables: 3.39%


In [9]:
## avg_restricted_percentage is the mean of the per-benchmark "Restricted Percentage"
## column, so every benchmark counts equally regardless of its number of variables;
## it therefore differs from the pooled total_restricted/total_variables ratio
print(f"Average restricted percentage per benchmark: {avg_restricted_percentage:.2f}%")

Average restricted percentage per benchmark: 18.43%


In [10]:
## timeout_count is the number of lines containing "TIME OUT" across all result files;
## num_benchmarks is the number of rows in df, i.e. benchmarks that reported a
## "Restricted variables" line
## NOTE(review): if a timed-out benchmark does not report a "Restricted variables" line,
## it is missing from this denominator -- confirm against the result file format
print(f"Number of timeouts: {timeout_count}/{num_benchmarks}")

Number of timeouts: 142/559


In [11]:
## rows of df whose restricted share is strictly above 50%
is_high = df["Restricted Percentage"].gt(50)
high_restricted = df.loc[is_high]

## header, count relative to all parsed benchmarks, then the table without its index
print("\nBenchmarks with > 50% restricted variables:")
print(f"Amount: {len(high_restricted)}/{num_benchmarks}")
display(high_restricted.style.hide(axis='index'))


Benchmarks with > 50% restricted variables:
Amount: 31/559


Benchmark,Restricted,Total Variables,Restricted Percentage
./benchmarks/motivating/adder.circom,6,9,66.666667
./benchmarks/ed25519-099d19c-fixed/binmullessthan51.circom,2,3,66.666667
./benchmarks/circomlibex-cff5ab6/GreaterEqThan@comparators@circomlib_16.circom,18,25,72.0
./benchmarks/circomlibex-cff5ab6/GreaterEqThan@comparators@circomlib_32.circom,34,41,82.926829
./benchmarks/circomlibex-cff5ab6/LessEqThan@comparators@circomlib_32.circom,34,41,82.926829
./benchmarks/circomlibex-cff5ab6/Num2BitsNeg@bitify@circomlib_16.circom,17,21,80.952381
./benchmarks/circomlibex-cff5ab6/LessEqThan@comparators@circomlib_16.circom,18,25,72.0
./benchmarks/circomlibex-cff5ab6/Num2Bits@bitify@circomlib_32.circom,33,34,97.058824
./benchmarks/circomlibex-cff5ab6/GreaterThan@comparators@circomlib_32.circom,34,41,82.926829
./benchmarks/circomlibex-cff5ab6/Num2Bits@bitify@circomlib_16.circom,17,18,94.444444


In [12]:
## rows of df whose restricted share is strictly below 20%
is_low = df["Restricted Percentage"].lt(20)
low_restricted = df.loc[is_low]

## header, count relative to all parsed benchmarks, then the table without its index
print("\nBenchmarks with < 20% restricted variables:")
print(f"Amount: {len(low_restricted)}/{num_benchmarks}")
display(low_restricted.style.hide(axis='index'))


Benchmarks with < 20% restricted variables:
Amount: 216/559


Benchmark,Restricted,Total Variables,Restricted Percentage
./benchmarks/motivating/VDFixed.circom,1,33,3.030303
./benchmarks/motivating/VDBuggy.circom,2,27,7.407407
./benchmarks/motivating/ValidateDecodingFixed.circom,1,8,12.5
./benchmarks/buggy-mix/tornado-core-ce97895/withdraw.circom,497,51965,0.956413
./benchmarks/buggy-mix/min0-tornado-core-ce97895/withdraw.circom,1,96,1.041667
./benchmarks/buggy-mix/iden3-core-3a3a300/credentialAtomicQuerySigTest.circom,7482,202482,3.695143
./benchmarks/buggy-mix/circomlib-79d3034/test-mimcsponge.circom,1,39,2.564103
./benchmarks/gnark-plonky2-verifier/int/reduce.unsafe.sr1cs,1,108,0.925926
./benchmarks/gnark-plonky2-verifier/int/inverse.unsafe.sr1cs,1,7,14.285714
./benchmarks/gnark-plonky2-verifier/int/exp.safe.sr1cs,1,18,5.555556
