In [None]:
from result_processor.utils import process_results
from cpog_verifier.utils import verify_with_cpog
import os

In [None]:
# Location of the results CSV that the following cells load.
# Python does not expand shell-style variables inside string literals, so the
# base path is passed through expandvars/expanduser explicitly; a path without
# "$VAR" or "~" is left untouched, and an unset variable stays as written.
base_path = os.path.expanduser(os.path.expandvars("$USER/SharpVelvet/out/"))

# File name of the results CSV, without the ".csv" extension.
results_file_name = "name"

results_input_path = os.path.join(base_path, results_file_name + ".csv")

In [None]:
# Option A (disabled): process the raw results, then pass them through the CPOG verifier.
# results_before_cpog_verification = process_results(results_input_path)
# results = verify_with_cpog(results_before_cpog_verification)

# OR

# Option B (active): only run process_results on the CSV.
# NOTE(review): later cells read the 'cpog_message', 'verified' and 'cpog_count'
# columns from `results`; presumably the input CSV already provides them when
# verify_with_cpog is skipped - confirm.
results = process_results(results_input_path)
# where results_input_path points to the output of the cristian tool script

In [None]:
# Per-counter summary: the share of rows whose CPOG message is anything other
# than 'NO ERROR'/'UNSAT', and the share of rows whose count matched.
grouped_results = results.groupby('counter')

for counter, group in grouped_results:
    # Rows whose CPOG message is outside the two accepted values.
    cpog_rejected = ~group['cpog_message'].isin(['NO ERROR', 'UNSAT'])
    verifier_error_rate = cpog_rejected.sum() / len(group)

    # Fraction of True among the count_matches values; 0 when True never occurs.
    match_shares = group['count_matches'].value_counts(normalize=True)
    accuracy = match_shares.get(True, 0)

    print(
        f"Counter: {counter}",
        f"Verifier Error Rate: {verifier_error_rate * 100:.2f}%",
        f"Accuracy: {accuracy * 100:.2f}%\n",
        sep="\n",
    )

In [None]:
# Load the SATzilla feature table with the same process_results helper used for the results.
# The path looks like a placeholder and has to be replaced with the real CSV location.
# NOTE(review): the next cell filters this frame with a boolean mask built from
# `results`, so presumably both frames must share the same row index - confirm.
satzilla_features = process_results("/path/to/satzilla/output.csv")

In [None]:
# Split the SATzilla feature rows into "correct" and "incorrect" counts among the
# instances that can be checked, and record how far off the incorrect counts are.

# Filter out instances we can't verify with CPOG: keep rows whose cpog_message is
# "NO ERROR"/"UNSAT" or whose 'verified' flag is True.
# The comparison is parenthesised on purpose: `|` binds tighter than `==`, so
# without the parentheses Python evaluates `(isin(...) | verified) == True`.
verifiable = results['cpog_message'].isin(["NO ERROR", "UNSAT"]) | (results['verified'] == True)
verified_results = results[verifiable].copy()  # explicit copy: a column is added below
# NOTE(review): assumes satzilla_features shares its row index with results - confirm.
verified_features = satzilla_features[verifiable].copy()  # explicit copy

# An instance is "correct" only when it is verified, its count matches, and
# CPOG reported "NO ERROR"/"UNSAT"; every other verifiable instance is "incorrect".
correct_count = (verified_results['verified'] == True) & \
                (verified_results['count_matches'] == True) & \
                (verified_results['cpog_message'].isin(["NO ERROR", "UNSAT"]))

# Split the satzilla features
correct_data = verified_features[correct_count].copy()
incorrect_data = verified_features[~correct_count].copy()

# Absolute gap between cpog_count and count_value for the incorrect instances;
# correct instances get 0. The column is built in a single step so pandas picks a
# dtype that fits the differences. Creating an int column of zeros first and then
# writing float/NaN or object values into it through .loc is an incompatible-dtype
# assignment that newer pandas versions warn about or reject.
mask = ~correct_count
verified_results['count_diff'] = abs(
    verified_results['cpog_count'] - verified_results['count_value']
).where(mask, 0)

# Print summary statistics
print("Total instances:", len(results))
print("Verifiable instances:", len(verified_results))
print("Number of correct instances:", len(correct_data))
print("Number of incorrect instances:", len(incorrect_data))
print("Number of non-zero count differences:", (verified_results['count_diff'] > 0).sum())