In [None]:
import os
import sys
import pickle

sys.path.insert(0, '..')
sys.path.insert(0, '../..')
sys.path.insert(0, '../../..')
sys.path.insert(0, '../../../..')
sys.path.insert(0, '../../../../..')

from load_data.load_conformal_data import load_suffix_results

from conformance_checking.conformance import ConformanceChecking

from conformance_checking.conformance_results import ConformanceResults

In [None]:
# Configuration: dataset name, path to the process model (BPMN file), directory holding
# the sampled-suffix results to evaluate, and the directory the output is written to.
# NOTE(review): path_to_be_model climbs three directory levels while the data paths climb
# five -- confirm both resolve correctly from the notebook's working directory.

# Sepsis:
log_name = "Sepsis"
path_to_be_model = "../../../data/process_models/Sepsis.bpmn"
path_calibration_samples_results = "../../../../../data/Sepsis/proact_conf_check/eval_test_set/"
output_dir   = "../../../../../data/Sepsis/proact_conf_check/online_prediction/"

# Helpdesk (uncomment to switch datasets):
# NOTE(review): this block did not override log_name, so uncommenting it alone would leave
# log_name = "Sepsis"; presumably it should be set as below -- confirm.
# log_name = "Helpdesk"
# path_to_be_model = "../../../data/process_models/Helpdesk.bpmn"
# path_calibration_samples_results = "../../../../../data/Helpdesk/proact_conf_check/eval_test_set/"
# output_dir   = "../../../../../data/Helpdesk/proact_conf_check/online_prediction/"

In [None]:
# Run conformance checking on every probabilistic suffix sample of every loaded case,
# then persist the resulting dictionary to disk as fixed-size pickle chunks.

# MC suffix sampling results:
results = load_suffix_results(path=path_calibration_samples_results)

# Conformance checking object built from the process model:
cc = ConformanceChecking(path_process_model=path_to_be_model)

# Score the sampled suffixes with the conformance checker.
# NOTE(review): target_workers is presumably the degree of parallelism -- confirm.
cfr = ConformanceResults(log_name=log_name, data=results, conformance_object=cc)
calibration_conformance_dict = cfr.conformance_results(target_workers=64)
print("Finished conformance checking computation")

# Testing purpose
# test = cfr.quick_fitness()

# Make sure the target directory is there before anything is written.
os.makedirs(output_dir, exist_ok=True)

# Number of cases stored per pickle file.
n = 50

num_cases = len(calibration_conformance_dict['case_id'])
chunk_starts = range(0, num_cases, n)
total_saved = 0

for start in chunk_starts:
    stop = start + n

    # Cut the same [start:stop) window out of every entry of the result dictionary.
    chunk = {key: values[start:stop] for key, values in calibration_conformance_dict.items()}

    # Files are labelled with the nominal upper bound of the window (050, 100, ...),
    # so the label of the last file can exceed the real number of cases.
    chunk_path = os.path.join(output_dir, f'conformance_calibration_part_{stop:03d}.pkl')

    with open(chunk_path, 'wb') as fh:
        pickle.dump(chunk, fh)

    saved_now = len(chunk['case_id'])
    total_saved += saved_now
    print(f"Saved {saved_now} entries to {chunk_path}")

# len(chunk_starts) is the number of windows, i.e. the number of files written.
print(f"Saved a total of {total_saved} entries across {len(chunk_starts)} files")

print("All files stored!")