In [None]:
import importlib
import sys
import torch
import pickle
import os
import pickle
from multiprocessing import Pool

# Make the parent, grandparent and great-grandparent directories importable.
# Each entry is inserted at position 0, so the last one added ('../../..')
# ends up being searched first.
# NOTE(review): presumably one of these is the project root that provides the
# `src` package imported further down -- confirm.
sys.path.insert(0, '..')
sys.path.insert(0, '../..')
sys.path.insert(0, '../../..')


In [None]:
# Start with an empty set. The same name is bound to a fresh empty set again
# later in this cell, right before the per-file results are merged.
processed_prefixes = set()


def process_file(file_path):
    """
    Unpickle one file and report what was found in it.

    Parameters
    ----------
    file_path : str
        Path of the pickle file to open.

    Returns
    -------
    tuple
        ``(file_path, status, keys)`` where

        * ``status == "Processed"`` and ``keys`` is the list of the dict's keys
          when the file contains a dictionary;
        * ``status == "Skipping: Not a dictionary"`` and ``keys`` is ``None``
          when the file contains any other object;
        * ``status == "Error: <exception text>"`` and ``keys`` is ``None`` when
          opening or unpickling the file raised.

    A one-line progress message is printed in every case.

    Note: ``pickle.load`` can execute arbitrary code while loading, so this
    should only be pointed at trusted files.
    """
    try:
        with open(file_path, "rb") as handle:
            payload = pickle.load(handle)

        # Happy path first: a dictionary yields its keys.
        if isinstance(payload, dict):
            print("Processed:", file_path)
            return (file_path, "Processed", list(payload.keys()))

        # Anything else is reported and skipped.
        print("No dict:", file_path)
        return (file_path, "Skipping: Not a dictionary", None)
    except Exception as exc:
        # Best effort: a failing file is reported, not raised, so one bad
        # file does not abort the whole batch.
        print("Error:", file_path)
        return (file_path, f"Error: {exc}", None)
    

# Directory that holds the evaluation result pickles.
input_dir = '../../../../evaluation_results_ssd/BPIC17/gn'

# Full paths of every ".pkl" entry directly inside input_dir.
files = [
    os.path.join(input_dir, entry)
    for entry in os.listdir(input_dir)
    if entry.endswith(".pkl")
]

# Load the files in two worker processes; pool.map returns one result tuple
# per file, in the same order as `files`.
with Pool(processes=2) as pool:
    results = pool.map(process_file, files)

# Merge the keys reported for each file into one set (duplicates collapse).
processed_prefixes = set()
for file_path, status, keys in results:
    print(f"{status} - {os.path.basename(file_path)}")
    if keys is None:
        # Skipped or failed file: nothing to merge.
        continue
    processed_prefixes.update(keys)

print(f"Unique processed prefixes: {processed_prefixes}")

# Persist the merged set in the current working directory.
with open('BPIC2017_processed_prefixes.pkl', 'wb') as out_file:
    pickle.dump(processed_prefixes, out_file)




In [None]:
# Bare expression: when run as a notebook cell, the set built in the previous
# cell is shown as this cell's output.
processed_prefixes

In [None]:
# Load the encoded test dataset from disk.
# weights_only=False lets torch.load unpickle arbitrary Python objects, so the
# file must come from a trusted source.
file_path_data_set = '../../../encoded_data/BPIC_2017_all_5_test.pkl'
bpic_17_test_dataset = torch.load(file_path_data_set, weights_only=False)

# Import the evaluation module and reload it so that edits made to its source
# after the kernel first imported it are picked up.
import src.evaluation.probabilistic_evaluation
importlib.reload(src.evaluation.probabilistic_evaluation)
from src.evaluation.probabilistic_evaluation import ProbabilisticEvaluation

# The first positional argument is None.
# NOTE(review): presumably this is the model slot and no model is needed for
# count_only() -- confirm against ProbabilisticEvaluation.
new_eval = ProbabilisticEvaluation(
    None,
    bpic_17_test_dataset,
    num_processes=32,
    growing_num_values=['case_elapsed_time'],
    samples_per_case=1000,
    sample_argmax=False,
    use_variance_cat=True,
    use_variance_num=True,
)

# Collect the (case_name, prefix_len) pair of every item yielded by
# count_only(); the other four fields of each item are not used here.
all_prefixes = set()
for case_name, prefix_len, _prefix, _predicted_suffixes, _suffix, _mean_prediction in new_eval.count_only(random_order=False):
    all_prefixes.add((case_name, prefix_len))

# Everything in all_prefixes that is not already in processed_prefixes
# (processed_prefixes comes from an earlier cell).
remaining_prefixes = all_prefixes - processed_prefixes

# Persist the remaining set in the current working directory.
with open('BPIC2017_remaining_prefixes.pkl', 'wb') as out_file:
    pickle.dump(remaining_prefixes, out_file)

print(len(all_prefixes))
print(len(remaining_prefixes))

print(remaining_prefixes)
