In [1]:
import importlib
import sys
import torch
import pickle
import os
import pickle
from multiprocessing import Pool

sys.path.insert(0, '..')
sys.path.insert(0, '../..')
sys.path.insert(0, '../../..')
sys.path.insert(0, '../../../..')
sys.path.insert(0, '../../../../..')



In [None]:
processed_prefixes = set()

def process_file(file_path):
    """
    Process a single pickle file and return its status along with any keys if processed.
    """
    try:
        with open(file_path, "rb") as f:
            data = pickle.load(f)
        if not isinstance(data, dict):
            print("No dict:", file_path)
            return (file_path, "Skipping: Not a dictionary", None)
        else:
            print("Processed:", file_path)
            return (file_path, "Processed", list(data.keys()))
    except Exception as e:
        print("Error:", file_path)
        return (file_path, f"Error: {e}", None)

input_dir  = '../../../../../../../evaluation_results/BPIC17/gn_l4'

files = [os.path.join(input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(".pkl")]

# Create a pool of workers
with Pool(processes=2) as pool:
    results = pool.map(process_file, files)

processed_prefixes = set()
# Process the results in the main process
for file_path, status, keys in results:
    print(f"{status} - {os.path.basename(file_path)}")
    if keys is not None:
        # Extend the shared list with keys from this file
        processed_prefixes.update(keys)

# Optionally, convert to a set to remove duplicates
print(f"Unique processed prefixes: {processed_prefixes}")

with open('BPIC2017_processed_prefixes_norm_4layer.pkl', 'wb') as f:
    pickle.dump(processed_prefixes, f)


In [None]:
processed_prefixes

{('Application_151062598', 8),
 ('Application_357513341', 18),
 ('Application_1289247242', 26),
 ('Application_2012114184', 67),
 ('Application_325807242', 73),
 ('Application_309853327', 84),
 ('Application_1477137664', 23),
 ('Application_421198957', 3),
 ('Application_1693308901', 9),
 ('Application_1759206616', 28),
 ('Application_1809555136', 18),
 ('Application_2058568369', 8),
 ('Application_1837606480', 24),
 ('Application_341433848', 9),
 ('Application_1462952668', 25),
 ('Application_402530805', 20),
 ('Application_1607821509', 30),
 ('Application_1022619955', 16),
 ('Application_1313000546', 17),
 ('Application_1956874703', 37),
 ('Application_1919971626', 3),
 ('Application_204736971', 12),
 ('Application_1677525801', 12),
 ('Application_1863366950', 20),
 ('Application_1018089402', 40),
 ('Application_1550035458', 24),
 ('Application_1667028841', 20),
 ('Application_1153327019', 9),
 ('Application_1716698141', 10),
 ('Application_171473969', 8),
 ('Application_1005568510',

In [None]:
# Load the dataset
file_path_data_set = '../../../../../encoded_data/BPIC_2017_all_5_test.pkl'
bpic_17_test_dataset = torch.load(file_path_data_set, weights_only=False)

import src.evaluation.probabilistic_evaluation
importlib.reload(src.evaluation.probabilistic_evaluation)
from src.evaluation.probabilistic_evaluation import ProbabilisticEvaluation
from model.dropout_uncertainty_enc_dec_LSTM.dropout_uncertainty_model import DropoutUncertaintyEncoderDecoderLSTM

#load model
file_path_model = '../../../training_variational_dropout/BPIC17/BPIC_2017_full_grad_norm_new_4layer.pkl'
model = DropoutUncertaintyEncoderDecoderLSTM.load(file_path_model, dropout=0.1)

#device = torch.device("cuda")  # Use the first available GPU
#model = model.to(device)

new_eval = ProbabilisticEvaluation(model, bpic_17_test_dataset,
                                   num_processes=32,
                                   growing_num_values = ['case_elapsed_time'],
                                   samples_per_case = 1000,
                                   sample_argmax = False,
                                   use_variance_cat = True,
                                   use_variance_num = True,
                                   all_cat= ['concept:name', 'org:resource', 'lifecycle:transition'],
                                   all_num= ['case_elapsed_time', 'event_elapsed_time']
                                   )

all_prefixes = set()
for i, (case_name, prefix_len, prefix, predicted_suffixes, suffix, mean_prediction) in enumerate(new_eval.count_only(random_order=False)):
    all_prefixes.add((case_name, prefix_len))

remaining_prefixes = all_prefixes - processed_prefixes

with open('BPIC2017_processed_prefixes_norm_4layer.pkl', 'wb') as f:
    pickle.dump(processed_prefixes, f)

print(len(all_prefixes))
print(len(remaining_prefixes))

print(remaining_prefixes)


Data set categories:  ([('concept:name', 28, {'A_Accepted': 1, 'A_Cancelled': 2, 'A_Complete': 3, 'A_Concept': 4, 'A_Create Application': 5, 'A_Denied': 6, 'A_Incomplete': 7, 'A_Pending': 8, 'A_Submitted': 9, 'A_Validating': 10, 'EOS': 11, 'O_Accepted': 12, 'O_Cancelled': 13, 'O_Create Offer': 14, 'O_Created': 15, 'O_Refused': 16, 'O_Returned': 17, 'O_Sent (mail and online)': 18, 'O_Sent (online only)': 19, 'W_Assess potential fraud': 20, 'W_Call after offers': 21, 'W_Call incomplete files': 22, 'W_Complete application': 23, 'W_Handle leads': 24, 'W_Personal Loan collection': 25, 'W_Shortened completion ': 26, 'W_Validate application': 27}), ('Action', 7, {'Created': 1, 'Deleted': 2, 'EOS': 3, 'Obtained': 4, 'Released': 5, 'statechange': 6}), ('org:resource', 151, {'EOS': 1, 'User_1': 2, 'User_10': 3, 'User_100': 4, 'User_101': 5, 'User_102': 6, 'User_103': 7, 'User_104': 8, 'User_105': 9, 'User_106': 10, 'User_107': 11, 'User_108': 12, 'User_109': 13, 'User_11': 14, 'User_110': 15, 'U

  0%|          | 0/4682 [00:00<?, ?it/s]

174065
37065
{('Application_629867343', 20), ('Application_538869641', 34), ('Application_769120467', 38), ('Application_775396942', 12), ('Application_444033179', 18), ('Application_786482604', 20), ('Application_650411814', 4), ('Application_481060211', 51), ('Application_618316810', 7), ('Application_818383830', 12), ('Application_792953680', 42), ('Application_607309528', 25), ('Application_576568224', 2), ('Application_742154312', 10), ('Application_537293333', 62), ('Application_747521358', 31), ('Application_74750542', 15), ('Application_443925254', 3), ('Application_703001372', 42), ('Application_577740264', 26), ('Application_560023666', 4), ('Application_806535408', 36), ('Application_461148192', 12), ('Application_547842527', 29), ('Application_53038581', 6), ('Application_597788673', 7), ('Application_597073052', 8), ('Application_574024988', 13), ('Application_582409896', 31), ('Application_815092525', 14), ('Application_773698744', 30), ('Application_696591930', 16), ('Ap

: 