In [1]:
# Import necessary script
from scripts.data_preprocessing import DataPreprocessor

In this notebook we preprocess each dataset so that it can be used by the implemented PPM approaches. The preprocessing steps are adapted from *Deep Learning for Predictive Business Process Monitoring: Review and Benchmark* by *Rama-Maneiro et al.*, whose source code is available at: https://gitlab.citius.gal/efren.rama/pmdlcompararator

# Helpdesk

In [5]:
# Build the preprocessor bound to the Helpdesk dataset
hd_preprocessor = DataPreprocessor(dataset_name='helpdesk')

# Preprocess every subfolder once per implemented approach (tax, mauro, processtransformer), in that order
for approach in ('tax', 'mauro', 'processtransformer'):
    hd_preprocessor.preprocess_all_subfolders(approach=approach)

Approach: tax
Processing folder: helpdesk_baseline
Activity assignment:  {'Assign seriousness': 0, 'Closed': 1, 'Create SW anomaly': 2, 'DUPLICATE': 3, 'INVALID': 4, 'Insert ticket': 5, 'RESOLVED': 6, 'Require upgrade': 7, 'Resolve SW anomaly': 8, 'Resolve ticket': 9, 'Schedule intervention': 10, 'Take in charge ticket': 11, 'VERIFIED': 12, 'Wait': 13}
Processing folder: helpdesk_cvF_0.8_0.8_dc2_augm
Activity assignment:  {'Assign seriousness': 0, 'Closed': 1, 'Create SW anomaly': 2, 'DUPLICATE': 3, 'INVALID': 4, 'Insert ticket': 5, 'RESOLVED': 6, 'Require upgrade': 7, 'Resolve SW anomaly': 8, 'Resolve ticket': 9, 'Schedule intervention': 10, 'Take in charge ticket': 11, 'VERIFIED': 12, 'Wait': 13}
Processing folder: helpdesk_cvT_0.95_0.95_dc2_augm
Activity assignment:  {'Assign seriousness': 0, 'Closed': 1, 'Create SW anomaly': 2, 'DUPLICATE': 3, 'INVALID': 4, 'Insert ticket': 5, 'RESOLVED': 6, 'Require upgrade': 7, 'Resolve SW anomaly': 8, 'Resolve ticket': 9, 'Schedule intervention'

# Sepsis

In [6]:
# Build the preprocessor bound to the Sepsis dataset
sepsis_preprocessor = DataPreprocessor(dataset_name='sepsis')

# Preprocess every subfolder once per implemented approach (tax, mauro, processtransformer), in that order
for approach in ('tax', 'mauro', 'processtransformer'):
    sepsis_preprocessor.preprocess_all_subfolders(approach=approach)

Approach: tax
Processing folder: sepsis_baseline
Activity assignment:  {'Admission IC': 0, 'Admission NC': 1, 'CRP': 2, 'ER Registration': 3, 'ER Sepsis Triage': 4, 'ER Triage': 5, 'IV Antibiotics': 6, 'IV Liquid': 7, 'LacticAcid': 8, 'Leucocytes': 9, 'Release A': 10, 'Release B': 11, 'Release C': 12, 'Release D': 13, 'Release E': 14, 'Return ER': 15}
Processing folder: sepsis_cvF_0.8_0.8_dc2_augm
Activity assignment:  {'Admission IC': 0, 'Admission NC': 1, 'CRP': 2, 'ER Registration': 3, 'ER Sepsis Triage': 4, 'ER Triage': 5, 'IV Antibiotics': 6, 'IV Liquid': 7, 'LacticAcid': 8, 'Leucocytes': 9, 'Release A': 10, 'Release B': 11, 'Release C': 12, 'Release D': 13, 'Release E': 14, 'Return ER': 15}
Processing folder: sepsis_cvT_0.95_0.95_dc2_augm
Activity assignment:  {'Admission IC': 0, 'Admission NC': 1, 'CRP': 2, 'ER Registration': 3, 'ER Sepsis Triage': 4, 'ER Triage': 5, 'IV Antibiotics': 6, 'IV Liquid': 7, 'LacticAcid': 8, 'Leucocytes': 9, 'Release A': 10, 'Release B': 11, 'Release

# BPIC13C

In [4]:
# Build the preprocessor bound to the BPIC13C dataset
bpic13c_preprocessor = DataPreprocessor(dataset_name='bpic13c')

# Preprocess every subfolder once per implemented approach (tax, mauro, processtransformer), in that order
for approach in ('tax', 'mauro', 'processtransformer'):
    bpic13c_preprocessor.preprocess_all_subfolders(approach=approach)

Approach: tax
Processing folder: bpic13c_baseline
Activity assignment:  {'Accepted': 0, 'Completed': 1, 'Queued': 2, 'Unmatched': 3}
Processing folder: bpic13c_cvF_0.8_0.8_dc2_augm
Activity assignment:  {'Accepted': 0, 'Completed': 1, 'Queued': 2, 'Unmatched': 3}
Processing folder: bpic13c_cvT_0.95_0.95_dc2_augm
Activity assignment:  {'Accepted': 0, 'Completed': 1, 'Queued': 2, 'Unmatched': 3}
Processing folder: bpic13c_cvT_0.95_0.95_dc3_augm
Activity assignment:  {'Accepted': 0, 'Completed': 1, 'Queued': 2, 'Unmatched': 3}
Processing folder: bpic13c_orig
Activity assignment:  {'Accepted': 0, 'Completed': 1, 'Queued': 2, 'Unmatched': 3}
Approach: mauro
Processing folder: bpic13c_baseline
Processing folder: bpic13c_cvF_0.8_0.8_dc2_augm
Processing folder: bpic13c_cvT_0.95_0.95_dc2_augm
Processing folder: bpic13c_cvT_0.95_0.95_dc3_augm
Processing folder: bpic13c_orig
Approach: processtransformer
Processing folder: bpic13c_baseline
Coded activity:  {'x_word_dict': {'[PAD]': 0, '[UNK]': 1, 

# BPIC15_1

In [2]:
# Build the preprocessor bound to the BPIC15_1 dataset
bpic15_1_preprocessor = DataPreprocessor(dataset_name='bpic15_1')

# Preprocess every subfolder once per implemented approach (tax, mauro, processtransformer), in that order
for approach in ('tax', 'mauro', 'processtransformer'):
    bpic15_1_preprocessor.preprocess_all_subfolders(approach=approach)

Approach: tax
Processing folder: bpic15_1_baseline
Activity assignment:  {'01_BB_540': 0, '01_BB_545': 1, '01_BB_546': 2, '01_BB_550': 3, '01_BB_550_1': 4, '01_BB_550_2': 5, '01_BB_560': 6, '01_BB_590': 7, '01_BB_630': 8, '01_BB_635': 9, '01_BB_636': 10, '01_BB_640': 11, '01_BB_670': 12, '01_BB_670_1': 13, '01_BB_670_2': 14, '01_BB_680': 15, '01_BB_700': 16, '01_BB_730': 17, '01_BB_740': 18, '01_BB_755': 19, '01_BB_760': 20, '01_BB_765': 21, '01_BB_766': 22, '01_BB_770': 23, '01_BB_775': 24, '01_HOOFD_010': 25, '01_HOOFD_011': 26, '01_HOOFD_015': 27, '01_HOOFD_020': 28, '01_HOOFD_030_1': 29, '01_HOOFD_030_2': 30, '01_HOOFD_040': 31, '01_HOOFD_050': 32, '01_HOOFD_055': 33, '01_HOOFD_060': 34, '01_HOOFD_061': 35, '01_HOOFD_065_0': 36, '01_HOOFD_065_1': 37, '01_HOOFD_065_2': 38, '01_HOOFD_080': 39, '01_HOOFD_090': 40, '01_HOOFD_099': 41, '01_HOOFD_100': 42, '01_HOOFD_100_0': 43, '01_HOOFD_101': 44, '01_HOOFD_101b': 45, '01_HOOFD_110': 46, '01_HOOFD_110_0': 47, '01_HOOFD_110_1': 48, '01_HO