In [1]:
# Import necessary packages
import pm4py
import pandas as pd
import json
import os
import shutil

from augm_baseline.ml.core.model import AugmentationExperiment
from augm_baseline.ml.core import model
from augm_baseline.ml.pipeline.augmentation_pipeline import run_pipeline

# Generate baseline data

In this notebook we apply the augmentation approach *Model-Agnostic Event Log Augmentation for Predictive Process Monitoring* by *M. Käppel* and *S. Jablonski*, with the source code available at: https://github.com/mkaep/pbpm-ssl-suite

In [2]:
# Generate an XES log file from the original training split of every dataset
for name in ['helpdesk', 'sepsis', 'bpic13c', 'bpic15_1']:

    # Folder holding the original splits of this dataset. os.path.join keeps the
    # path valid on every operating system (on Windows it yields the same
    # backslash-separated path as a hard-coded 'data\\...' string).
    orig_dir = os.path.join('data', name, f'{name}_orig')

    # Load data
    train_orig = pd.read_csv(os.path.join(orig_dir, f'train_{name}_orig.csv'))

    # Parse the timestamps and force the case ID and activity columns to strings
    train_orig['time:timestamp'] = pd.to_datetime(train_orig['time:timestamp'])
    train_orig[['case:concept:name', 'concept:name']] = train_orig[['case:concept:name', 'concept:name']].astype(str)

    # Convert it to an event log and save it
    train_xes = pm4py.convert_to_event_log(train_orig)
    pm4py.write_xes(train_xes, os.path.join(orig_dir, f'train_{name}_orig_80.xes'))

  from .autonotebook import tqdm as notebook_tqdm
exporting log, completed traces :: 100%|██████████| 2931/2931 [00:00<00:00, 3278.68it/s]
exporting log, completed traces :: 100%|██████████| 672/672 [00:00<00:00, 1131.92it/s]
exporting log, completed traces :: 100%|██████████| 951/951 [00:00<00:00, 3540.13it/s]
exporting log, completed traces :: 100%|██████████| 767/767 [00:02<00:00, 321.91it/s]


After generating the event log files we can now load the defined experiments and create the augmented training data based on this baseline approach.

In [3]:
# For every dataset: run the baseline augmentation pipeline, store the augmented
# training data, and assemble a full dataset (augmented train + original
# validation and test splits) in the dataset's baseline folder.
for name in ['helpdesk', 'sepsis', 'bpic13c', 'bpic15_1']:

    # Folders used for this dataset. os.path.join keeps the paths valid on every
    # operating system and avoids the invalid '\{' escape sequence that a
    # hand-written backslash path is prone to.
    orig_dir = os.path.join('data', name, f'{name}_orig')
    baseline_dir = os.path.join('data', name, f'{name}_baseline')

    # Load experiment from baseline folder
    with open(os.path.join('augm_baseline', 'experiments', f'aug_{name}.json'), 'r') as file:
        exp_data = json.load(file)

    # Generate experiment
    experiment = AugmentationExperiment.from_dict(exp_data)

    # Run the augmentation pipeline
    run_pipeline(experiment, verbose=True)

    # Load augmented data
    # NOTE(review): the folder/file name '3_mixed_True_1.6' presumably mirrors the
    # settings in aug_{name}.json - confirm if the experiment definition changes.
    df = pd.read_csv(os.path.join('augm_baseline', 'data', name, '3_mixed_True_1.6',
                                  'train_3_mixed_True_1.6_augm.csv'))

    # Keep only the three core columns; .copy() makes the result an independent
    # frame so the assignment below cannot trigger a SettingWithCopyWarning
    df = df[['case:concept:name', 'concept:name', 'time:timestamp']].copy()

    # Adjust case IDs as in some approaches we need numerical case IDs
    df['case:concept:name'] = df['case:concept:name'].str.replace('_', '', regex=False)

    # Save baseline training data
    os.makedirs(baseline_dir, exist_ok=True)
    df.to_csv(os.path.join(baseline_dir, f'train_{name}_baseline.csv'), index=False)

    # Load original validation and test data
    df_val = pd.read_csv(os.path.join(orig_dir, f'val_{name}_orig.csv'))
    df_test = pd.read_csv(os.path.join(orig_dir, f'test_{name}_orig.csv'))

    # Generate full dataset with augmented training data
    df_total = pd.concat([df, df_val, df_test]).reset_index(drop=True)

    # Save full dataset
    total_path = os.path.join(baseline_dir, f'{name}_baseline.csv')
    df_total.to_csv(total_path, index=False)

    # Copy original validation and test data to the baseline folder (the
    # originals stay in place). The loop variable is called `split` so that the
    # builtin `type` is not shadowed for the rest of the notebook.
    for split in ['val', 'test']:
        source = os.path.join(orig_dir, f'{split}_{name}_orig.csv')
        dest = os.path.join(baseline_dir, f'{split}_{name}_baseline.csv')

        shutil.copy2(src=source, dst=dest)

 Start creating folder for 1 event logs and 0 approaches
 Create directory for event log helpdesk
 Load event log from data\helpdesk\helpdesk_orig\train_helpdesk_orig_80.xes


parsing log, completed traces :: 100%|██████████| 2931/2931 [00:01<00:00, 1855.70it/s]


Start augmentation...
BEFORE FIT:
[RandomInsertion, Activities: []]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: []]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
AFTER FIT:
[RandomInsertion, Activities: ['Resolve ticket', 'Closed', 'Resolve SW anomaly', 'Schedule intervention', 'RESOLVED', 'VERIFIED', 'Assign seriousness', 'INVALID', 'Insert ticket', 'Create SW anomaly', 'Take in charge ticket', 'Wait']]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: ['Resolve ticket', 'Closed', 'Resolve SW anomaly', 'Schedule intervention', 'RESOLVED', 'VERIFIED', 'Assign seriousness', 'INVALID', 'Insert ticket', 'Create SW anomaly', 'Take in charge ticket', 'Wait']]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
Starting augmentation
Augmentor could not appli

parsing log, completed traces :: 100%|██████████| 672/672 [00:01<00:00, 666.30it/s]


Start augmentation...
BEFORE FIT:
[RandomInsertion, Activities: []]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: []]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
AFTER FIT:
[RandomInsertion, Activities: ['Return ER', 'LacticAcid', 'Release E', 'Release C', 'ER Registration', 'Admission NC', 'Release B', 'Leucocytes', 'IV Antibiotics', 'Release A', 'Release D', 'Admission IC', 'ER Triage', 'IV Liquid', 'ER Sepsis Triage', 'CRP']]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: ['Return ER', 'LacticAcid', 'Release E', 'Release C', 'ER Registration', 'Admission NC', 'Release B', 'Leucocytes', 'IV Antibiotics', 'Release A', 'Release D', 'Admission IC', 'ER Triage', 'IV Liquid', 'ER Sepsis Triage', 'CRP']]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.

parsing log, completed traces :: 100%|██████████| 951/951 [00:00<00:00, 2993.56it/s]


Start augmentation...
BEFORE FIT:
[RandomInsertion, Activities: []]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: []]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
AFTER FIT:
[RandomInsertion, Activities: ['Queued', 'Completed', 'Unmatched', 'Accepted']]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: ['Queued', 'Completed', 'Unmatched', 'Accepted']]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
Starting augmentation
Augmentor could not applied to this trace. We try to augment an other trace or with an other augmentor
Augmentor could not applied to this trace. We try to augment an other trace or with an other augmentor
Augmentor could not applied to this trace. We try to augment an other trace or with an other augmentor
Augmentor could not applie

parsing log, completed traces :: 100%|██████████| 767/767 [00:02<00:00, 285.09it/s]


Start augmentation...
BEFORE FIT:
[RandomInsertion, Activities: []]
[RandomDeletion]
[ParallelSwap]
[FragmentAugmentation]
[ReworkActivity]
[DeleteReworkActivity]
[RandomReplacement, Activities: []]
[RandomSwap]
[LoopAugmentation, max_additional_repetitions: 3, duration_tolerance: 0.2]
AFTER FIT:
[RandomInsertion, Activities: ['04_BPT_010', '08_AWB45_175', '07_OPS_080_1', '11_AH_II_040a', '05_EIND_040', '11_AH_II_130', '12_AP_030', '07_OPS_055', '02_DRZ_010_2', '08_AWB45_070_3', '11_AH_II_070_1', '06_VD_060_1', '01_HOOFD_465', '10_UOV_050_2a', '12_AP_UOV_050', '01_HOOFD_193', '11_AH_II_040b', '01_HOOFD_210_0', '01_HOOFD_210_1', '06_VD_030_1a', '11_AH_II_020_2', '08_AWB45_070_4', '08_AWB45_020_1', '10_UOV_050_1', '08_AWB45_070_1a', '09_AH_I_040', '07_OPS_020', '02_DRZ_030_0', '11_AH_II_055', '06_VD_060_2a', '01_HOOFD_809', '06_VD_020_2', '05_EIND_060', '01_BB_765', '01_HOOFD_811', '01_HOOFD_050', '01_HOOFD_101', '06_VD_030_3a', '01_HOOFD_250_2', '08_AWB45_045', '01_HOOFD_490_1a', '10_UO