In [9]:
import pandas as pd
import numpy as np
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
import re

import warnings
warnings.filterwarnings("ignore")

### Purchase Process Case Study

In [10]:
def _abs_start_deltas(actual_starts, estimated_starts):
    """Pair two timestamp Series by position and return ``|actual - estimated|``.

    Pairing follows ``zip`` semantics: the k-th value of one Series is matched
    with the k-th value of the other, and the longer Series is truncated to
    the length of the shorter one.
    """
    n_pairs = min(len(actual_starts), len(estimated_starts))
    actual = actual_starts.iloc[:n_pairs].reset_index(drop=True)
    estimated = estimated_starts.iloc[:n_pairs].reset_index(drop=True)
    return (actual - estimated).abs()


def compute_start(log_path, log_alpha_path):
    """Compare the start timestamps of an XES log with those stored in a CSV.

    Parameters
    ----------
    log_path : str
        Path of the XES event log. Events whose ``lifecycle:transition`` is
        ``'complete'`` are dropped; the ``time:timestamp`` of the remaining
        events is used as the reference start time.
    log_alpha_path : str
        Path of a CSV file with at least the columns ``time:timestamp``,
        ``start:timestamp`` and ``concept:name``.

    Returns
    -------
    time_delta : float
        Mean absolute difference, in seconds, between the reference start and
        ``start:timestamp``. Both logs are sorted by ``time:timestamp`` and
        compared row by row.
    delta_starts_mean : dict
        Activity name -> mean absolute difference (``Timedelta``), pairing the
        k-th occurrence of the activity in the XES log with its k-th
        occurrence in the CSV. ``NaT`` when there is nothing to pair.
    delta_starts_median : dict
        Same pairing as ``delta_starts_mean``, reduced with the median.
    """
    log_df = pm4py.convert_to_dataframe(xes_importer.apply(log_path))
    log_df = (
        log_df.sort_values(by='time:timestamp')
        .loc[lambda frame: frame['lifecycle:transition'] != 'complete']
        .reset_index(drop=True)
    )

    log_alpha = (
        pd.read_csv(log_alpha_path)
        .sort_values(by='time:timestamp')
        .reset_index(drop=True)
    )
    # Drop every ".<digits>" run (fractional seconds) before parsing the timestamps.
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    fractional_part = re.compile(r'\.[0-9]+')
    log_alpha['start:timestamp'] = pd.to_datetime(
        log_alpha['start:timestamp'].map(lambda stamp: fractional_part.sub('', str(stamp)))
    )

    # Row-wise comparison: both frames carry a fresh 0..n-1 index, so the
    # subtraction matches the i-th row of one with the i-th row of the other.
    start_gap = log_df['time:timestamp'] - log_alpha['start:timestamp']
    time_delta = start_gap.dt.total_seconds().abs().mean()

    delta_starts_mean = {}
    delta_starts_median = {}
    for activity in log_df['concept:name'].unique():
        # Filter on the current activity (the previous code always filtered on
        # activities[1], so every key received the same statistics).
        deltas = _abs_start_deltas(
            log_df.loc[log_df['concept:name'] == activity, 'time:timestamp'],
            log_alpha.loc[log_alpha['concept:name'] == activity, 'start:timestamp'],
        )
        delta_starts_mean[activity] = deltas.mean()
        delta_starts_median[activity] = deltas.median()

    return time_delta, delta_starts_mean, delta_starts_median

In [11]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'results/Purchase_Process_Case_Study/bisection/log_alpha.csv'

print('Purchase Process Case Study - bisection')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study - bisection


parsing log, completed traces :: 100%|██████████| 608/608 [00:03<00:00, 201.82it/s]


secs: 11165.660050444129

mins: 186.09433417406882

hours: 3.10157223623448


In [12]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'results/Purchase_Process_Case_Study/single/log_alpha_one.csv'

print('Purchase Process Case Study - single')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study - single


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 234.13it/s]


secs: 17421.24991775414

mins: 290.3541652959023

hours: 4.8392360882650385


In [13]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'results/Purchase_Process_Case_Study/alpha_1/log_alpha.csv'

print('Purchase Process Case Study - single alpha=1')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study - single alpha=1


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 233.33it/s]


secs: 38941.25671674526

mins: 649.0209452790876

hours: 10.817015754651461


### Purchase Process Case Study 2

In [14]:
def compute_start_log(log_path, log_alpha_path):
    """Compare the start timestamps of an XES log with those of a second XES log.

    Parameters
    ----------
    log_path : str
        Path of the reference XES event log. Events whose
        ``lifecycle:transition`` is ``'complete'`` are dropped; the
        ``time:timestamp`` of the remaining events is the reference start.
    log_alpha_path : str
        Path of an XES log providing the attributes ``org:resource``,
        ``time:timestamp``, ``start:timestamp`` and ``concept:name``.

    Returns
    -------
    time_delta : float
        Mean absolute difference, in seconds, between the reference start and
        ``start:timestamp``. Both logs are sorted by ``time:timestamp`` and
        compared row by row.
    delta_starts_mean : dict
        Activity name -> mean absolute difference (``Timedelta``) over the row
        positions at which BOTH logs carry that activity. ``NaT`` when no such
        position exists.
    delta_starts_median : dict
        Same rows as ``delta_starts_mean``, reduced with the median.

    Notes
    -----
    The per-activity statistics are aligned on the row index, not on the k-th
    occurrence of the activity as ``compute_start`` does.
    NOTE(review): confirm which of the two pairings is the intended one.
    """
    log_df = pm4py.convert_to_dataframe(xes_importer.apply(log_path))
    log_df = (
        log_df.sort_values(by='time:timestamp')
        .loc[lambda frame: frame['lifecycle:transition'] != 'complete']
        .reset_index(drop=True)
    )

    log_alpha = pm4py.convert_to_dataframe(xes_importer.apply(log_alpha_path))
    log_alpha = (
        log_alpha.sort_values(by='time:timestamp')
        .reset_index(drop=True)
        .loc[:, ['org:resource', 'time:timestamp', 'start:timestamp', 'concept:name']]
        .copy()
    )
    log_alpha['start:timestamp'] = pd.to_datetime(log_alpha['start:timestamp'])

    # Row-wise comparison: both frames carry a fresh 0..n-1 index, so the
    # subtraction matches the i-th row of one with the i-th row of the other.
    start_gap = log_df['time:timestamp'] - log_alpha['start:timestamp']
    time_delta = start_gap.dt.total_seconds().abs().mean()

    delta_starts_mean = {}
    delta_starts_median = {}
    for activity in log_df['concept:name'].unique():
        actual = log_df.loc[log_df['concept:name'] == activity, 'time:timestamp']
        estimated = log_alpha.loc[log_alpha['concept:name'] == activity, 'start:timestamp']
        # Index-aligned subtraction yields NaT for rows present in only one of
        # the two selections. Drop them so mean and median use the same rows
        # (np.median would otherwise propagate NaT while the mean skips it).
        deltas = (actual - estimated).abs().dropna()
        delta_starts_mean[activity] = deltas.mean()
        delta_starts_median[activity] = deltas.median()

    return time_delta, delta_starts_mean, delta_starts_median

In [15]:
# Reference event log and the XES log whose start:timestamp values are compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'data/Purchase_Process_Case_Study_2/alpha_log_WT_FIXED_greedy.xes'

print('Purchase Process Case Study 2 - alpha_log_WT_FIXED_greedy')
time_delta, delta_starts_mean, delta_starts_median = compute_start_log(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study 2 - alpha_log_WT_FIXED_greedy


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 234.36it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 308.83it/s]


secs: 44635.868625945826

mins: 743.9311437657637

hours: 12.398852396096062


In [16]:
# Reference event log and the XES log whose start:timestamp values are compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'data/Purchase_Process_Case_Study_2/alpha_log_WT_FIXED_MOGAII_001.xes'

print('Purchase Process Case Study 2 - alpha_log_WT_FIXED_MOGAII_001')
time_delta, delta_starts_mean, delta_starts_median = compute_start_log(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study 2 - alpha_log_WT_FIXED_MOGAII_001


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 232.12it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 409.52it/s]


secs: 35091.76743941222

mins: 584.8627906568704

hours: 9.747713177614505


In [17]:
# Reference event log and the XES log whose start:timestamp values are compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'data/Purchase_Process_Case_Study_2/alpha_log_WT_FIXED_MOGAII_005.xes'

print('Purchase Process Case Study 2 - alpha_log_WT_FIXED_MOGAII_005')
time_delta, delta_starts_mean, delta_starts_median = compute_start_log(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study 2 - alpha_log_WT_FIXED_MOGAII_005


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 233.01it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 355.70it/s]


secs: 36397.63406075227

mins: 606.6272343458712

hours: 10.11045390576452


In [18]:
# Reference event log and the XES log whose start:timestamp values are compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'data/Purchase_Process_Case_Study_2/alpha_log_WT_FIXED_NSGAII_001.xes'

print('Purchase Process Case Study 2 - alpha_log_WT_FIXED_NSGAII_001')
time_delta, delta_starts_mean, delta_starts_median = compute_start_log(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study 2 - alpha_log_WT_FIXED_NSGAII_001


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 226.85it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 347.17it/s]


secs: 36379.23181927843

mins: 606.3205303213072

hours: 10.105342172021786


In [19]:
# Reference event log and the XES log whose start:timestamp values are compared against it.
log_path = 'data/Purchase_Process_Case_Study/purchasing_example.xes'
log_alpha_path = 'data/Purchase_Process_Case_Study_2/alpha_log_WT_FIXED_NSGAII_005.xes'

print('Purchase Process Case Study 2 - alpha_log_WT_FIXED_NSGAII_005')
time_delta, delta_starts_mean, delta_starts_median = compute_start_log(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Purchase Process Case Study 2 - alpha_log_WT_FIXED_NSGAII_005


parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 234.15it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 369.15it/s]


secs: 36390.078298059

mins: 606.5013049676501

hours: 10.108355082794168


### Production Case Study

In [20]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Production_Case_Study/production.xes'
log_alpha_path = 'results/Production_Case_Study/bisection/log_alpha.csv'

print('Production Case Study - bisection')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Production Case Study - bisection


parsing log, completed traces :: 100%|██████████| 225/225 [00:01<00:00, 122.26it/s]


secs: 12148.596002422773

mins: 202.47660004037957

hours: 3.3746100006729924


In [21]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Production_Case_Study/production.xes'
log_alpha_path = 'results/Production_Case_Study/single/log_alpha_one.csv'

print('Production Case Study - single')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Production Case Study - single


parsing log, completed traces :: 100%|██████████| 225/225 [00:01<00:00, 112.54it/s]


secs: 20478.648899656775

mins: 341.3108149942796

hours: 5.688513583237993


In [22]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Production_Case_Study/production.xes'
log_alpha_path = 'results/Production_Case_Study/single/log_alpha_one_shuffle.csv'

print('Production Case Study - single shuffle')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Production Case Study - single shuffle


parsing log, completed traces :: 100%|██████████| 225/225 [00:01<00:00, 137.38it/s]


secs: 22895.34201494044

mins: 381.5890335823407

hours: 6.3598172263723445


In [23]:
# Reference event log and the log_alpha CSV that is compared against it.
log_path = 'data/Production_Case_Study/production.xes'
log_alpha_path = 'results/Production_Case_Study/alpha_1/log_alpha.csv'

print('Production Case Study - single alpha=1')
time_delta, delta_starts_mean, delta_starts_median = compute_start(
    log_path=log_path,
    log_alpha_path=log_alpha_path,
)
# time_delta is expressed in seconds; also show it in minutes and hours.
print('secs:', time_delta)
print("\nmins:", time_delta / 60)
print("\nhours:", time_delta / 3600)

Production Case Study - single alpha=1


parsing log, completed traces :: 100%|██████████| 225/225 [00:01<00:00, 132.48it/s]


secs: 44487.70442156269

mins: 741.4617403593782

hours: 12.357695672656302
