In [1]:
import pandas as pd
import numpy as np
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
import re

import warnings
# Installs a blanket "ignore" filter: no warning of any category is shown by
# the cells that run after this one.
warnings.filterwarnings("ignore")

In [2]:
def compute_start(log_path, log_alpha_path):
    """Compare the timestamps of an XES log with the start times of a CSV log.

    The XES log at ``log_path`` is loaded, ordered by ``time:timestamp`` and
    stripped of every row whose ``lifecycle:transition`` equals ``'complete'``.
    The CSV at ``log_alpha_path`` is ordered by its own ``time:timestamp``
    column; its ``start:timestamp`` values are parsed as datetimes after
    removing every "dot followed by digits" fragment from their text.

    Parameters
    ----------
    log_path : str
        Location of the XES log, read through ``xes_importer.apply``.
    log_alpha_path : str or file-like
        Anything ``pandas.read_csv`` accepts. The table must provide the
        columns ``time:timestamp``, ``start:timestamp`` and ``concept:name``.

    Returns
    -------
    time_delta : float
        Mean absolute difference, in seconds, between ``time:timestamp`` of
        the XES rows and ``start:timestamp`` of the CSV rows, pairing rows
        that share the same position after sorting. Positions present in only
        one of the two logs are ignored.
    delta_starts_mean : dict
        Activity name -> mean absolute difference (``pandas.Timedelta``),
        pairing the k-th row of that activity in one log with the k-th row of
        the same activity in the other. Surplus rows of the longer side are
        dropped; an activity with no pair maps to ``NaT``.
    delta_starts_median : dict
        Same pairing as ``delta_starts_mean`` but reporting the median.
    """
    log = xes_importer.apply(log_path)
    log_df = pm4py.convert_to_dataframe(log)
    log_df = log_df.sort_values(by='time:timestamp')
    log_df = log_df[log_df['lifecycle:transition'] != 'complete'].reset_index(drop=True)

    log_alpha = pd.read_csv(log_alpha_path)
    log_alpha = log_alpha.sort_values(by='time:timestamp').reset_index(drop=True)
    # Raw string: the pattern is a literal dot followed by digits.
    log_alpha['start:timestamp'] = pd.to_datetime(
        log_alpha['start:timestamp'].map(lambda raw: re.sub(r'\.[0-9]+', '', str(raw)))
    )

    # Both frames carry a fresh 0..n-1 index, so this subtraction pairs rows by rank.
    time_delta = (
        (log_df['time:timestamp'] - log_alpha['start:timestamp'])
        .dt.total_seconds()
        .abs()
        .mean()
    )

    activities = list(log_df['concept:name'].unique())
    delta_starts_median = {}
    delta_starts_mean = {}
    for a in activities:
        # Select the rows of the *current* activity (not a fixed one) in each log.
        original_starts = log_df.loc[log_df['concept:name'] == a, 'time:timestamp'].reset_index(drop=True)
        alpha_starts = log_alpha.loc[log_alpha['concept:name'] == a, 'start:timestamp'].reset_index(drop=True)

        # Pair by position within the activity and ignore the unmatched tail.
        n_pairs = min(len(original_starts), len(alpha_starts))
        gaps = (original_starts.iloc[:n_pairs] - alpha_starts.iloc[:n_pairs]).abs()

        delta_starts_median[a] = gaps.median()
        delta_starts_mean[a] = gaps.mean()

    return time_delta, delta_starts_mean, delta_starts_median

In [3]:
# Run compute_start on the purchasing example log and the CSV stored under
# results/bisection, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'results/bisection/log_alpha.csv'

start_stats = compute_start(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 270.13it/s]


secs: 11299.698980151332

mins: 188.32831633585553

hours: 3.1388052722642588


In [4]:
# Run compute_start on the purchasing example log and the CSV stored under
# results/single, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'results/single/log_alpha_one.csv'

start_stats = compute_start(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 242.96it/s]


secs: 17421.24991775414

mins: 290.3541652959023

hours: 4.8392360882650385


In [5]:
# Run compute_start on the purchasing example log and the CSV stored under
# results/alpha_1, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'results/alpha_1/log_alpha.csv'

start_stats = compute_start(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 261.67it/s]


secs: 38941.25671674526

mins: 649.0209452790876

hours: 10.817015754651461


In [6]:
def compute_start_log(log_path, log_alpha_path):
    """Compare the timestamps of an XES log with the start times of a second XES log.

    Both files are loaded through ``xes_importer.apply`` and converted with
    ``pm4py.convert_to_dataframe``. The log at ``log_path`` is ordered by
    ``time:timestamp`` and stripped of every row whose
    ``lifecycle:transition`` equals ``'complete'``. The log at
    ``log_alpha_path`` is ordered by its own ``time:timestamp`` and its
    ``start:timestamp`` column is converted with ``pandas.to_datetime``.

    Parameters
    ----------
    log_path : str
        Location of the first XES log.
    log_alpha_path : str
        Location of the second XES log. After conversion it must provide the
        columns ``org:resource``, ``time:timestamp``, ``start:timestamp`` and
        ``concept:name``.

    Returns
    -------
    time_delta : float
        Mean absolute difference, in seconds, between ``time:timestamp`` of
        the first log and ``start:timestamp`` of the second, pairing rows that
        share the same position after sorting. Positions present in only one
        of the two logs are ignored.
    delta_starts_mean : dict
        Activity name -> mean absolute difference (``pandas.Timedelta``),
        pairing the k-th row of that activity in one log with the k-th row of
        the same activity in the other. Surplus rows of the longer side are
        dropped; an activity with no pair maps to ``NaT``.
    delta_starts_median : dict
        Same pairing as ``delta_starts_mean`` but reporting the median.
    """
    log = xes_importer.apply(log_path)
    log_df = pm4py.convert_to_dataframe(log)
    log_df = log_df.sort_values(by='time:timestamp')
    log_df = log_df[log_df['lifecycle:transition'] != 'complete'].reset_index(drop=True)

    log_alpha = pm4py.convert_to_dataframe(xes_importer.apply(log_alpha_path))
    log_alpha = log_alpha.sort_values(by='time:timestamp').reset_index(drop=True)
    # Sorting once is enough; keep only the columns of interest on an independent copy.
    log_alpha = log_alpha.loc[:, ['org:resource', 'time:timestamp', 'start:timestamp', 'concept:name']].copy()
    log_alpha['start:timestamp'] = pd.to_datetime(log_alpha['start:timestamp'])

    # Both frames carry a fresh 0..n-1 index, so this subtraction pairs rows by rank.
    time_delta = (
        (log_df['time:timestamp'] - log_alpha['start:timestamp'])
        .dt.total_seconds()
        .abs()
        .mean()
    )

    activities = list(log_df['concept:name'].unique())
    delta_starts_median = {}
    delta_starts_mean = {}
    for a in activities:
        original_starts = log_df.loc[log_df['concept:name'] == a, 'time:timestamp'].reset_index(drop=True)
        alpha_starts = log_alpha.loc[log_alpha['concept:name'] == a, 'start:timestamp'].reset_index(drop=True)

        # Subtracting the two selections directly would align them on their
        # original row labels and turn every unmatched label into NaT; pair
        # them by position within the activity instead.
        n_pairs = min(len(original_starts), len(alpha_starts))
        gaps = (original_starts.iloc[:n_pairs] - alpha_starts.iloc[:n_pairs]).abs()

        delta_starts_median[a] = gaps.median()
        delta_starts_mean[a] = gaps.mean()

    return time_delta, delta_starts_mean, delta_starts_median

In [7]:
# Run compute_start_log on the purchasing example log and
# data2/alpha_log_WT_FIXED_greedy.xes, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'data2/alpha_log_WT_FIXED_greedy.xes'

start_stats = compute_start_log(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 260.70it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 390.50it/s]


secs: 44635.868625945826

mins: 743.9311437657637

hours: 12.398852396096062


In [8]:
# Run compute_start_log on the purchasing example log and
# data2/alpha_log_WT_FIXED_MOGAII_001.xes, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'data2/alpha_log_WT_FIXED_MOGAII_001.xes'

start_stats = compute_start_log(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 282.76it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 434.49it/s]


secs: 35091.76743941222

mins: 584.8627906568704

hours: 9.747713177614505


In [9]:
# Run compute_start_log on the purchasing example log and
# data2/alpha_log_WT_FIXED_MOGAII_005.xes, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'data2/alpha_log_WT_FIXED_MOGAII_005.xes'

start_stats = compute_start_log(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 264.99it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 408.57it/s]


secs: 36397.63406075227

mins: 606.6272343458712

hours: 10.11045390576452


In [10]:
# Run compute_start_log on the purchasing example log and
# data2/alpha_log_WT_FIXED_NSGAII_001.xes, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'data2/alpha_log_WT_FIXED_NSGAII_001.xes'

start_stats = compute_start_log(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 245.70it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 430.11it/s]


secs: 36379.23181927843

mins: 606.3205303213072

hours: 10.105342172021786


In [11]:
# Run compute_start_log on the purchasing example log and
# data2/alpha_log_WT_FIXED_NSGAII_005.xes, then print time_delta in three units.
log_path = 'data/purchasing_example.xes'
log_alpha_path = 'data2/alpha_log_WT_FIXED_NSGAII_005.xes'

start_stats = compute_start_log(log_path, log_alpha_path)
time_delta, delta_starts_mean, delta_starts_median = start_stats

# time_delta is printed as returned, then divided by 60 and by 3600.
for label, seconds_per_unit in (('secs:', 1), ("\nmins:", 60), ("\nhours:", 60*60)):
    print(label, time_delta / seconds_per_unit)

parsing log, completed traces :: 100%|██████████| 608/608 [00:02<00:00, 257.66it/s]
parsing log, completed traces :: 100%|██████████| 608/608 [00:01<00:00, 334.73it/s]


secs: 36390.078298059

mins: 606.5013049676501

hours: 10.108355082794168
