In [1]:
import sys
import pandas as pd
import numpy as np
import stats
import os
import re
from statistics import mean
from frozendict import frozendict
from utils import get_occ_dicts

In [2]:
# Root folder that holds the result files of every algorithm (relative to this notebook).
INPUT_DIR = os.path.join('..', 'output')

# Prefix-alignment results, split by window size.
PREFIX_ALIGN_DIR = os.path.join(INPUT_DIR, 'prefix-alignment')
PREFIX_W_2_DIR = os.path.join(INPUT_DIR, 'prefix-alignment', 'window-size-2')
PREFIX_W_INF_DIR = os.path.join(INPUT_DIR, 'prefix-alignment', 'window-size-maximum')

# Results of the other two approaches.
HMMCONF_DIR = os.path.join(INPUT_DIR, 'hmmconf')
C_3PO_DIR = os.path.join(INPUT_DIR, 'C-3PO')

# Event logs and the variants of each log that are looked up in the result dictionaries.
LOGS = [
    "BPI_2017",
    "BPI_2012",
    "M1",
    "M2",
    "M4",
    "M8",
    "M9",
]
LOG_TYPES = [
    'completeness20',
    'completeness50',
    'sim',
]

## Datasets
note: Kristo can share his insights
- completeness logs
- regular logs
- model types

Additional:
1. Summary statistics
    - trace length distribution
    - unique activities
2. Behaviour (parallelism, XOR etc)

In [19]:
# Result directories in the order: C-3PO, prefix-alignment (window size 2),
# prefix-alignment (maximum window size).
occ_ = [C_3PO_DIR, PREFIX_W_2_DIR, PREFIX_W_INF_DIR]

print("C-3PO:")
# NOTE(review): the meaning of the second positional argument is defined in
# utils.get_occ_dicts; it is only passed for the C-3PO results — confirm there.
runs_dict_C_3PO = get_occ_dicts(occ_[0], True)
print(runs_dict_C_3PO['BPI_2017']['completeness20'].columns)

print(80*'-')

print("prefix-alignment:")
runs_dict_pref_w_2 = get_occ_dicts(occ_[1])
# Inspect the dictionary that was just loaded. The previous code printed
# `runs_dict_pref`, a name that is never defined in this notebook and only
# existed as leftover kernel state, so the cell failed on a fresh kernel.
print(runs_dict_pref_w_2['BPI_2017']['completeness20'].columns)

print(80*'-')

print("prefix-alignment:")
runs_dict_pref_w_inf = get_occ_dicts(occ_[2])
print(runs_dict_pref_w_inf['BPI_2017']['completeness20'].columns)

C-3PO:
Index(['TraceId', 'total cost', 'Completeness cost', 'Conformance cost',
       'ExecutionTime'],
      dtype='object')
--------------------------------------------------------------------------------
prefix-alignment:
Index(['TraceId', 'Conformance cost', 'ExecutionTime'], dtype='object')
--------------------------------------------------------------------------------
prefix-alignment:
Index(['TraceId', 'Conformance cost', 'ExecutionTime'], dtype='object')


## Stress test

- Operating system: Windows 11 Home 64-bit
- Processor: AMD Ryzen 7 5800 8-Core Processor (16 CPUs) ~3.4 GHz
- Memory: 16384MB RAM

The stress test was carried out by running each algorithm on the datasets 5 times and averaging the execution times, in order to level out interference from other system resource usage while the experiments were running.


### Average computation time per trace

In [21]:
# One inner list per log, pairing the log name with every log type,
# e.g. [('BPI_2017', 'completeness20'), ('BPI_2017', 'completeness50'), ('BPI_2017', 'sim')].
# The pairs are derived from LOG_TYPES itself instead of a hard-coded repeat count of 3,
# so no log type is silently dropped if LOG_TYPES changes length.
log_names = [[(log_n, log_t) for log_t in LOG_TYPES] for log_n in LOGS]

# Row labels of the result table: '<log>_<log type>', in the same order as log_names.
indexes = [l_n + '_' + l_t for pairs in log_names for (l_n, l_t) in pairs]

# Column labels of the result table, one per compared algorithm.
columns = ['C-3PO', 'W-2', 'W-inf']

In [48]:
# Result dictionaries of the compared algorithms, in the same order as `columns`.
occs = [runs_dict_C_3PO, runs_dict_pref_w_2, runs_dict_pref_w_inf]

# Flatten the per-log groups into one (log, log type) sequence matching `indexes`.
log_pairs = [pair for group in log_names for pair in group]

# One row per (log, log type); each row holds the mean 'ExecutionTime' of every
# algorithm, rounded to 3 decimals.
data = [
    [round(mean(occ[l_n][l_t]['ExecutionTime']), 3) for occ in occs]
    for (l_n, l_t) in log_pairs
]
results = pd.DataFrame(index=indexes, data=data, columns=columns)
#print(results.idxmax(axis=1).index)
#print(results.idxmax(axis=1))

def highlight_max(s):
    """Build the CSS styles that mark the largest value(s) of ``s``.

    Parameters
    ----------
    s : pandas.Series
        The values to style (one row or one column handed over by ``Styler.apply``).

    Returns
    -------
    list of str
        One entry per element of ``s``: ``'color: green'`` where the element
        equals ``s.max()`` (ties are all marked), ``''`` everywhere else.
    """
    largest = s.max()
    styles = []
    for is_largest in s.eq(largest):
        styles.append('color: green' if is_largest else '')
    return styles
  
# axis=0 (the Styler.apply default) styles each column separately, i.e. the
# largest average time of every algorithm across all logs is coloured green.
# NOTE(review): comparing the algorithms per log would need axis=1 — confirm intent.
results.style.apply(highlight_max, axis=0)
  
#results.style.highlight_max(color = 'lightgreen', axis = 0)

Unnamed: 0,C-3PO,W-2,W-inf
BPI_2017_completeness20,26.113,25.92,23.182
BPI_2017_completeness50,16.323,15.83,14.028
BPI_2017_sim,25.027,21.261,17.975
BPI_2012_completeness20,26.8,24.418,24.008
BPI_2012_completeness50,16.756,16.384,15.533
BPI_2012_sim,25.687,22.264,19.122
M1_completeness20,4.964,9.046,6.098
M1_completeness50,4.408,6.35,5.944
M1_sim,3.885,3.639,3.503
M2_completeness20,8.944,12.676,8.866
