In [1]:
# Miners
from pm4py import serialize, deserialize
from pm4py import discover_dfg as dfg_discovery

from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner


# Evaluators
import pandas as pd
import re
import pm4py
import sys
import dask
from pm4py.discovery import DFG
from pm4py import discover_dfg_typed

from pm4py.algo.evaluation.simplicity import algorithm as simplicity #simplicity
from pm4py.algo.evaluation.replay_fitness import algorithm as fitness_alignment #fitness
from pm4py.algo.evaluation.generalization import algorithm as generalization #generalization

In [2]:
import atexit
import time

# Wall-clock moment (seconds since the epoch) at which this cell was run.
start_time = time.time()


# Register a function to be called before the kernel dies
@atexit.register
def log_execution_time(session_start=start_time):
    """Record how long the session lasted, at interpreter shutdown.

    Overwrites ``myfile.txt`` in the current working directory with a one-line
    message and prints the same duration. The message always reads
    "kernel died after ..." because the hook cannot tell a crash from a
    normal shutdown.

    Parameters
    ----------
    session_start : float, optional
        ``time.time()`` value to measure from. The default is bound when the
        function is defined, so a later cell that rebinds the global
        ``start_time`` (for example to a ``timeit.default_timer()`` reading)
        cannot corrupt the reported duration. ``atexit`` calls the hook with no
        arguments, so the default is what is used at shutdown.
    """
    execution_time = time.time() - session_start
    # The context manager closes the file even if the write itself fails.
    with open('myfile.txt', 'w') as log_file:
        log_file.write("kernel died after {} seconds".format(execution_time))
    print(f"Execution time: {execution_time} seconds")

In [3]:
import functools
import timeit

def useExecutionTime(func):
    """Decorator that times each call of ``func``.

    Parameters
    ----------
    func : callable
        The function to wrap.

    Returns
    -------
    callable
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns ``{'result': <func's return value>,
        'execution_time': <elapsed seconds as measured by
        timeit.default_timer>}``. Exceptions raised by ``func`` propagate
        unchanged.
    """
    # functools.wraps keeps the wrapped function's __name__ and __doc__, so
    # decorated functions stay identifiable in tracebacks and help().
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = timeit.default_timer()
        result = func(*args, **kwargs)
        execution_time = timeit.default_timer() - start_time
        return {'result': result, 'execution_time': execution_time}
    return wrapper

In [4]:
@useExecutionTime
def getMinerResult(dfg, miner, threshold = 0.5):
    """Discover a Petri net from a DFG with the requested miner and serialize it.

    Parameters
    ----------
    dfg : dict
        Must provide ``dfg['dfg']`` (used by the heuristic and alpha miners)
        and ``dfg['dfgObj']`` (used by the inductive miner).
    miner : str
        One of ``'heuristic_miner'``, ``'inductive_miner'`` or ``'alpha_miner'``.
    threshold : float, optional
        Dependency threshold passed to the heuristic miner; ignored by the
        other miners.

    Returns
    -------
    dict
        ``{miner: serialize(net, im, fm)}``. Because of the
        ``@useExecutionTime`` decorator, callers receive this wrapped as
        ``{'result': {...}, 'execution_time': seconds}``.

    Raises
    ------
    ValueError
        If ``miner`` is not one of the supported names.
    """
    if miner == 'heuristic_miner':
        net, im, fm = heuristics_miner.apply_dfg(
            dfg['dfg'],
            parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: threshold},
        )
    elif miner == 'inductive_miner':
        net, im, fm = pm4py.discover_petri_net_inductive(dfg['dfgObj'])
    elif miner == 'alpha_miner':
        net, im, fm = alpha_miner.apply_dfg(dfg['dfg'])
    else:
        # Without this branch an unknown name surfaced as an UnboundLocalError
        # on `net`, which hid the actual mistake.
        raise ValueError(
            "Unknown miner {!r}; expected 'heuristic_miner', 'inductive_miner' or 'alpha_miner'".format(miner)
        )

    return {miner: serialize(net, im, fm)}
    
def setMiners(dfg):
    """Run every supported miner on ``dfg`` and return the timed results.

    ``dfg`` is handed unchanged to ``getMinerResult``. The returned list holds
    one entry per miner, in the order heuristic, inductive, alpha. Each miner
    is executed immediately as the list is built.
    """
    miner_names = ('heuristic_miner', 'inductive_miner', 'alpha_miner')
    return [getMinerResult(dfg, miner_name) for miner_name in miner_names]

In [5]:
@useExecutionTime
def getMetrics(log, miner, metric, net, im, fm):
    """Evaluate one quality metric of a mined Petri net against an event log.

    Parameters
    ----------
    log
        Event log passed through to the pm4py evaluators.
    miner : str
        Name of the miner that produced the net; used only as the result key.
    metric : str
        One of ``'fitness'``, ``'simplicity'``, ``'precision'`` or
        ``'generalization'``.
    net, im, fm
        Petri net with its initial and final marking.

    Returns
    -------
    dict
        ``{miner: {metric: value}}`` on success. If the evaluation raises, or
        ``metric`` is not a supported name, the value is ``{"error": exc}``
        instead, so a failing metric does not stop the caller's loop. Because
        of the ``@useExecutionTime`` decorator, callers receive this wrapped
        as ``{'result': {...}, 'execution_time': seconds}``.
    """
    # Raise the recursion limit to at least 3000 but never lower a limit that
    # is already higher.
    # NOTE(review): presumably needed because the evaluators recurse deeply on
    # large models - confirm the value is sufficient.
    sys.setrecursionlimit(max(sys.getrecursionlimit(), 3000))
    try:
        if metric == 'fitness':
            value = fitness_alignment.apply(log, net, im, fm)['log_fitness']
        elif metric == 'simplicity':
            value = simplicity.apply(net)
        elif metric == 'precision':
            value = pm4py.precision_alignments(log, net, im, fm)
        elif metric == 'generalization':
            value = generalization.apply(log, net, im, fm)
        else:
            # Previously an unknown metric was reported as a score of 0, which
            # cannot be told apart from a real result.
            raise ValueError("Unknown metric: {!r}".format(metric))

        return {miner: {metric: value}}
    except Exception as e:
        return {miner: {metric: {"error": e}}}

def setMetrics(log, miners):
    """Evaluate all four quality metrics for every mined model.

    ``miners`` is the list produced by ``setMiners``: each entry carries, under
    ``'result'``, a one-key dict mapping the miner name to its serialized net.
    The net is deserialized anew for every (metric, miner) pair. Results are
    returned in metric-major order: all miners for fitness, then simplicity,
    precision and generalization.
    """
    metric_names = ('fitness', 'simplicity', 'precision', 'generalization')
    collected = []

    for metric_name in metric_names:
        for mined in miners:
            serialized_by_algorithm = mined['result']
            algorithm = list(serialized_by_algorithm)[0]
            petri_net, initial_marking, final_marking = deserialize(serialized_by_algorithm[algorithm])
            collected.append(
                getMetrics(log, algorithm, metric_name, petri_net, initial_marking, final_marking)
            )

    return collected

In [6]:
# data_files = ['BPIC15_1.csv', 'BPIC15_2.csv', 'BPIC15_3.csv', 'BPIC15_4.csv', 'BPIC15_5.csv']
# dataframe = pd.concat((pd.read_csv(filename) for filename in data_files))

In [7]:
# fileName = '-'.join(data_files)

In [8]:
# Base name (without the .csv extension) of the event-log file to load; it is
# also written into the `data_set` column of the result tables.
fileName = 'BPI_2014_Detail Incident Activity'

In [9]:
# Read the raw event log: a semicolon-separated CSV decoded as ISO-8859-1.
dataframe = pd.read_csv(
    '{}.csv'.format(fileName),
    sep=';',
    encoding="ISO-8859-1",
)

In [10]:
# Rename the source columns to the standard XES attribute names (case id,
# activity, timestamp) that pm4py reads by default.
dataframe = dataframe.rename(
    columns={
        "Incident ID": "case:concept:name",
        "IncidentActivity_Type": "concept:name",
        "DateStamp": "time:timestamp",
    }
)

In [11]:
# Normalize dtypes: timezone-aware UTC timestamps, string case ids and activities.
# NOTE(review): no explicit `format`/`dayfirst` is given, so timestamp parsing
# relies on pandas' inference - confirm against the DateStamp layout in the CSV.
dataframe = dataframe.assign(**{
    'time:timestamp': pd.to_datetime(dataframe['time:timestamp'], utc=True),
    'concept:name': dataframe['concept:name'].astype(str),
    'case:concept:name': dataframe['case:concept:name'].astype(str),
})

In [12]:
# Time DFG discovery with a dedicated timer name. Rebinding the global
# `start_time` here would overwrite the session start that
# log_execution_time() reads at kernel exit, and the two values come from
# different clocks (time.time vs. timeit.default_timer).
dfgDiscoveryStart = timeit.default_timer()
dfg, start_activities, end_activities = dfg_discovery(dataframe)
dfgDiscoveryExecutionTime = timeit.default_timer() - dfgDiscoveryStart

In [13]:
# Bundle the DFG with its start and end activities into a DFG object; the
# inductive-miner branch of getMinerResult consumes it as dfg['dfgObj'].
dfgObj = DFG(dfg, start_activities=start_activities, end_activities=end_activities)

In [14]:
# Run all miners. Each entry has the shape
# {'result': {miner_name: serialized_net}, 'execution_time': seconds}.
minersResults = setMiners({"dfgObj": dfgObj, "dfg": dfg})

In [None]:
# Evaluate every metric for every mined model against the full event log.
# NOTE(review): the saved output below still shows 0% progress and the cell has
# no execution count - it appears not to have completed in the saved run.
metricsResults = setMetrics(dataframe, minersResults)

replaying log with TBR, completed variants ::   0%|          | 0/31725 [00:00<?, ?it/s]

aligning log, completed variants ::   0%|          | 0/31725 [00:00<?, ?it/s]

replaying log with TBR, completed variants ::   0%|          | 0/31725 [00:00<?, ?it/s]

computing precision with alignments, completed variants ::   0%|          | 0/248024 [00:00<?, ?it/s]

In [None]:
def getStatisticalDataFrames(metricsResults, minersResults, dataSetName=None):
    """Collect timed metric and miner results into three summary tables.

    Parameters
    ----------
    metricsResults : iterable of dict
        Entries shaped like
        ``{'result': {miner: {metric: value}}, 'execution_time': seconds}``.
        If the same miner/metric pair occurs more than once, the first
        non-``None`` value and the first timing are kept.
    minersResults : iterable of dict
        Entries shaped like ``{'result': {miner: ...}, 'execution_time': seconds}``.
        If a miner occurs more than once, its last timing is kept.
    dataSetName : str, optional
        Label written to the ``data_set`` column of every table. Defaults to
        the notebook-level ``fileName``, which was previously the only option.

    Returns
    -------
    list of pandas.DataFrame
        ``[metric values, metric execution times, miner execution times]``.
        The first two have one row per metric and one column per miner; the
        third has a single ``execution_time`` row. Each has an additional
        ``data_set`` column.
    """
    if dataSetName is None:
        dataSetName = fileName

    resultsPerMiner = {}
    metricsExecutionTimePerMiner = {}
    minerExecutionTime = {}

    for result in metricsResults:
        # Each result dict has exactly one miner key holding exactly one metric key.
        miner = list(result['result'])[0]
        metricKey = list(result['result'][miner])[0]
        metricValue = result['result'][miner][metricKey]

        minerValues = resultsPerMiner.setdefault(miner, {})
        minerTimes = metricsExecutionTimePerMiner.setdefault(miner, {})

        # Identity check: `== None` would invoke the value's own __eq__.
        if minerValues.get(metricKey) is None:
            minerValues[metricKey] = metricValue

        if minerTimes.get(metricKey) is None:
            minerTimes[metricKey] = result['execution_time']

    for result in minersResults:
        miner = list(result['result'])[0]
        minerExecutionTime[miner] = result['execution_time']

    resultsPerMiner['data_set'] = dataSetName
    metricsExecutionTimePerMiner['data_set'] = dataSetName
    minerExecutionTime['data_set'] = dataSetName

    return [
        pd.DataFrame(resultsPerMiner),
        pd.DataFrame(metricsExecutionTimePerMiner),
        # All values are scalars here, so pandas needs an explicit index.
        pd.DataFrame(minerExecutionTime, index=['execution_time'])
    ]

In [None]:
# Unpack the three summary tables: metric values, metric execution times and
# miner execution times.
results, execution_times, miner_execution_time = getStatisticalDataFrames(metricsResults, minersResults)

In [None]:
# Metric values: one row per metric, one column per miner, plus `data_set`.
results

In [None]:
# Time in seconds taken to compute each metric, per miner.
execution_times

In [None]:
# Time in seconds taken by each miner to discover and serialize its model.
miner_execution_time

In [None]:
# execution_times.to_csv('./results/1 - centralized setup/{}_metrics_execution_times.csv'.format(fileName))

In [None]:
# results.to_csv('./results/1 - centralized setup/{}_results.csv'.format(fileName))

In [None]:
# miner_execution_time.to_csv('./results/1 - centralized setup/{}_miner_execution_time.csv'.format(fileName))