In [1]:
#importers
from pm4py import convert_to_event_log, convert_to_dataframe, format_dataframe

# Miners
from pm4py import convert_to_petri_net, serialize, deserialize
from pm4py import discover_dfg as dfg_discovery

from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.correlation_mining import algorithm as correlation_miner
from pm4py.algo.discovery.temporal_profile import algorithm as temporal_profile_discovery


# Evaluators
from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness_evaluator
from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
import pandas as pd
import re
import pm4py

In [2]:
def getMinerResult(dfg, miner, threshold = 0.5):
    """Discover a Petri net from a DFG with the requested miner and serialize it.

    Parameters
    ----------
    dfg
        Directly-follows graph, passed unchanged to the miner's ``apply_dfg``.
    miner : str
        One of ``'heuristic_miner'``, ``'inductive_miner'`` or ``'alpha_miner'``.
    threshold : float, optional
        Dependency threshold. Only the heuristic miner receives it; the other
        miners are called without parameters.

    Returns
    -------
    dict
        A single-entry dict ``{miner: serialize(net, im, fm)}``.

    Raises
    ------
    ValueError
        If ``miner`` is not one of the supported names.
    """
    supported_miners = ('heuristic_miner', 'inductive_miner', 'alpha_miner')
    # Validate up front: previously an unknown name fell through every branch
    # and crashed later with an UnboundLocalError on `net`.
    if miner not in supported_miners:
        raise ValueError(
            "Unknown miner {!r}; expected one of: {}".format(miner, ', '.join(supported_miners))
        )

    if miner == 'heuristic_miner':
        net, im, fm = heuristics_miner.apply_dfg(dfg, parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: threshold})
    elif miner == 'inductive_miner':
        net, im, fm = inductive_miner.apply_dfg(dfg)
    else:  # 'alpha_miner' (guaranteed by the validation above)
        net, im, fm = alpha_miner.apply_dfg(dfg)

    return {miner: serialize(net, im, fm)}

def getMetrics(metric, log, petrinet, im, fm):
    """Evaluate one quality metric of a Petri net and wrap it in a dict.

    Parameters
    ----------
    metric : str
        ``'fitness'``, ``'simplicity'``, ``'precision'`` or ``'generalization'``.
    log
        Event data handed to the evaluators (not used for simplicity).
    petrinet, im, fm
        The net and the two markings forwarded to the evaluators.

    Returns
    -------
    dict
        ``{metric: value}`` for a recognised metric, otherwise an empty dict.
    """
    if metric == 'fitness':
        # Fitness is computed with the token-based replay variant.
        fitness = replay_fitness_evaluator.apply(
            log, petrinet, im, fm,
            variant=replay_fitness_evaluator.TOKEN_BASED,
        )
        return {metric: fitness}

    if metric == 'simplicity':
        # Simplicity depends on the net only.
        return {metric: simplicity_evaluator.apply(petrinet)}

    if metric == 'precision':
        precision = precision_evaluator.apply(
            log, petrinet, im, fm,
            variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN,
        )
        return {metric: precision}

    if metric == 'generalization':
        return {metric: generalization_evaluator.apply(log, petrinet, im, fm)}

    # Unrecognised metric names produce no entry.
    return {}

In [3]:
def setMiners(dfg):
    """Run every supported miner on ``dfg`` and collect the results.

    Each miner is invoked through ``getMinerResult`` with its default
    threshold. The returned list holds one single-entry dict per miner, in
    the order heuristic, inductive, alpha.
    """
    miner_names = ('heuristic_miner', 'inductive_miner', 'alpha_miner')
    return [getMinerResult(dfg, name) for name in miner_names]

def setMetrics(dfg, petrinet, im, fm):
    """Compute all four quality metrics for a Petri net.

    NOTE(review): despite its name, ``dfg`` is forwarded to ``getMetrics`` as
    the ``log`` argument — confirm callers pass event data here, not a DFG.

    Returns a list of single-entry dicts, one per metric, in the order
    fitness, simplicity, precision, generalization.
    """
    metric_names = ('fitness', 'simplicity', 'precision', 'generalization')
    return [getMetrics(name, dfg, petrinet, im, fm) for name in metric_names]

In [4]:
# Read BPIC15_1.csv (path relative to the working directory) into a DataFrame;
# later cells pass `df` to DFG discovery and to the metric evaluation.
df = pd.read_csv('BPIC15_1.csv')

  df = pd.read_csv('BPIC15_1.csv')


In [5]:
# dfg_discovery is pm4py.discover_dfg; it is called on the raw DataFrame and
# yields the DFG together with the start and end activities.
dfg, start_activities, end_activities = dfg_discovery(df)

In [6]:
%%time
# NOTE(review): the variable is called heuristicMiner, but the miner requested
# here is 'inductive_miner'; the result is a dict keyed by that miner name.
heuristicMiner = getMinerResult(dfg, 'inductive_miner')

CPU times: user 1.3 s, sys: 64.8 ms, total: 1.36 s
Wall time: 1.36 s


In [11]:
# Rebuild the net and its two markings (im, fm) from the serialized
# inductive-miner result produced by getMinerResult.
net, im, fm = deserialize(heuristicMiner['inductive_miner'])

peak memory: 384.65 MiB, increment: 0.00 MiB


In [8]:
# Load the memory_profiler extension, which provides the %memit magic used below.
%load_ext memory_profiler

In [9]:
# Measure peak memory while evaluating a metric on the raw DataFrame.
# NOTE(review): the variable is called fitnessMetric, but the metric requested
# here is 'precision'.
%memit fitnessMetric = getMetrics('precision', df, net, im, fm)

replaying log with TBR, completed variants ::   0%|          | 0/38934 [00:00<?, ?it/s]

peak memory: 1590.88 MiB, increment: 1301.13 MiB


In [16]:
# Bare %memit with no statement; the recorded output reports the peak memory
# at this point, usable as a baseline against the precision run.
%memit

peak memory: 384.82 MiB, increment: 0.00 MiB
