<a href="https://colab.research.google.com/github/NicoRota-0/BIS-project/blob/main/BIS_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install pm4py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pm4py
  Downloading pm4py-2.2.22-py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 7.7 MB/s 
[?25hCollecting pyvis
  Downloading pyvis-0.2.1.tar.gz (21 kB)
Collecting jsonpickle
  Downloading jsonpickle-2.2.0-py2.py3-none-any.whl (39 kB)
Collecting stringdist
  Downloading StringDist-1.0.9.tar.gz (7.4 kB)
Collecting deprecation
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Building wheels for collected packages: pyvis, stringdist
  Building wheel for pyvis (setup.py) ... [?25l[?25hdone
  Created wheel for pyvis: filename=pyvis-0.2.1-py3-none-any.whl size=23688 sha256=4bcc917ed50977f52833a03b8fcca05bc424b2c1b4b25b0f7c89d0ca0d635039
  Stored in directory: /root/.cache/pip/wheels/2a/8f/04/6340d46afc74f59cc857a594ca1a2a14a1f4cbd4fd6c2e9306
  Building wheel for stringdist (setup.py) ... [?25l[?25hdone
  Created wheel for stringdist: file

In [2]:
import pandas as pd
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.algo.filtering.log.timestamp import timestamp_filter
from pm4py.algo.filtering.log.start_activities import start_activities_filter
from pm4py.algo.filtering.log.attributes import attributes_filter

In [3]:
# Directory that holds the .xes logs. load_filter_log builds the file path by
# plain string concatenation (PATH + file_name + '.xes'), so the trailing
# slash is required.
# NOTE(review): relative path — presumably expects Google Drive to be mounted
# under ./drive before this notebook runs; confirm.
PATH = "drive/MyDrive/BIS_files/"

In [4]:
def load_filter_log(file_name, date1 = "2018-01-01 00:00:00", date2 = "2019-12-31 23:59:59"):
  """Import an XES log and keep only the cases that lie inside a time window.

  The file ``PATH + file_name + '.xes'`` is imported, then every case that has
  events outside the interval ``[date1, date2]`` is dropped. A short
  before/after summary (event count and case count) is printed.

  Args:
    file_name: name of the log file, without the ``.xes`` extension; it is
      appended to the module-level ``PATH``.
    date1: start of the window, formatted as "YYYY-MM-DD HH:MM:SS".
    date2: end of the window, formatted as "YYYY-MM-DD HH:MM:SS".

  Returns:
    The log produced by ``timestamp_filter.filter_traces_contained``.
  """
  def count_event_ids(log):
    # Size of the collection reported for the event attribute "id"; this is
    # the figure printed as "Events" below.
    # NOTE(review): presumably one entry per distinct id, so it equals the
    # number of events only if every event carries a unique id — confirm.
    return len(list(attributes_filter.get_attribute_values(log, "id")))

  raw_log = xes_importer.apply(PATH + file_name + '.xes')
  events_before = count_event_ids(raw_log)

  # Keep only the traces contained in [date1, date2]; with the default
  # arguments this retains cases that fall entirely within 2018-2019.
  filtered_log = timestamp_filter.filter_traces_contained(raw_log, date1, date2)
  events_after = count_event_ids(filtered_log)

  print(file_name + ' Data Before filtering:', str(events_before), 'Events;', str(len(raw_log)), 'Cases.')
  print(file_name + ' Data After filtering:', str(events_after), 'Events;', str(len(filtered_log)), 'Cases.')

  return filtered_log

**Loading the logs and filtering out cases with events outside 2018–2019 (e.g. cases that started in 2017)**

In [5]:
# Load the five logs with load_filter_log's default time window
# (2018-01-01 00:00:00 through 2019-12-31 23:59:59). Each call prints the
# event and case counts before and after filtering.
domesticDeclarations = load_filter_log(file_name='DomesticDeclarations')
internationalDeclarations = load_filter_log(file_name='InternationalDeclarations')
permitLog = load_filter_log(file_name='PermitLog')
prepaidTravelCost = load_filter_log(file_name='PrepaidTravelCost')
requestForPayment = load_filter_log(file_name='RequestForPayment')

parsing log, completed traces ::   0%|          | 0/10500 [00:00<?, ?it/s]

DomesticDeclarations Data Before filtering: 56437 Events; 10500 Cases.
DomesticDeclarations Data After filtering: 46375 Events; 8260 Cases.


parsing log, completed traces ::   0%|          | 0/6449 [00:00<?, ?it/s]

InternationalDeclarations Data Before filtering: 69073 Events; 6449 Cases.
InternationalDeclarations Data After filtering: 55576 Events; 4951 Cases.


parsing log, completed traces ::   0%|          | 0/7065 [00:00<?, ?it/s]

PermitLog Data Before filtering: 86581 Events; 7065 Cases.
PermitLog Data After filtering: 71063 Events; 5596 Cases.


parsing log, completed traces ::   0%|          | 0/2099 [00:00<?, ?it/s]

PrepaidTravelCost Data Before filtering: 16205 Events; 2099 Cases.
PrepaidTravelCost Data After filtering: 14024 Events; 1776 Cases.


parsing log, completed traces ::   0%|          | 0/6886 [00:00<?, ?it/s]

RequestForPayment Data Before filtering: 36796 Events; 6886 Cases.
RequestForPayment Data After filtering: 31820 Events; 5778 Cases.



**Process Discovery**

In [9]:
# process mining 
from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.algo.discovery.batches import algorithm 

# viz
from pm4py.visualization.petri_net import visualizer as pn_visualizer
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.visualization.heuristics_net import visualizer as hn_visualizer
from pm4py.visualization.dfg import visualizer as dfg_visualization
from pm4py.objects.conversion.process_tree import converter as pt_converter

from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness_evaluator
from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
from pm4py.algo.evaluation.generalization import algorithm as generalization_evaluator
from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator

In [11]:
def net_evaluation(event_log, net, im, fm):
  """Print four quality metrics of a Petri net with respect to an event log.

  The metrics are computed and printed one after another, in this order:
  fitness (token-based replay), precision (ETConformance token variant),
  generalization and simplicity. Nothing is returned.

  Args:
    event_log: the event log the net is evaluated against.
    net: the Petri net to evaluate.
    im: initial marking of ``net``.
    fm: final marking of ``net``.
  """
  # Each entry pairs the printed label with a zero-argument callable, so that
  # every metric is computed right before it is printed.
  metrics = (
    ('Fitness: ',
     lambda: replay_fitness_evaluator.apply(
       event_log, net, im, fm,
       variant=replay_fitness_evaluator.Variants.TOKEN_BASED)),
    ('Precision: ',
     lambda: precision_evaluator.apply(
       event_log, net, im, fm,
       variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)),
    ('Generalization: ',
     lambda: generalization_evaluator.apply(event_log, net, im, fm)),
    # Simplicity depends on the net only, not on the log.
    ('Simplicity: ',
     lambda: simplicity_evaluator.apply(net)),
  )

  for label, compute in metrics:
    print(label, compute())

In [17]:
def func(event_log, visualize=True):
  """Discover three Petri nets from ``event_log`` and print their metrics.

  The nets are produced, in order, by the Alpha Miner, the Heuristics Miner
  and the Inductive Miner (process tree converted to a Petri net). After each
  discovery the net is passed to ``net_evaluation``, which prints its quality
  metrics. Nothing is returned.

  Args:
    event_log: the event log to mine.
    visualize: when true, also display the frequency-annotated
      directly-follows graph of the log and each discovered Petri net.
  """
  def show_and_score(net, im, fm, frequency):
    # Optionally render the net decorated with the log, then print its metrics.
    if visualize:
      options = {'log': event_log}
      if frequency:
        options['variant'] = pn_visualizer.Variants.FREQUENCY
      pn_visualizer.view(pn_visualizer.apply(net, im, fm, **options))
    net_evaluation(event_log, net, im, fm)

  if visualize:
    # Directly-follows graph of the log, annotated with frequencies.
    dfg = dfg_discovery.apply(event_log)
    dfg_picture = dfg_visualization.apply(
      dfg, log=event_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(dfg_picture)

  # --- Alpha Miner ---
  net, im, fm = alpha_miner.apply(event_log)
  show_and_score(net, im, fm, frequency=True)

  # --- Heuristics Miner ---
  hm_keys = heuristics_miner.Variants.CLASSIC.value.Parameters
  hm_settings = {
    hm_keys.DEPENDENCY_THRESH: 0.7,
    hm_keys.MIN_ACT_COUNT: 4,
    hm_keys.MIN_DFG_OCCURRENCES: 20,
  }
  net, im, fm = heuristics_miner.apply(event_log, parameters=hm_settings)
  # This net is rendered with the visualizer's default variant.
  show_and_score(net, im, fm, frequency=False)

  # --- Inductive Miner ---
  # NOTE(review): no variant or parameters are passed here, so the library
  # default applies; an "infrequent" (IMf) variant is not selected
  # explicitly — confirm this is the intended configuration.
  tree = inductive_miner.apply_tree(event_log)
  # The process tree is converted into a Petri net before it is evaluated.
  net, im, fm = pt_converter.apply(tree)
  show_and_score(net, im, fm, frequency=True)

In [19]:
# Mine and evaluate the international declarations log without rendering any
# graph; only the metrics of the three discovered nets are printed.
func(internationalDeclarations, visualize=False)

replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Fitness:  {'perc_fit_traces': 0.0, 'average_trace_fitness': 0.6762500958077613, 'log_fitness': 0.6807790588305668, 'percentage_of_fitting_traces': 0.0}


replaying log with TBR, completed variants ::   0%|          | 0/2852 [00:00<?, ?it/s]

Precision:  0.0


replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Generalization:  0.846257201709615
Simplicity:  0.41732283464566927


replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Fitness:  {'perc_fit_traces': 40.67865077762068, 'average_trace_fitness': 0.952693443622291, 'log_fitness': 0.9553882675309919, 'percentage_of_fitting_traces': 40.67865077762068}


replaying log with TBR, completed variants ::   0%|          | 0/2852 [00:00<?, ?it/s]

Precision:  0.9195696300554975


replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Generalization:  0.8756817512624178
Simplicity:  0.5679012345679013


replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Fitness:  {'perc_fit_traces': 100.0, 'average_trace_fitness': 1.0, 'log_fitness': 1.0, 'percentage_of_fitting_traces': 100.0}


replaying log with TBR, completed variants ::   0%|          | 0/2852 [00:00<?, ?it/s]

Precision:  0.09997653759447034


replaying log with TBR, completed variants ::   0%|          | 0/522 [00:00<?, ?it/s]

Generalization:  0.8914812790824339
Simplicity:  0.5294117647058824
