In [None]:
import os
import pandas as pd

import pm4py

## Log Util
# Log conversion
from pm4py.objects.conversion.log import converter as log_converter
# Read Log (e.g., If you can not use the simplified interface because you do not have ipywidgets installed)
from pm4py.objects.log.importer.xes import importer as xes_importer

## Conformance Checking
# Token-based replay (e.g., If you can not use the simplified interface because you do not have ipywidgets installed)
from pm4py.algo.evaluation.replay_fitness import evaluator as replay_fitness_evaluator
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
# Standard Alignments (e.g., If you can not use the simplified interface because you do not have ipywidgets installed)
from pm4py.algo.conformance.alignments import algorithm as alignments
# Decomposed Alignments (e.g., If you can not use the simplified interface because you do not have ipywidgets installed)
from pm4py.algo.conformance.alignments.decomposed import algorithm as decomp_alignments

## Petri Nets
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.utils import petri_utils



In [23]:
from pm4py.algo.filtering.log.cases import case_filter
from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
from pm4py.algo.filtering.log.start_activities import start_activities_filter
from pm4py.algo.filtering.log.end_activities import end_activities_filter
from pm4py.algo.filtering.log.variants import variants_filter



In [24]:
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.util import constants

log_csv = pd.read_csv('../eventlogs/ItalianHelpdeskFinal.csv', sep=',')
log_csv = dataframe_utils.convert_timestamp_columns_in_df(log_csv)
param_keys={constants.PARAMETER_CONSTANT_CASEID_KEY: 'Case ID', 
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: 'Activity', 
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"
           }
log_csv

Unnamed: 0,Case ID,Activity,org:resource,Complete Timestamp,seriousness,customer,product,seriousness_2,service_level,service_type,workgroup
0,Case 1,Assign seriousness,Value 1,2012-10-09 14:50:17+00:00,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
1,Case 1,Take in charge ticket,Value 1,2012-10-09 14:51:01+00:00,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
2,Case 1,Take in charge ticket,Value 2,2012-10-12 15:02:56+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
3,Case 1,Resolve ticket,Value 1,2012-10-25 11:54:26+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
4,Case 1,Closed,Value 3,2012-11-09 12:54:39+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
...,...,...,...,...,...,...,...,...,...,...,...
21337,Case 4579,Closed,Value 5,2010-09-02 10:11:00+00:00,Value 1,Value 71,Value 3,Value 1,Value 3,Value 1,Value 1
21338,Case 4580,Take in charge ticket,Value 6,2012-01-03 09:33:43+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1
21339,Case 4580,Wait,Value 6,2012-01-10 15:30:11+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1
21340,Case 4580,Resolve ticket,Value 6,2012-01-10 17:07:40+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1


In [25]:
event_log = log_converter.apply(log_csv,parameters=param_keys)
event_log

[{'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Assign seriousness', 'org:resource': 'Value 1', 'Complete Timestamp': Timestamp('2012-10-09 14:50:17+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 1', 'service_type': 'Value 1', 'workgroup': 'Value 1'}, '..', {'Case ID': 'Case 1', 'Activity': 'Closed', 'org:resource': 'Value 3', 'Complete Timestamp': Timestamp('2012-11-09 12:54:39+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}]}, '....', {'attributes': {'concept:name': 'Case 4580'}, 'events': [{'Case ID': 'Case 4580', 'Activity': 'Take in charge ticket', 'org:resource': 'Value 6', 'Complete Timestamp': Timestamp('2012-01-03 09:33:43+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 92', 'product

In [26]:
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
param_keys={constants.PARAMETER_CONSTANT_CASEID_KEY: 'Case ID', 
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: 'Activity',
           constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"}
tree = inductive_miner.apply_tree(event_log,parameters=param_keys,variant=inductive_miner.Variants.IM)

In [27]:
#pm4py.view_process_tree(tree)

In [28]:
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
param_keys={constants.PARAMETER_CONSTANT_CASEID_KEY: 'Case ID', 
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: 'Activity',
           constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:'Complete Timestamp'}
dfg = dfg_discovery.apply(event_log,parameters=param_keys)
from pm4py.statistics.start_activities.log import get as start_activities_module
from pm4py.statistics.end_activities.log import get as end_activities_module
start_activities = start_activities_module.get_start_activities(event_log,param_keys)
end_activities = end_activities_module.get_end_activities(event_log,param_keys)

In [29]:
from pm4py.visualization.dfg import visualizer as dfg_visualization
#pm4py.view_dfg(dfg,start_activities,end_activities)

In [30]:
def filterLog(activity, trace):
    for event in trace:
        if event["Activity"] == activity:
            return False 
    return True

filtered_log = [trace for trace in event_log if filterLog("Special Priority Appointment granted",trace)]



In [31]:
tree = inductive_miner.apply_tree(filtered_log,parameters=param_keys,variant=inductive_miner.Variants.IM)



In [32]:
#pm4py.view_process_tree(tree)

In [33]:
def find2Events(activity, trace):
    count = 0
    for event in trace:
        if event["Activity"] == activity:
            count = count + 1
    if count > 1:
        return True
    else:
        return False
        

In [34]:
vaccine2Log = [trace for trace in event_log if find2Events("Wait",trace)]

In [35]:
vaccine2Log[0]

{'attributes': {'concept:name': 'Case 34'}, 'events': [{'Case ID': 'Case 34', 'Activity': 'Assign seriousness', 'org:resource': 'Value 14', 'Complete Timestamp': Timestamp('2013-06-11 13:56:40+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 29', 'product': 'Value 6', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}, '..', {'Case ID': 'Case 34', 'Activity': 'Closed', 'org:resource': 'Value 3', 'Complete Timestamp': Timestamp('2013-07-20 10:18:42+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 29', 'product': 'Value 6', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}]}

In [74]:
dfg_secondVac = dfg_discovery.apply(vaccine2Log,parameters=param_keys)
start_activities_doubleVac = start_activities_module.get_start_activities(vaccine2Log,param_keys)
end_activities_doubleVac = end_activities_module.get_end_activities(vaccine2Log,param_keys)
#pm4py.view_dfg(dfg_secondVac,start_activities_doubleVac,end_activities_doubleVac)

In [17]:
from pm4py.statistics.traces.pandas import case_statistics
variants = case_statistics.get_variants_df(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity"})
variants.describe(include='all')

stats = variants.describe(include='all')
print (stats)




                                                  variant
count                                                4579
unique                                                226
top     Assign seriousness,Take in charge ticket,Resol...
freq                                                 2366


In [38]:
from pm4py.statistics.traces.pandas import case_statistics
variants_count = case_statistics.get_variant_statistics(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                      constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"})
variants_count = sorted(variants_count, key=lambda x: x['Case ID'], reverse=True)
#important


In [36]:

filtered_df2 = variants_filter.apply(event_log, variants,
                                          parameters={"positive": False, constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity"})
filtered_df2

[{'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Assign seriousness', 'org:resource': 'Value 1', 'Complete Timestamp': Timestamp('2012-10-09 14:50:17+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 1', 'service_type': 'Value 1', 'workgroup': 'Value 1'}, '..', {'Case ID': 'Case 1', 'Activity': 'Closed', 'org:resource': 'Value 3', 'Complete Timestamp': Timestamp('2012-11-09 12:54:39+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}]}, '....', {'attributes': {'concept:name': 'Case 4568'}, 'events': [{'Case ID': 'Case 4568', 'Activity': 'Assign seriousness', 'org:resource': 'Value 1', 'Complete Timestamp': Timestamp('2011-09-16 09:12:06+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 5', 'product': '

In [37]:
from pandas import DataFrame



df = DataFrame(log_csv, columns= ['Case ID', 'Activity','Complete Timestamp'])


count1 = df['Case ID'].count()
print('count cases: ' + str(count1))



count cases: 21342


In [39]:
variants = case_statistics.get_variants_df(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity"})
count1 = df['Case ID'].count()
print('count cases: ' + str(count1))
print(variants)

count cases: 21342
                                                     variant
Case ID                                                     
Case 1     Assign seriousness,Take in charge ticket,Take ...
Case 10    Assign seriousness,Take in charge ticket,Resol...
Case 100   Assign seriousness,Take in charge ticket,Requi...
Case 1000  Assign seriousness,Assign seriousness,Take in ...
Case 1001  Assign seriousness,Take in charge ticket,Resol...
...                                                      ...
Case 995   Assign seriousness,Take in charge ticket,Wait,...
Case 996   Assign seriousness,Take in charge ticket,Resol...
Case 997   Assign seriousness,Take in charge ticket,Resol...
Case 998   Assign seriousness,Take in charge ticket,Wait,...
Case 999   Assign seriousness,Take in charge ticket,Resol...

[4579 rows x 1 columns]


In [133]:


import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter

log_csv = pd.read_csv('../eventlogs/ItalianHelpdeskFinal.csv', sep=',')
log_csv = dataframe_utils.convert_timestamp_columns_in_df(log_csv)


param_keys={constants.PARAMETER_CONSTANT_CASEID_KEY: 'Case ID', 
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: 'Activity', 
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"
           }
event_log = log_converter.apply(log_csv,parameters=param_keys)



log_csv = log_csv.sort_values('Activity')
event_log = log_converter.apply(log_csv,parameters=param_keys)

event_log



from pm4py.statistics.traces.pandas import case_statistics



[{'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Assign seriousness', 'org:resource': 'Value 1', 'Complete Timestamp': Timestamp('2012-10-09 14:50:17+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 1', 'service_type': 'Value 1', 'workgroup': 'Value 1'}, '..', {'Case ID': 'Case 1', 'Activity': 'Take in charge ticket', 'org:resource': 'Value 2', 'Complete Timestamp': Timestamp('2012-10-12 15:02:56+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}]}, '....', {'attributes': {'concept:name': 'Case 4370'}, 'events': [{'Case ID': 'Case 4370', 'Activity': 'Insert ticket', 'org:resource': 'Value 1', 'Complete Timestamp': Timestamp('2013-01-04 09:55:16+0000', tz='UTC'), 'seriousness': 'Value 1', 'customer': 'Value 396', 

In [181]:

log_csv = pd.read_csv('../eventlogs/ItalianHelpdeskFinal.csv', sep=',')
#log_csv.rename(columns={'clientID': 'case:clientID'}, inplace=True)
event_log = log_converter.apply(log_csv, parameters=param_keys, variant=log_converter.Variants.TO_EVENT_LOG)
event_log[0][3]

{'Case ID': 'Case 1', 'Activity': 'Resolve ticket', 'org:resource': 'Value 1', 'Complete Timestamp': '2012/10/25 11:54:26.000', 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}

In [180]:
from pm4py.algo.filtering.log.variants import variants_filter

filtered_log = variants_filter.filter_log_variants_percentage(event_log, percentage=0.5)


KeyError: 'concept:name'

In [41]:
from pm4py.statistics.traces.pandas import case_statistics
variants_count = case_statistics.get_variant_statistics(log_csv,
                                          parameters={case_statistics.Parameters.CASE_ID_KEY: "Case ID",
                                                      case_statistics.Parameters.ACTIVITY_KEY: "Activity",
                                                      case_statistics.Parameters.TIMESTAMP_KEY: "Complete Timestamp"})
variants_count = sorted(variants_count, key=lambda x: x['Case ID'], reverse=False)
#variants_count

