In [1]:
import os
import pandas as pd
import pm4py
from pm4py.algo.filtering.log.cases import case_filter
from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
from pm4py.algo.filtering.log.start_activities import start_activities_filter
from pm4py.algo.filtering.log.end_activities import end_activities_filter
from pm4py.algo.filtering.log.variants import variants_filter
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.util import constants


In [2]:
#Import event log

log_csv = pd.read_csv('../eventlogs/ItalianHelpdeskFinal.csv', sep=',')
log_csv = dataframe_utils.convert_timestamp_columns_in_df(log_csv)
param_keys={constants.PARAMETER_CONSTANT_CASEID_KEY: 'Case ID', 
    constants.PARAMETER_CONSTANT_ACTIVITY_KEY: 'Activity', 
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"
           }
log_csv

Unnamed: 0,Case ID,Activity,org:resource,Complete Timestamp,seriousness,customer,product,seriousness_2,service_level,service_type,workgroup
0,Case 1,Assign seriousness,Value 1,2012-10-09 14:50:17+00:00,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
1,Case 1,Take in charge ticket,Value 1,2012-10-09 14:51:01+00:00,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1,Value 1
2,Case 1,Take in charge ticket,Value 2,2012-10-12 15:02:56+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
3,Case 1,Resolve ticket,Value 1,2012-10-25 11:54:26+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
4,Case 1,Closed,Value 3,2012-11-09 12:54:39+00:00,Value 1,Value 1,Value 1,Value 1,Value 2,Value 1,Value 1
...,...,...,...,...,...,...,...,...,...,...,...
21337,Case 4579,Closed,Value 5,2010-09-02 10:11:00+00:00,Value 1,Value 71,Value 3,Value 1,Value 3,Value 1,Value 1
21338,Case 4580,Take in charge ticket,Value 6,2012-01-03 09:33:43+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1
21339,Case 4580,Wait,Value 6,2012-01-10 15:30:11+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1
21340,Case 4580,Resolve ticket,Value 6,2012-01-10 17:07:40+00:00,Value 1,Value 92,Value 3,Value 2,Value 2,Value 2,Value 1


In [3]:
#Convert to dataframe

event_log = log_converter.apply(log_csv,parameters=param_keys)


## 1-Traversing the event log:

In [4]:
#Insights to Variants

from pm4py.statistics.traces.pandas import case_statistics
variants = case_statistics.get_variants_df(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity"})
variants.describe(include='all')

stats = variants.describe(include='all')
print (stats)

                                                  variant
count                                                4579
unique                                                226
top     Assign seriousness,Take in charge ticket,Resol...
freq                                                 2366


In [5]:
#Check cases

from pandas import DataFrame
df = DataFrame(log_csv, columns= ['Case ID', 'Activity','Complete Timestamp'])
count1 = df['Case ID'].count()
print('count cases: ' + str(count1))

count cases: 21342


In [6]:
#Group traces to each variant and order variants according to frequency of traces in variant

from pm4py.statistics.traces.pandas import case_statistics
variants_count = case_statistics.get_variant_statistics(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                      constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "Complete Timestamp"})
variants_count = sorted(variants_count, key=lambda x: x['Case ID'], reverse=True)
variants_count


[{'variant': 'Assign seriousness,Take in charge ticket,Resolve ticket,Closed',
  'Case ID': 2366},
 {'variant': 'Assign seriousness,Take in charge ticket,Wait,Resolve ticket,Closed',
  'Case ID': 552},
 {'variant': 'Assign seriousness,Take in charge ticket,Wait,Take in charge ticket,Resolve ticket,Closed',
  'Case ID': 228},
 {'variant': 'Assign seriousness,Assign seriousness,Take in charge ticket,Resolve ticket,Closed',
  'Case ID': 213},
 {'variant': 'Assign seriousness,Resolve ticket,Closed', 'Case ID': 164},
 {'variant': 'Assign seriousness,Take in charge ticket,Resolve ticket,Resolve ticket,Closed',
  'Case ID': 109},
 {'variant': 'Insert ticket,Assign seriousness,Take in charge ticket,Resolve ticket,Closed',
  'Case ID': 69},
 {'variant': 'Assign seriousness,Resolve ticket,Resolve ticket,Closed',
  'Case ID': 62},
 {'variant': 'Assign seriousness,Take in charge ticket,Resolve ticket,Take in charge ticket,Resolve ticket,Closed',
  'Case ID': 48},
 {'variant': 'Assign seriousness,T

In [7]:

variants = case_statistics.get_variants_df(log_csv,
                                          parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                      constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity"})
count1 = df['Case ID'].count()
print('count cases: ' + str(count1))
print(variants)

count cases: 21342
                                                     variant
Case ID                                                     
Case 1     Assign seriousness,Take in charge ticket,Take ...
Case 10    Assign seriousness,Take in charge ticket,Resol...
Case 100   Assign seriousness,Take in charge ticket,Requi...
Case 1000  Assign seriousness,Assign seriousness,Take in ...
Case 1001  Assign seriousness,Take in charge ticket,Resol...
...                                                      ...
Case 995   Assign seriousness,Take in charge ticket,Wait,...
Case 996   Assign seriousness,Take in charge ticket,Resol...
Case 997   Assign seriousness,Take in charge ticket,Resol...
Case 998   Assign seriousness,Take in charge ticket,Wait,...
Case 999   Assign seriousness,Take in charge ticket,Resol...

[4579 rows x 1 columns]


In [8]:

log_csv = pd.read_csv('../eventlogs/ItalianHelpdeskFinal.csv', sep=',')
#log_csv.rename(columns={'clientID': 'case:clientID'}, inplace=True)
event_log = log_converter.apply(log_csv, parameters=param_keys, variant=log_converter.Variants.TO_EVENT_LOG)
event_log[0][3]

{'Case ID': 'Case 1', 'Activity': 'Resolve ticket', 'org:resource': 'Value 1', 'Complete Timestamp': '2012/10/25 11:54:26.000', 'seriousness': 'Value 1', 'customer': 'Value 1', 'product': 'Value 1', 'seriousness_2': 'Value 1', 'service_level': 'Value 2', 'service_type': 'Value 1', 'workgroup': 'Value 1'}

## 2-Distribution Computation:

## 3-Sorting the cases of each variant:
