In [1]:
### From tutorial 1-4 ###

In [2]:
import pandas as pd
import pm4py

from pm4py.objects.log.util import func as functools

In [3]:
file_path = 'running-example.csv'
event_log = pd.read_csv(file_path, sep=';')
num_events = len(event_log)
num_cases = len(pd.unique(event_log['case_id']))

print('Number of events = ' + str(num_events) + ' and the number of cases = ' + str(num_cases) + '.')


Number of events = 42 and the number of cases = 6.


In [4]:
# work with pm4py

event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp', timest_format='%Y-%m-%d %H:%M:%S%z')
start_activities = pm4py.get_start_activities(event_log)
end_activities = pm4py.get_end_activities(event_log)

print('Start activities: ' + str(start_activities) + '.')
print('End activities: ' + str(end_activities) + '.')

Start activities: {'register request': 6}.
End activities: {'reject request': 3, 'pay compensation': 3}.


In [5]:
# look at the event log

print(event_log)
print(' ')
for t in event_log:
    print(t)

    case_id            activity                 timestamp  costs resource  \
14        1    register request 2010-12-30 10:02:00+00:00     50     Pete   
15        1  examine thoroughly 2010-12-31 09:06:00+00:00    400      Sue   
16        1        check ticket 2011-01-05 14:12:00+00:00    100     Mike   
17        1              decide 2011-01-06 10:18:00+00:00    200     Sara   
18        1      reject request 2011-01-07 13:24:00+00:00    200     Pete   
9         2    register request 2010-12-30 10:32:00+00:00     50     Mike   
10        2        check ticket 2010-12-30 11:12:00+00:00    100     Mike   
11        2    examine casually 2010-12-30 13:16:00+00:00    400     Sean   
12        2              decide 2011-01-05 10:22:00+00:00    200     Sara   
13        2    pay compensation 2011-01-08 11:05:00+00:00    200    Ellen   
0         3    register request 2010-12-30 13:32:00+00:00     50     Pete   
1         3    examine casually 2010-12-30 14:06:00+00:00    400     Mike   

In [7]:
# evaluate traces

trace_log = functools.filter_(lambda t: len(t) > 5, event_log)
print(type(trace_log))
print(trace_log)
for t in trace_log:
    print(t)


<class 'pandas.core.frame.DataFrame'>
    case_id            activity                 timestamp  costs resource  \
14        1    register request 2010-12-30 10:02:00+00:00     50     Pete   
15        1  examine thoroughly 2010-12-31 09:06:00+00:00    400      Sue   
16        1        check ticket 2011-01-05 14:12:00+00:00    100     Mike   
17        1              decide 2011-01-06 10:18:00+00:00    200     Sara   
18        1      reject request 2011-01-07 13:24:00+00:00    200     Pete   
9         2    register request 2010-12-30 10:32:00+00:00     50     Mike   
10        2        check ticket 2010-12-30 11:12:00+00:00    100     Mike   
11        2    examine casually 2010-12-30 13:16:00+00:00    400     Sean   
12        2              decide 2011-01-05 10:22:00+00:00    200     Sara   
13        2    pay compensation 2011-01-08 11:05:00+00:00    200    Ellen   
0         3    register request 2010-12-30 13:32:00+00:00     50     Pete   
1         3    examine casually 2010-1

  This is separate from the ipykernel package so we can avoid doing imports until


# I prefer working with a pandas dataframe over an eventstream, but just for the sake of completeness

event_stream = pm4py.convert_to_event_stream(event_log)
event_stream_mike = functools.filter_(lambda e: e['org:resource'] == 'Mike', event_stream)

print(type(event_stream_mike))

for e in event_stream_mike:
    print(e)
    
event_log_mike = pm4py.convert_to_event_log(event_stream_mike)

for t in event_log_mike:
    print(t)