In [1]:
import os
import pandas as pd
import pm4py
from collections import Counter

In [13]:
# Load the semicolon-separated running-example event log and preview its first rows.
df = pd.read_csv("data/running_example.csv", sep=";")
df.head()

Unnamed: 0,case_id,activity,timestamp,costs,org:resource
0,3,register request,2010-12-30 14:32:00+01:00,50,Pete
1,3,examine casually,2010-12-30 15:06:00+01:00,400,Mike
2,3,check ticket,2010-12-30 16:34:00+01:00,100,Ellen
3,3,decide,2011-01-06 09:18:00+01:00,200,Sara
4,3,reinitiate request,2011-01-06 12:18:00+01:00,200,Sara


In [18]:
# Count the distinct values in the case_id column, i.e. how many cases the log holds.
df["case_id"].nunique()

6

In [14]:
# Look at the first timestamp as loaded from the CSV; the recorded output shows it is
# still a plain string at this point.
df.loc[0, "timestamp"]

'2010-12-30 14:32:00+01:00'

In [15]:
# pm4py.format_dataframe adds the standard pm4py columns (case:concept:name,
# concept:name, time:timestamp, @@index, @@case_index) next to the original ones.
# As originally noted in this cell, the call also modifies the frame it is handed,
# so pass a copy: the `df` loaded from the CSV stays untouched and this cell can be
# re-run without depending on leftover state.
log: pd.DataFrame = pm4py.format_dataframe(
    df.copy(),
    case_id="case_id",
    activity_key="activity",
    timestamp_key="timestamp",
)
log.head()

Unnamed: 0,case_id,activity,timestamp,costs,org:resource,case:concept:name,concept:name,time:timestamp,@@index,@@case_index
0,1,register request,2010-12-30 10:02:00+00:00,50,Pete,1,register request,2010-12-30 10:02:00+00:00,0,0
1,1,examine thoroughly,2010-12-31 09:06:00+00:00,400,Sue,1,examine thoroughly,2010-12-31 09:06:00+00:00,1,0
2,1,check ticket,2011-01-05 14:12:00+00:00,100,Mike,1,check ticket,2011-01-05 14:12:00+00:00,2,0
3,1,decide,2011-01-06 10:18:00+00:00,200,Sara,1,decide,2011-01-06 10:18:00+00:00,3,0
4,1,reject request,2011-01-07 13:24:00+00:00,200,Pete,1,reject request,2011-01-07 13:24:00+00:00,4,0


In [16]:
# After formatting, the same kind of lookup yields a timezone-aware pandas Timestamp
# (UTC in the recorded output) instead of a string.
log.loc[0, "time:timestamp"]

Timestamp('2010-12-30 10:02:00+0000', tz='UTC')

In [17]:
# Count, per activity, how many cases begin with it.
# Original author's note: equivalent to
#   dict(Counter(log.groupby("case_id")["activity"].first().tolist()))
pm4py.get_start_activities(log)

{'register request': 6}

In [19]:
# Count, per activity, how many cases finish with it.
# Original author's note: equivalent to
#   dict(Counter(log.groupby("case_id")["activity"].last().tolist()))
pm4py.get_end_activities(log)

{'reject request': 3, 'pay compensation': 3}

In [2]:
# Read the XES version of the running example. return_legacy_log_object=True is passed
# so the result is a pm4py EventLog object (confirmed by the type check in the next cell).
log_xes = pm4py.read_xes("data/running_example.xes", return_legacy_log_object=True)



parsing log, completed traces ::   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
# Inspect which kind of object read_xes handed back.
type(log_xes)

pm4py.objects.log.obj.EventLog

In [4]:
# Turn the EventLog into a pandas DataFrame and preview its first rows.
# NOTE(review): this rebinds `df`, which earlier in the notebook held the CSV data;
# a distinct name would avoid confusing the two frames.
df = pm4py.convert_to_dataframe(log_xes)
df.head()

Unnamed: 0,concept:name,org:resource,time:timestamp,Activity,Resource,Costs,case:concept:name
0,register request,Pete,2010-12-30 14:32:00+00:00,register request,Pete,50,3
1,examine casually,Mike,2010-12-30 15:06:00+00:00,examine casually,Mike,400,3
2,check ticket,Ellen,2010-12-30 16:34:00+00:00,check ticket,Ellen,100,3
3,decide,Sara,2011-01-06 09:18:00+00:00,decide,Sara,200,3
4,reinitiate request,Sara,2011-01-06 12:18:00+00:00,reinitiate request,Sara,200,3


In [35]:
# Export the DataFrame as an XES file at data/test.xes.
pm4py.write_xes(df, "data/test.xes")

exporting log, completed traces ::   0%|          | 0/6 [00:00<?, ?it/s]

In [25]:
from pm4py.objects.log.obj import EventLog, EventStream, Event

In [15]:
# Rebuild an EventLog from the loaded log's internal `_list` plus its metadata fields,
# then check that the rebuilt object compares equal to the original.
EventLog(
    log_xes._list,
    **{
        field: getattr(log_xes, field)
        for field in ("attributes", "extensions", "omni_present", "classifiers", "properties")
    },
) == log_xes

True

In [28]:
# Filter the log by start activity "register request".
# NOTE(review): retain=True presumably keeps (rather than drops) the matching cases;
# confirm against the pm4py documentation.
pm4py.filter_start_activities(log_xes, ["register request"], retain=True)

[{'attributes': {'concept:name': '3'}, 'events': [{'concept:name': 'register request', 'org:resource': 'Pete', 'time:timestamp': datetime.datetime(2010, 12, 30, 14, 32, tzinfo=datetime.timezone.utc), 'Activity': 'register request', 'Resource': 'Pete', 'Costs': '50', 'case:concept:name': '3'}, '..', {'concept:name': 'pay compensation', 'org:resource': 'Ellen', 'time:timestamp': datetime.datetime(2011, 1, 15, 10, 45, tzinfo=datetime.timezone.utc), 'Activity': 'pay compensation', 'Resource': 'Ellen', 'Costs': '200', 'case:concept:name': '3'}]}, '....', {'attributes': {'concept:name': '4'}, 'events': [{'concept:name': 'register request', 'org:resource': 'Pete', 'time:timestamp': datetime.datetime(2011, 1, 6, 15, 2, tzinfo=datetime.timezone.utc), 'Activity': 'register request', 'Resource': 'Pete', 'Costs': '50', 'case:concept:name': '4'}, '..', {'concept:name': 'reject request', 'org:resource': 'Ellen', 'time:timestamp': datetime.datetime(2011, 1, 12, 15, 44, tzinfo=datetime.timezone.utc), 