# Event Log Preprocessing

In [None]:
import pm4py

Sepsis Event Log: 
Mannhardt, Felix (2016): Sepsis Cases - Event Log. Version 1. 4TU.ResearchData. dataset. https://doi.org/10.4121/uuid:915d2bfb-7e84-49ad-a286-dc35f063a460

In [None]:
# Path to the local copy of the Sepsis XES file (placeholder - adjust before running).
input_file_path = './path/to/your/local/Sepsis Cases - Event Log.xes'

# Read the XES log, then obtain its DataFrame form for tabular inspection.
log = pm4py.read_xes(input_file_path)
df = pm4py.convert_to_dataframe(log)

# Trailing expression: the notebook renders the frame as the cell output.
df

In [None]:
# Print the column names, dtypes and non-null counts of the event DataFrame.
df.info()

Petri net obtained using the Inductive Miner algorithm:

In [None]:
# Discover a Petri net (with its initial and final markings) from the complete log.
(
    net,
    initial_marking,
    final_marking,
) = pm4py.discover_petri_net_inductive(log)

# Render the discovered model inline.
pm4py.view_petri_net(
    net,
    initial_marking,
    final_marking,
)

Petri Net obtained using Alpha Algorithm:

In [None]:
# Mine a Petri net from the complete log with the Alpha algorithm and show it.
(
    net_alpha,
    initial_marking_alpha,
    final_marking_alpha,
) = pm4py.algo.discovery.alpha.algorithm.apply(log)
pm4py.view_petri_net(net_alpha, initial_marking_alpha, final_marking_alpha)

# Export the case / activity / timestamp columns of the unfiltered log to CSV.
df[['case:concept:name', 'concept:name', 'time:timestamp']].to_csv(
    './EL_tot.csv',
    index=False,
)

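Preprocessing: keep only the cases that lie entirely within the 2013-11-07 to 2014-06-07 window, reduce the log to case ID, activity, and timestamp, and split the retained cases into three groups.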

In [None]:
# Report the time span covered by the full log. Series.min()/max() are
# vectorized and skip missing timestamps, unlike the Python builtins, whose
# result on a Series containing NaT depends on element order.
print('Min and Max dates in the log:')
print(df['time:timestamp'].min())
print(df['time:timestamp'].max())

# Observation window: only cases contained in it are kept ('traces_contained').
start_date = '2013-11-07'
end_date = '2014-06-07'
log_filtered = pm4py.filter_time_range(
    log,
    f"{start_date} 00:00:00",
    f"{end_date} 23:59:59",
    mode='traces_contained',
)
case_log_filtered = list(log_filtered['case:concept:name'].unique())

# Select the retained cases and the three core columns in a single .loc call,
# then rename the columns to the short names used in the exported CSV files.
df_filtered = df.loc[
    df['case:concept:name'].isin(case_log_filtered),
    ['case:concept:name', 'concept:name', 'time:timestamp'],
].rename(columns={
    'case:concept:name': 'ID',
    'concept:name': 'event',
    'time:timestamp': 'date',
})

print('Number of unique cases in filtered XES:')
print(len(case_log_filtered))
print('Number of unique cases in filtered df:')
print(df_filtered['ID'].nunique())

# Sanity check: the DataFrame must contain exactly the cases kept by the filter.
assert set(case_log_filtered) == set(df_filtered['ID']), \
    'filtered DataFrame and filtered log contain different case IDs'

# Serialize timestamps as day/month/year strings; .assign builds a new frame
# instead of writing into a slice of `df`.
df_filtered = df_filtered.assign(
    date=df_filtered['date'].dt.strftime('%d/%m/%Y %H:%M:%S')
)

# Partition the case IDs (in order of first appearance) at 1/3 and 1/2.
# NOTE(review): these cut points give groups of roughly 1/3, 1/6 and 1/2 of
# the cases, not three equal parts - confirm that this is the intended split.
group_id = list(df_filtered['ID'].unique())
third_id = round(len(group_id)/3)
half_id = round(len(group_id)/2)

first_group = group_id[:third_id]
second_group = group_id[third_id:half_id]
third_group = group_id[half_id:]

print('Number of cases in the 3 groups:')
print(len(first_group))
print(len(second_group))
print(len(third_group))


# One event table per group of cases.
df_filtered_first = df_filtered[df_filtered['ID'].isin(first_group)]
df_filtered_second = df_filtered[df_filtered['ID'].isin(second_group)]
df_filtered_third = df_filtered[df_filtered['ID'].isin(third_group)]

Petri Net obtained using Alpha Algorithm on filtered log:


In [None]:
# Run the Alpha algorithm on the time-filtered log and display the result.
# Note: this rebinds `net`, `initial_marking` and `final_marking`.
(
    net,
    initial_marking,
    final_marking,
) = pm4py.algo.discovery.alpha.algorithm.apply(log_filtered)
pm4py.view_petri_net(
    net,
    initial_marking,
    final_marking,
)

Petri Net obtained using Alpha Plus Algorithm on filtered log:

In [None]:
# Run the Alpha Plus variant on the time-filtered log. Besides the net and its
# markings, the call is unpacked into three further values bound to
# `causal`, `parallel` and `follows`.
net_Ap, initial_marking_Ap, final_marking_Ap, causal, parallel, follows = (
    pm4py.algo.discovery.alpha.algorithm.apply(
        log_filtered,
        variant=pm4py.algo.discovery.alpha.algorithm.Variants.ALPHA_VERSION_PLUS,
    )
)
pm4py.view_petri_net(
    net_Ap,
    initial_marking_Ap,
    final_marking_Ap,
)

In [None]:
# Print a structural summary of the filtered event table.
df_filtered.info()

# Write the complete filtered table followed by the three per-group tables.
for export_path, export_frame in (
    ('./EL.csv', df_filtered),
    ('./EL.1.csv', df_filtered_first),
    ('./EL.2.csv', df_filtered_second),
    ('./EL.3.csv', df_filtered_third),
):
    export_frame.to_csv(export_path, index=False)