# Creating a single event-log

In [1]:
# Install requirements (optional one-time setup; uncomment the lines below to run them).
# NOTE(review): inside a notebook, `%pip install ...` installs into the running
# kernel's environment, whereas `!pip` may target a different interpreter — consider switching.
#!pip install numpy pandas pm4py
# Graphviz system package, installed here through Homebrew.
#!brew install graphviz

In [2]:
import sys
import os
import numpy as np
# Put ../src (resolved to an absolute path) on the import path; presumably this
# is where the SynBPS package imported further down lives — TODO confirm.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path a second time.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [3]:
eventlog_settings = {
    # How many traces (cases) the generated event-log contains.
    "number_of_traces": 1000,

    # Entropy level of the process: "min_entropy", "med_entropy" or "max_entropy".
    "process_entropy": "max_entropy",

    # Transition model: "memoryless" (first-order Markov chain) or
    # "memory" (higher-order Markov chain).
    "process_type": "memory",

    # Order of the higher-order Markov chain; only meaningful for a process with memory.
    "process_memory": 2,

    # Number of distinct activity types.
    "statespace_size": 5,

    # Number of transitions; only used for medium entropy
    # (should be higher than 2 and below the statespace size).
    "med_ent_n_transitions": 3,

    # Lambda parameter of the inter-arrival times.
    "inter_arrival_time": 1.5,

    # Lambda parameter of the process noise.
    "process_stability_scale": 0.1,

    # Probability that an agent is available.
    "resource_availability_p": 0.5,

    # Number of agents in the process.
    "resource_availability_n": 3,

    # Waiting time, in full days, applied when no agent is available.
    # NOTE(review): 0.041 days is roughly 59 minutes; the earlier note said
    # "15 minutes", which would be about 0.0104 days — TODO confirm the intended value.
    "resource_availability_m": 0.041,

    # Variation between activity durations.
    "activity_duration_lambda_range": 1,

    # Business-hours definition: when can cases be processed?
    "Deterministic_offset_W": "weekdays",

    # Time-unit count for a full week: days = 7, hours = 24*7, etc.
    "Deterministic_offset_u": 7,

    # Offset for the generated timestamps, in years after 1970.
    "datetime_offset": 54,

    # Seed for replication; keep it fixed when results must be reproducible.
    # For a random seed instead: int(np.random.uniform(low=0, high=2**32 - 1))
    "seed_value": 1337,
}

In [4]:
from SynBPS.simulation.simulate_eventlog import generate_eventlog

In [5]:
# Memoryless variant of the configuration: every entry is taken from
# `eventlog_settings` (defined in an earlier cell) and only "process_type" is
# overridden, so the two configurations cannot drift apart through copy-paste.
# The result is a new dict; `eventlog_settings` itself is not modified.
# "process_memory" is carried over unchanged, as it was in the hand-written copy;
# presumably it is ignored for a memoryless process — TODO confirm.
eventlog_settings2 = {
    **eventlog_settings,
    # first-order Markov chain instead of the higher-order ("memory") one
    "process_type": "memoryless",
}

In [10]:
# Simulate the event-log for the process with memory (settings in `eventlog_settings`).
log = generate_eventlog(eventlog_settings)

seed: 1337
generated traces: 1000
traces: 1000
events: 5496
ids: 1000
seed: 1337
generated traces: 1000
traces: 1000
events: 5496
ids: 1000


In [7]:
# Summary figures for the log currently bound to `log`.
print(len(log))          # number of rows in the log
print(np.sum(log.z_t))   # total of the z_t column
print(np.sum(log.u_t))   # total of the u_t column
# Count of the most frequent activity: value_counts() sorts counts in descending
# order and is indexed by activity label, so the first row is selected by position
# with .iloc. Plain [0] relied on the deprecated positional fallback of
# Series.__getitem__ and emitted a FutureWarning.
print(log.activity.value_counts().iloc[0])

5496
4044070.380281922
2846.593311724785
1259


  print(log.activity.value_counts()[0])


In [8]:
# Simulate the event-log for the memoryless settings (`eventlog_settings2`).
# Note: this rebinds `log`, replacing the log generated from `eventlog_settings`.
log = generate_eventlog(eventlog_settings2)

seed: 1337
traces: 1000
events: 5821
ids: 1000
seed: 1337
traces: 1000
events: 5821
ids: 1000


Unnamed: 0,caseid,activity,activity_no,y_acc_sum,z_t,n_t,q_t,h_t,b_t,s_t,v_t,u_t,starttime,endtime,arrival_datetime,start_datetime,end_datetime,start_day,start_hour
0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
1,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
3,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
4,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5816,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
5817,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
5818,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
5819,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True


In [9]:
# Summary figures for the log currently bound to `log`.
print(len(log))          # number of rows in the log
print(np.sum(log.z_t))   # total of the z_t column
print(np.sum(log.u_t))   # total of the u_t column
# Count of the most frequent activity: value_counts() sorts counts in descending
# order and is indexed by activity label, so the first row is selected by position
# with .iloc. Plain [0] relied on the deprecated positional fallback of
# Series.__getitem__ and emitted a FutureWarning.
print(log.activity.value_counts().iloc[0])

5821
4336436.045471672
2786.1606218259517
1275


  print(log.activity.value_counts()[0])
