In [3]:
# Suppress warnings in this notebook
import warnings
warnings.filterwarnings('ignore')

# Make an experimental design table
Here the settings for the experiments can be modified in the dictionary called run_settings. Refer to the paper for more details on of each of the parameters.

In [4]:
run_settings = {# level of entropy: min, medium and/or max
                "process_entropy":["min_entropy","med_entropy","max_entropy"],
                
                # number of traces/cases in the event-log
                "number_of_traces":[100],

                # number of activity types
                "statespace_size":[5,10], 

                # first or higher-order markov chain to represent the transitions
                "process_type":["memoryless","memory"], 
                
                # order of HOMC - only specify this when using process with memory
                "process_memory":[2,4],
                
                # number of transitions - only used for medium entropy (should be higher than 2 and < statespace size)
                "med_ent_n_transitions":[3, 5],
                                
                # lambda parameter of inter-arrival times
                "inter_arrival_time":[1.5],
                
                # lambda parameter of process noise
                "process_stability_scale":[0.1],
                
                # probability of agent being available
                "resource_availability_p":[0.5],

                # number of agents in the process
                "resource_availability_n":[3],

                # waiting time in full days, when no agent is available
                "resource_availability_m":[0.041],
                
                # variation between activity durations
                "activity_duration_lambda_range":[1, 5],
                
                # business hours definition: when can cases be processed? ()
                "Deterministic_offset_W":["weekdays", "all-week"],

                # time-unit for a full week: days = 7, hrs = 24*7, etc.
                "Deterministic_offset_u":[7],
                
                # training data format (See Verenich et al., 2019): 
                # True - use first event to predict total cycle-time. 
                # False - use Prefix-log format / each event to predict remaining cycle time.
                "first_state_model":[True],

                # offset for the timestamps used (1970 time after 1970)
                "datetime_offset":[35],
                
                # number of repetitions of the experiments: duplicates the experiment table (2 times here)
                "num_replications":list(range(0, 2))
               }


# import the make_design_table function to generate a full factorial experimental design table
from SynBPS.simulation.DoE import make_design_table
df = make_design_table(run_settings, file="data/design_table.csv")

# inspect the resulting design table
df

saved to data/design_table.csv


Unnamed: 0,process_entropy,number_of_traces,statespace_size,process_type,process_memory,med_ent_n_transitions,inter_arrival_time,process_stability_scale,resource_availability_p,resource_availability_n,resource_availability_m,activity_duration_lambda_range,Deterministic_offset_W,Deterministic_offset_u,first_state_model,datetime_offset,num_replications,RUN,Done,Failure
0,min_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,1,0,0
1,med_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,2,0,0
2,max_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,3,0,0
3,min_entropy,100.0,10,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,4,0,0
4,med_entropy,100.0,10,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,med_entropy,100.0,5,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,380,0,0
380,max_entropy,100.0,5,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,381,0,0
381,min_entropy,100.0,10,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,382,0,0
382,med_entropy,100.0,10,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,383,0,0


In [6]:
# inspect the resulting design table*
df

Unnamed: 0,process_entropy,number_of_traces,statespace_size,process_type,process_memory,med_ent_n_transitions,inter_arrival_time,process_stability_scale,resource_availability_p,resource_availability_n,resource_availability_m,activity_duration_lambda_range,Deterministic_offset_W,Deterministic_offset_u,first_state_model,datetime_offset,num_replications,RUN,Done,Failure
0,min_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,1,0,0
1,med_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,2,0,0
2,max_entropy,100.0,5,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,3,0,0
3,min_entropy,100.0,10,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,4,0,0
4,med_entropy,100.0,10,memoryless,2.0,3.0,1.5,0.0,0.5,3.0,0.0,1.0,weekdays,7.0,True,35.0,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,med_entropy,100.0,5,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,380,0,0
380,max_entropy,100.0,5,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,381,0,0
381,min_entropy,100.0,10,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,382,0,0
382,med_entropy,100.0,10,memory,4.0,5.0,1.5,0.0,0.5,3.0,0.0,5.0,all-week,7.0,True,35.0,1.0,383,0,0


# Define training and evaluation functions
Before running the experiments, you need to define model training and evaluation functions.

In this example we train a first state model, which is a model using only the first observed event (state) to predict to total cycle-time. Setting first_state_model to False will produce a prefix-log, which can be used to predict remaining cycle-time from every observed event in the trace.

Input for the training function is a dictionary named <b>Input_daata</b>, which contain the following:
- x_train
- x_test
- y_train
- y_test

In [7]:
curr_settings = df.loc[0]
curr_settings["run"] = 0

from SynBPS.simulation.simulation_pipeline import generate_eventlog
log = generate_eventlog(curr_settings=curr_settings, output_dir="data/")

# Prefix-log format:
from SynBPS.dataprep.memory_helperfunctions import prepare_data_f_memory
input_data = prepare_data_f_memory(log, verbose=False)

# First-state model:
from SynBPS.dataprep.firststate_helperfunctions import fs_prepare_dataset_from_memory'
input_data = fs_prepare_dataset_from_memory(input_data, sample=1.0, transform="log", first_state=True, verbose=True)

traces: 100
eventlog saved to: data/0_Eventlog_min_entropy_memoryless.csv
events: 500
ids: 100
Cases before dropping len=1: 100 cases 500 rows
Cases after dropping len=1: 100 cases 500 rows
Sorting by id, date (chronological order)
Number of cases in log: 100
longest trace is: 5
Time format: 2005-01-03 06:00:00
Std. format: %Y-%m-%d %H:%M:%S
   id event  activity_no                 time        end_datetime
0   1     e            1  2005-01-03 06:00:00 2005-01-03 14:10:29
1   1     f            2  2005-01-03 14:10:29 2005-01-03 17:53:26
2   1     d            3  2005-01-03 17:53:26 2005-01-03 21:56:51
3   1     c            4  2005-01-04 06:00:00 2005-01-04 09:16:55
4   1     b            5  2005-01-04 09:16:55 2005-01-06 04:32:01
mode: event
********************************************************************************************************************************************************************************************************************************************************

In [9]:
input_data.keys()

dict_keys(['x_train', 'x_test', 'y_train', 'y_test', 'y_t_train', 'y_t_test', 'maxlen', 'num_features', 'Inference_train', 'Inference_test', 'xy_train_firststate', 'xy_test_firststate'])

In [11]:
input_data["x_train"].shape
input_data["xy_train_firststate"].shape

(180, 3)

In [12]:
input_data["xy_train_firststate"]

Unnamed: 0,0,y_time_to_ev,y_closed
0,0.0,12.494925,1
1,0.0,12.138770,1
2,0.0,11.922515,1
3,0.0,10.377210,1
4,0.0,12.764252,1
...,...,...,...
175,0.0,12.049921,1
176,0.0,11.635615,1
177,0.0,11.460185,1
178,0.0,10.694247,1


In [None]:
#from sklearn.model

def training_function():
    print("training")
    return

#from sklearn.metrics 

def eval_function():
    print("evaluation")
    return

# Perform the experiments
Here you want to bring your own code into the end of the simulation loop. 

In [None]:
from SynBPS.simulation.simulation_pipeline import run_experiments
results = run_experiments(training_function, 
                          eval_function, 
                          out_file="data/results.csv", 
                          design_table="data/design_table.csv")