In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

In [3]:
from dataprep.helperfunctions import *
from dataprep.memory_helperfunctions import prepare_data_f_memory#, prepare_dataset_from_memory

In [4]:
from simulation.simulation_pipeline import *
from simulation.simulation_helpers import *

In [5]:
from experiment.DoE import *

# Make a design table

In [6]:
# Factor table for the experiment: each key is a factor, each value is the list
# of levels to combine. Every factor currently has exactly one level, so the
# full factorial below contains a single run.
run_settings = dict(
    # Factors that the visible simulation loop does not read
    # (presumably model/training hyper-parameters -- confirm against the training code).
    num_units=[100],
    num_blocks=[1],
    epochs=[100],
    batch_size=[128],
    learningrate=[0.01],
    optimizer=["Adam"],
    y_transformation=["log"],          # other level noted by the author: "range"
    # Factors forwarded to the event-log simulation.
    process_entropy=["min_entropy"],   # other levels noted: "med_entropy", "max_entropy"
    number_of_traces=[100],
    statespace_size=[5],
    process_type=["memoryless"],       # other level noted: "memory"
    process_memory=[5],
)

# Build the full-factorial design from the factor table.
df = build_full_fact(run_settings)

# String-valued factors come back recoded as numbers; map them back to their labels.
label_columns = ["process_entropy", "process_type", "optimizer", "y_transformation"]
df = fix_label_values(df, run_settings, variables=label_columns)

# Book-keeping columns: 1-based run id plus progress/failure flags (all start at 0).
df["RUN"] = df.index + 1
for flag_column in ("Done", "Failure"):
    df[flag_column] = 0

# The state-space size must be an integer.
df["statespace_size"] = df["statespace_size"].astype(int)
df

Unnamed: 0,num_units,num_blocks,epochs,batch_size,learningrate,optimizer,y_transformation,process_entropy,number_of_traces,statespace_size,process_type,process_memory,Name_fix,RUN,Done,Failure
0,100.0,1.0,100.0,128.0,0.01,Adam,log,min_entropy,100.0,5,memoryless,5.0,1,1,0,0


In [7]:
# Loop over the experiments

In [8]:
# Run every row of the design table `df`: simulate an event log, prepare it for
# modelling and collect the run's settings plus result fields in `results`.
# After the loop, `log` and `input_data` hold the values of the LAST run only.
results = []

for run in df.index:
    # ------------------------------------------------------------------
    # Settings from experiments
    # ------------------------------------------------------------------
    # Take an explicit copy of the design row: a result field is added to it
    # further down, and assigning into the object returned by `df.loc[run]`
    # is the chained-assignment pattern pandas warns about (the warning is
    # invisible in this notebook because all warnings are ignored).
    curr_settings = df.loc[run].copy()

    # ------------------------------------------------------------------
    # Settings for simulation
    # ------------------------------------------------------------------
    SIM_SETTINGS = {
        # 0 = no, 1 = yes... Standard destination: A:\Process_sim
        "save_eventlog": 0,

        # Values taken from the current design row (cast to int where needed,
        # because the design table stores these columns as floats).
        "statespace_size": make_D(int(curr_settings["statespace_size"])),
        "number_of_traces": int(curr_settings["number_of_traces"]),
        "process_entropy": curr_settings["process_entropy"],
        "process_type": curr_settings["process_type"],
        "process_memory": int(curr_settings["process_memory"]),

        "process_settings": {
            # desired max number of steps
            "med_ent_e_steps": 5,
            # desired max number of possible transitions in P.
            # NOTE: This can maximally be the number of states, and should be higher than 2
            "med_ent_n_transitions": 3,
            # max number of trials to find matrix with desired max steps
            "med_ent_max_trials": 5,
        },

        "time_settings": {
            # lambda parameter of inter-arrival times
            "inter_arrival_time": 1.5,
            # lambda parameter of process noise
            "process_stability_scale": 0.1,
            # probability of getting an agent
            "resource_availability_p": 0.5,
            # waiting time in days, when no agent is available
            # NOTE(review): the original carried this same description for both
            # `_n` and `_m`; one of the two is probably mislabeled -- confirm.
            "resource_availability_n": 3,
            # waiting time in days, when no agent is available
            "resource_availability_m": 0.041,
            # variation between activity durations
            "activity_duration_lambda_range": 0.5,

            # Work-week definition; "all-week" is used here ("weekdays" is the
            # other option listed by the author).
            "Deterministic_offset_W": make_workweek("all-week"),
            # time-unit for a full week: days = 7, hrs = 24*7, etc.
            # NOTE(review): this description sat above `Deterministic_offset_W`
            # in the original but appears to describe this value -- confirm.
            "Deterministic_offset_u": 7,
        },

        # NOTE(review): fixed at 0 for every iteration rather than following
        # `run` -- confirm this is intended.
        "run": 0,
    }

    # Generate the log
    log = Generate_eventlog(SIM_SETTINGS)
    n_events = len(log)
    print(n_events)

    # ------------------------------------------------------------------
    # Prepare data for modelling
    # ------------------------------------------------------------------
    input_data = prepare_data_f_memory(log)

    # ------------------------------------------------------------------
    # Train a model
    # ------------------------------------------------------------------
    # TODO: no model is trained yet. The inputs are bound to names (instead of
    # the original bare, discarded lookups) so the training step can be
    # dropped in; a missing key still fails loudly here.
    x_train, x_test = input_data["x_train"], input_data["x_test"]
    y_train, y_test = input_data["y_train"], input_data["y_test"]

    # ------------------------------------------------------------------
    # Evaluate the model
    # ------------------------------------------------------------------
    # TODO: no evaluation is implemented yet.

    # ------------------------------------------------------------------
    # Store the results
    # ------------------------------------------------------------------
    curr_settings["RES_num_events"] = n_events

    # `Series.T` is a no-op, so the original `pd.DataFrame(curr_settings.T)`
    # produced a single-COLUMN frame (fields as the index). That shape is kept
    # so downstream consumers of `results` are unaffected.
    # NOTE(review): if one ROW per run was intended, use
    # `curr_settings.to_frame().T` and adjust the consumers.
    curr_settings = pd.DataFrame(curr_settings)

    results.append(curr_settings)

500
Cases before dropping len=1: 100 cases 500 rows
Cases after dropping len=1: 100 cases 500 rows
Sorting by id, date (chronological order)
Number of cases in log: 100
longest trace is: 5
Time format: 1970-01-06 12:00:00
Std. format: %Y-%m-%d %H:%M:%S
   id event  activity_no                 time        end_datetime
0   1    S2            1  1970-01-06 12:00:00 1970-01-06 11:26:46
1   1    S1            2  1970-01-06 13:46:47 1970-01-06 12:11:26
2   1    S3            4  1970-01-06 17:43:03 1970-01-06 15:13:51
3   1    S4            3  1970-01-06 17:58:57 1970-01-06 13:37:03
4   1    S5            5  1970-01-06 23:47:55 1970-01-06 19:04:19
mode: event
**********************************************************************************************************************************************************************************************************************************************************
Log starts at: 1970-01-05 12:00:00
Last event starts at: 1970-01-11 23:28:49
Train-test 

# Inspect example data

In [11]:
# Display the event log left in the namespace by the experiment loop, i.e. the
# log generated for the last design row that was processed.
# NOTE(review): the execution counter jumps from In [8] to In [11]; re-run from
# a fresh kernel to confirm this output matches the loop above.
log

Unnamed: 0,caseid,activity,activity_no,y_acc_sum,X,Y,z_t,h_t,b_t,q_t,s_t,v_t,arrival_datetime,start_datetime,end_datetime,start_day,start_hour
0,0,S2,1,1.476928,1.200207,1.476928,1.200207,0.041,0.001347,1.242554,0.257446,0.019275,1970-01-06 04:48:17,1970-01-06 12:00:00,1970-01-06 11:26:46,Tuesday,12
1,0,S1,2,2.984872,1.476928,1.507944,1.200207,0.041,0.056233,1.574161,0.000000,0.031016,1970-01-06 11:26:46,1970-01-06 13:46:47,1970-01-06 12:11:26,Tuesday,13
2,0,S4,3,4.552276,1.507944,1.567404,1.200207,0.082,0.159337,1.749281,0.000000,0.059461,1970-01-06 12:11:26,1970-01-06 17:58:57,1970-01-06 13:37:03,Tuesday,17
3,0,S3,4,6.186898,1.567404,1.634622,1.200207,0.000,0.170830,1.738234,0.000000,0.067218,1970-01-06 13:37:03,1970-01-06 17:43:03,1970-01-06 15:13:51,Tuesday,17
4,0,S5,5,7.981571,1.634622,1.794673,1.200207,0.082,0.274987,1.991609,0.000000,0.160051,1970-01-06 15:13:51,1970-01-06 23:47:55,1970-01-06 19:04:19,Tuesday,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,99,S2,1,148.560166,148.446579,148.560166,148.446579,0.041,0.020631,1.508210,0.000000,0.113588,1970-06-02 10:43:04,1970-01-06 12:11:49,1970-06-02 13:26:38,Tuesday,12
496,99,S1,2,297.254039,148.560166,148.693873,148.446579,0.082,0.033387,1.675554,0.000000,0.133706,1970-06-02 13:26:38,1970-01-06 16:12:47,1970-06-02 16:39:10,Tuesday,16
497,99,S4,3,446.005386,148.693873,148.751347,148.446579,0.082,0.013838,1.789711,0.000000,0.057474,1970-06-02 16:39:10,1970-01-06 18:57:10,1970-06-02 18:01:56,Tuesday,18
498,99,S3,4,594.900833,148.751347,148.895448,148.446579,0.000,0.088464,1.839810,0.000000,0.144101,1970-06-02 18:01:56,1970-01-06 20:09:19,1970-06-02 21:29:26,Tuesday,20


In [12]:
# Display the object returned by prepare_data_f_memory(log) for the last run of
# the experiment loop; the loop reads its "x_train", "x_test", "y_train" and
# "y_test" entries.
input_data

{'x_train': array([[[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 43200.0, 2, ..., 0.0, 0.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 43200.0, 2, ..., 0.0, 0.0, 0.0],
         [3, 49607.0, 2, ..., 0.0, 0.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 43200.0, 2, ..., 0.0, 0.0, 0.0],
         [3, 49607.0, 2, ..., 0.0, 0.0, 0.0],
         [4, 63783.0, 2, ..., 1.0, 0.0, 0.0]],
 
        ...,
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 43909.0, 2, ..., 0.0, 0.0, 0.0],
         [3, 58367.0, 2, ..., 0.0, 0.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.