In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and pandas SettingWithCopy warnings
# raised by the cells below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

In [3]:
from dataprep.helperfunctions import *
from dataprep.memory_helperfunctions import prepare_data_f_memory

In [4]:
from simulation.simulation_pipeline import *
from simulation.simulation_helpers import *

In [5]:
from experiment.DoE import *

# Make a design table

In [6]:
# Factor levels of the experiment; each key becomes one column of the design table.
# Only one level per factor is active (levels that were listed but disabled:
# "med_entropy" / "max_entropy" for process_entropy, "memory" for process_type).
run_settings = {
    "process_entropy": ["min_entropy"],
    "number_of_traces": [100],
    "statespace_size": [5],
    "process_type": ["memoryless"],
    "process_memory": [5],
    "Fact1": [100],
    "Fact2": [100],
    "Fact3": [100],
}

# Full-factorial design over all factor levels.
df = build_full_fact(run_settings)

# Recode the string-valued factors from their numeric level codes back to labels.
df = fix_label_values(
    df,
    run_settings,
    variables=["process_entropy", "process_type"],
)

# Bookkeeping columns: 1-based run id plus completion / failure flags.
df["RUN"] = df.index + 1
df["Done"] = 0
df["Failure"] = 0

# Cast the state-space size to an integer dtype, then display the design table.
df["statespace_size"] = df["statespace_size"].astype(int)
df

Unnamed: 0,process_entropy,number_of_traces,statespace_size,process_type,process_memory,Fact1,Fact2,Fact3,Name_fix,RUN,Done,Failure
0,min_entropy,100.0,5,memoryless,5.0,100.0,100.0,100.0,1,1,0,0


In [7]:
# Loop over the experiments

In [8]:
# One entry per executed run: a single-column DataFrame whose rows are the
# design factors of that run plus the collected "RES_*" result fields.
results = []

for run in df.index:
    # ------------------------------------------------------------------
    # Settings from experiments
    # ------------------------------------------------------------------
    # Work on a copy of the design row: result fields are added to it below
    # and must never write back into `df` (the SettingWithCopy warning that
    # would flag this is silenced by the global warnings filter).
    curr_settings = df.loc[run].copy()

    # ------------------------------------------------------------------
    # Settings for simulation
    # ------------------------------------------------------------------
    SIM_SETTINGS = {
        "save_eventlog": 1,  # 0 = no, 1 = yes

        # Factors taken from the current design row. The numeric ones are cast
        # explicitly because the design table stores them as floats.
        "statespace_size": make_D(int(curr_settings["statespace_size"])),
        "number_of_traces": int(curr_settings["number_of_traces"]),
        "process_entropy": curr_settings["process_entropy"],
        "process_type": curr_settings["process_type"],
        "process_memory": int(curr_settings["process_memory"]),

        "process_settings": {
            # desired max number of steps
            "med_ent_e_steps": 5,
            # desired max number of possible transitions in P.
            # NOTE: This can maximally be the number of states, and should be higher than 2
            "med_ent_n_transitions": 3,
            # max number of trials to find matrix with desired max steps
            "med_ent_max_trials": 5,
        },

        "time_settings": {
            # lambda parameter of inter-arrival times
            "inter_arrival_time": 1.5,
            # lambda parameter of process noise
            "process_stability_scale": 0.1,
            # probability of getting an agent
            "resource_availability_p": 0.5,
            # waiting time in days, when no agent is available
            "resource_availability_n": 3,
            # NOTE(review): the original carried the same "waiting time in days"
            # comment for both _n and _m — confirm what _m actually controls.
            "resource_availability_m": 0.041,
            # variation between activity durations
            "activity_duration_lambda_range": 0.5,
            # Work-week mode; the other option listed in the original is "weekdays".
            "Deterministic_offset_W": make_workweek("all-week"),
            # time-unit for a full week: days = 7, hrs = 24*7, etc.
            # (presumably this describes Deterministic_offset_u — TODO confirm)
            "Deterministic_offset_u": 7,
        },

        # NOTE(review): hard-coded to 0 for every iteration; if the simulator
        # uses this to label saved event logs, the current run id may be
        # intended instead — confirm against Generate_eventlog.
        "run": 0,
    }

    # Generate the log
    log = Generate_eventlog(SIM_SETTINGS)
    print(len(log))

    # ------------------------------------------------------------------
    # Prepare data for modelling
    # ------------------------------------------------------------------
    input_data = prepare_data_f_memory(log)

    # ------------------------------------------------------------------
    # Train a model (placeholder: only the inputs are unpacked so far)
    # ------------------------------------------------------------------
    # X:
    x_train = input_data["x_train"]
    x_test = input_data["x_test"]

    # Y: the original looked up "y_test" twice; the first lookup is assumed to
    # have been meant as "y_train" (mirrors the x_train / x_test pair above).
    y_train = input_data["y_train"]
    y_test = input_data["y_test"]

    # ------------------------------------------------------------------
    # Evaluate the model (not implemented yet)
    # ------------------------------------------------------------------

    # ------------------------------------------------------------------
    # Store the results
    # ------------------------------------------------------------------
    curr_settings["RES_num_events"] = len(log)

    # A Series wrapped in a DataFrame becomes a single column (fields as rows).
    # The original's `.T` on the Series was a no-op and is dropped; the
    # resulting shape is unchanged.
    curr_settings = pd.DataFrame(curr_settings)

    results.append(curr_settings)

500
Cases before dropping len=1: 100 cases 500 rows
Cases after dropping len=1: 100 cases 500 rows
Sorting by id, date (chronological order)
Number of cases in log: 100
longest trace is: 5
Time format: 1970-01-05 21:52:46
Std. format: %Y-%m-%d %H:%M:%S
   id event  activity_no                 time        end_datetime
0   1    S1            1  1970-01-05 21:52:46 1970-01-05 23:26:01
1   1    S4            2  1970-01-06 12:00:00 1970-01-06 07:45:41
2   1    S3            5  1970-01-06 12:00:00 1970-01-06 09:35:14
3   1    S2            4  1970-01-06 12:41:54 1970-01-06 07:58:27
4   1    S5            3  1970-01-06 14:02:33 1970-01-06 07:52:31
mode: event
**********************************************************************************************************************************************************************************************************************************************************
Log starts at: 1970-01-05 12:00:00
Last event starts at: 1970-01-11 23:27:59
Train-test 

# Inspect example data

In [9]:
# Display the event log generated in the final iteration of the experiment loop.
log

Unnamed: 0,caseid,activity,activity_no,y_acc_sum,X,Y,z_t,h_t,b_t,q_t,s_t,v_t,arrival_datetime,start_datetime,end_datetime,start_day,start_hour
0,0,S1,1,0.976409,0.909382,0.976409,0.909382,0.000,0.002263,0.911644,0.000000,0.067027,1970-01-05 21:49:30,1970-01-05 21:52:46,1970-01-05 23:26:01,Monday,21
1,0,S4,2,2.299803,0.976409,1.323395,0.909382,0.123,0.093646,1.193055,0.306945,0.040041,1970-01-05 23:26:01,1970-01-06 12:00:00,1970-01-06 07:45:41,Tuesday,12
2,0,S5,3,3.627949,1.323395,1.328146,0.909382,0.123,0.138713,1.585107,0.000000,0.004752,1970-01-06 07:45:41,1970-01-06 14:02:33,1970-01-06 07:52:31,Tuesday,14
3,0,S2,4,4.960213,1.328146,1.332263,0.909382,0.082,0.118956,1.529102,0.000000,0.004117,1970-01-06 07:52:31,1970-01-06 12:41:54,1970-01-06 07:58:27,Tuesday,12
4,0,S3,5,6.359685,1.332263,1.399472,0.909382,0.123,0.028760,1.484024,0.015976,0.051233,1970-01-06 07:58:27,1970-01-06 12:00:00,1970-01-06 09:35:14,Tuesday,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,99,S1,1,143.846190,143.798727,143.846190,143.798727,0.082,0.014744,3.895471,0.000000,0.047463,1970-05-28 19:10:10,1970-01-08 21:29:28,1970-05-28 20:18:30,Thursday,21
496,99,S4,2,287.920156,143.846190,144.073966,143.798727,0.000,0.103110,3.949300,0.000000,0.227776,1970-05-28 20:18:30,1970-01-08 22:46:59,1970-05-29 01:46:30,Thursday,22
497,99,S5,3,432.362374,144.073966,144.442218,143.798727,0.041,0.017165,4.132131,0.367869,0.000383,1970-05-29 01:46:30,1970-01-09 12:00:00,1970-05-29 10:36:47,Friday,12
498,99,S2,4,576.815915,144.442218,144.453540,143.798727,0.082,0.010983,4.535201,0.000000,0.011322,1970-05-29 10:36:47,1970-01-09 12:50:41,1970-05-29 10:53:05,Friday,12


In [10]:
# Display the dictionary returned by prepare_data_f_memory for the last generated log.
input_data

{'x_train': array([[[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 78766.0, 1, ..., 0.0, 0.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 78766.0, 1, ..., 0.0, 0.0, 0.0],
         [3, 43200.0, 2, ..., 0.0, 1.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 78766.0, 1, ..., 0.0, 0.0, 0.0],
         [3, 43200.0, 2, ..., 0.0, 1.0, 0.0],
         [4, 43200.0, 2, ..., 1.0, 0.0, 0.0]],
 
        ...,
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
         [2, 77368.0, 4, ..., 0.0, 0.0, 0.0],
         [3, 82019.0, 4, ..., 0.0, 1.0, 0.0]],
 
        [[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.