**Neural network experiment**

# **import user-specified packages and google drive files**

In [1]:
%xmode Verbose

Exception reporting mode: Verbose


In [3]:
from google.colab import files
uploaded = files.upload()

Saving dataset_confs.py to dataset_confs.py
Saving DatasetManager.py to DatasetManager.py


In [4]:
import dataset_confs
from DatasetManager import DatasetManager

# **import datasets**

In [5]:
from google.colab import files
uploaded = files.upload()

Saving BPIC11_f1.csv to BPIC11_f1.csv
Saving BPIC11_f2.csv to BPIC11_f2.csv
Saving BPIC11_f3.csv to BPIC11_f3.csv
Saving BPIC11_f4.csv to BPIC11_f4.csv


In [6]:
csv_files = {
    "bpic2011": ["BPIC11_f%s"%formula for formula in range(1,5)],
    #"bpic2015": ["BPIC15_%s_f2"%(municipality) for municipality in range(3,6)],
    #"sepsis_cases": ["sepsis_cases_1", "sepsis_cases_2", "sepsis_cases_4"],
    #"bpic2012": ["bpic2012_O_ACCEPTED-COMPLETE","bpic2012_O_CANCELLED-COMPLETE","bpic2012_0_DECLINED-COMPLETE"],
    #"production": ["Production"],
    #"bpic2017": ["BPIC17_O_Accepted","BPIC17_O_Cancelled","BPIC17_0_Refused"],
    #"traffic_fines": ["traffic_fines_%s"%formula for formula in range(1,3)],
    #"hospital_billing": ["hospital_billing_%s"%suffix for suffix in [2,3]]
}
files = []
for k, v in csv_files.items():
    files.extend(v)
dataset_ref_to_datasets = {
     "bpic2011": ["bpic2011_f%s"%formula for formula in range(1,5)],
    #"bpic2015": ["bpic2015_%s_f2"%(municipality) for municipality in range(3,6)],
    #"sepsis_cases": ["sepsis_cases_1", "sepsis_cases_2", "sepsis_cases_4"],
    #"bpic2012": ["bpic2012_accepted","bpic2012_cancelled","bpic2012_declined"],
    #"production": ["production"],
    #"bpic2017": ["bpic2017_accepted","bpic2017_cancelled","bpic2017_refused"],
    #"traffic_fines": ["traffic_fines_%s"%formula for formula in range(1,3)],
    #"hospital_billing": ["hospital_billing_%s"%suffix for suffix in [2,3]]
}

files = []
for k, v in csv_files.items():
    files.extend(v)
datasets = []
for k, v in dataset_ref_to_datasets.items():
    datasets.extend(v)
res = {datasets[i]: files[i] for i in range(len(datasets))}

In [7]:
datasets

['bpic2011_f1', 'bpic2011_f2', 'bpic2011_f3', 'bpic2011_f4']

In [8]:
res

{'bpic2011_f1': 'BPIC11_f1',
 'bpic2011_f2': 'BPIC11_f2',
 'bpic2011_f3': 'BPIC11_f3',
 'bpic2011_f4': 'BPIC11_f4'}

# **import packages and functions**

In [22]:
#Functions and packages
import pandas as pd
import numpy as np
import os
import pickle
import random
from scipy.stats import spearmanr
from sklearn.metrics import roc_auc_score
from sklearn.base import BaseEstimator, TransformerMixin
from pandas.api.types import is_string_dtype
from collections import OrderedDict
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

#hyperopt
import hyperopt
from hyperopt import hp, Trials, fmin, tpe, STATUS_OK
from hyperopt.pyll.base import scope

#LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Dropout, Input, Multiply, concatenate, Embedding, LSTM
from tensorflow.keras.layers import Bidirectional, TimeDistributed
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Nadam, Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow.keras.utils as ku
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Softmax, Lambda
from tensorflow.keras import backend

#CNN
from tensorflow.keras.layers import Conv1D

# **Own created functions**

In [23]:
#functions
#https://towardsdatascience.com/using-neural-networks-with-embedding-layers-to-encode-high-cardinality-categorical-variables-c1b872033ba2
class ColumnEncoder(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.columns = None
        self.maps = dict()

    def transform(self, X):
        X_copy = X.copy()
        for col in self.columns:
            # encode value x of col via dict entry self.maps[col][x]+1 if present, otherwise 0
            X_copy.loc[:,col] = X_copy.loc[:,col].apply(lambda x: self.maps[col].get(x, -1)+1)
        return X_copy

    def inverse_transform(self, X):
        X_copy = X.copy()
        for col in self.columns:
            values = list(self.maps[col].keys())
            # find value in ordered list and map out of range values to None
            X_copy.loc[:,col] = [values[i-1] if 0<i<=len(values) else None for i in X_copy[col]]
        return X_copy

    def fit(self, X, y=None):
        # only apply to string type columns
        self.columns = [col for col in X.columns if is_string_dtype(X[col])]
        for col in self.columns:
            self.maps[col] = OrderedDict({value: num for num, value in enumerate(sorted(set(X[col])))})
        return self

def prepare_inputs(X_train, X_test, data):  
    global ce
    ce = ColumnEncoder()
    X_train, X_test = X_train.astype(str), X_test.astype(str)
    X_train_enc = ce.fit_transform(X_train)
    X_test_enc = ce.transform(X_test)
    return X_train_enc, X_test_enc
    
def numeric_padding(sequences, maxlen=None, value=0):
    num_samples = len(sequences)
    sample_shape = np.asarray(sequences[0]).shape[1:]
    x = np.full((num_samples, maxlen) + sample_shape, value)
    for idx, s in enumerate(sequences):
        trunc = s[:maxlen]
        x[idx, :maxlen] = trunc[0]
        
def create_index(log_df, column):
    """Creates an idx for a categorical attribute.
    Args:
        log_df: dataframe.
        column: column name.
    Returns:
        index of a categorical attribute pairs.
    """
    temp_list = log_df[[column]].values.tolist()
    subsec_set = {str((x[0])) for x in temp_list}
    subsec_set = sorted(list(subsec_set))
    alias = dict()
    for i, _ in enumerate(subsec_set):
        alias[subsec_set[i]] = i + 1
    return alias

def groupby_caseID(data, cols):
    ans = [pd.DataFrame(y) for x, y in data[cols].groupby('Case ID', as_index=False)]
    return ans

def remove_punctuations(columns_before):
    columns = []
    for string in columns_before:
        new_string = string.replace(":", "_")
        columns.append(new_string)
    return columns

#call this function with the name of the right column
def create_indexes(i, data):
    cat_index = create_index(data, i)
    cat_index['Start'] = 0
    cat_index['End'] = len(cat_index)
    index_cat = {v: k for k, v in cat_index.items()}
    cat_weights = ku.to_categorical(sorted(index_cat.keys()), len(cat_index))
    no_cols = len(data.groupby([i]))+1
    return cat_weights, index_cat, cat_index, no_cols

def labels_after_grouping(data_train,data_test):
    train_labels = []
    for i in range (0,len(data_train)):
        temp_label = data_train[i]['label'].iloc[0]
        train_labels.append(temp_label)

    test_labels = []
    for i in range (0,len(data_test)):
        temp_label = data_test[i]['label'].iloc[0]
        test_labels.append(temp_label)
    train_y = [1 if i!='regular' else 0 for i in train_labels]
    test_y = [1 if i!='regular' else 0 for i in test_labels]
    return train_y, test_y

def pad_cat_data(cols, data_train, data_test, maxlen):
    
    #padding of the different categorical columns
    #train paddings
    paddings_train = []
    for i in cols:
        padding= []
        for k in range(0,len(data_train)):
            temp = []
            temp = list(data_train[k][i])
            padding.append(temp)
        padded = np.array(pad_sequences(padding,maxlen=maxlen, padding='pre', truncating='pre',value=0))
        #padded = padded/len(data.groupby([i]))
        paddings_train.append(padded)

    #test paddings
    paddings_test = []
    for i in cols:
        padding= []
        for k in range(0,len(data_test)):
            temp = []
            temp = list(data_test[k][i])
            padding.append(temp)
        padded = np.array(pad_sequences(padding,maxlen=maxlen, padding='pre', truncating='pre',value=0))
        #padded = padded/len(data.groupby([i]))
        paddings_test.append(padded)
    return paddings_train, paddings_test

def pad_num_data(cols, data_train, data_test, maxlen, dt_train_prefixes, dt_test_prefixes):
    pad_train = []
    pad_test  = []
    for i in cols:
        
        padding = []
        for k in range(0,len(data_train)):
            temp_train = []
            temp_train = list(data_train[k][i])
            padding.append(temp_train)

        padded = np.array(pad_sequences(padding,maxlen=maxlen, padding='pre', truncating='pre',value=0))
        if dt_train_prefixes[i].max() !=0:
           
            padded = padded/dt_train_prefixes[i].max()
        else:
            padded = padded
        pad_train.append(padded)
   
    for i in cols:
      
        padding = []
        for k in range(0,len(data_test)):
            temp_test = []
            temp_test = list(data_test[k][i])
            padding.append(temp_test)
      
        padded = np.array(pad_sequences(padding,maxlen=maxlen, padding='pre', truncating='pre',value=0))
        if dt_test_prefixes[i].max() !=0:
            padded = padded/dt_test_prefixes[i].max()
        else:
            padded = padded
        pad_test.append(padded)
    return pad_train, pad_test

def reshape_num_data(pad_data, cutoff):
        pad_num = np.reshape(pad_data, (len(pad_data), cutoff, 1))
        return pad_num

# **parameters**

In [27]:
#terminology 
#event log dictionary
params_dir = './params_dir_DL'
results_dir = './results' 
column_selection= 'all'
cls_encoding ='embeddings'
classifiers =['LSTM','CNN']
n_iter = 1
n_splits = 3
train_ratio = 0.8
random_state = 22
l2reg=0.001
allow_negative=False
incl_time = True 
incl_res = True
# create results directory
if not os.path.exists(os.path.join(params_dir)):
    os.makedirs(os.path.join(params_dir))

# **Function for preprocessing the data**

In [25]:
# function for preprocessing data

def create_data(dt_train_prefixes, dt_test_prefixes):


  #get the label of the train and test set
  test_y = dataset_manager.get_label_numeric(dt_test_prefixes)
  train_y = dataset_manager.get_label_numeric(dt_train_prefixes)   
  
  #cat columns integerencoded
  cat_cols = cls_encoder_args['dynamic_cat_cols']+cls_encoder_args['static_cat_cols']

  dt_train_prefixes[cat_cols],dt_test_prefixes[cat_cols]= prepare_inputs(dt_train_prefixes[cat_cols], dt_test_prefixes[cat_cols], data)
  dt_train_prefixes[cat_cols] = dt_train_prefixes[cat_cols]+1
  dt_test_prefixes[cat_cols] = dt_test_prefixes[cat_cols]+1
  #append caseId and label
  cat_cols.append('Case ID')
  cat_cols.append('label')
  #groupby case ID
  
  ans_train = groupby_caseID(dt_train_prefixes, cat_cols)
  ans_test = groupby_caseID(dt_test_prefixes, cat_cols)
  #obtain the new label lists after grouping
  train_y, test_y = labels_after_grouping(ans_train, ans_test)
  #remove then back
  cat_cols.remove('label')
  cat_cols.remove('Case ID')
  #pad cat columns
  paddings_train, paddings_test = pad_cat_data(cat_cols, ans_train, ans_test, maxlen)
  
  #NUMERICAL COLUMNS
  numerical_columns = cls_encoder_args['dynamic_num_cols']+cls_encoder_args['static_num_cols']
  numerical_columns.remove('timesincelastevent')
 
  numerical_columns.append('Case ID')
  ans_train2 = groupby_caseID(dt_train_prefixes, numerical_columns)
  ans_test2 = groupby_caseID(dt_test_prefixes, numerical_columns )
  numerical_columns.remove('Case ID')  
  pad_train, pad_test = pad_num_data(numerical_columns, ans_train2, ans_test2, maxlen, dt_train_prefixes, dt_test_prefixes)
  
  #time inputs                   
  ans_time_train= groupby_caseID(dt_train_prefixes,['timesincelastevent', 'Case ID'])
  ans_time_test = groupby_caseID(dt_test_prefixes,['timesincelastevent', 'Case ID'])
  pad_time_train, pad_time_test = pad_num_data(['timesincelastevent'], ans_time_train, ans_time_test, maxlen, dt_train_prefixes, dt_test_prefixes)
  #reshape the time input
  padded_time = reshape_num_data(pad_time_train[0], cutoff)
  padded_time_test=  reshape_num_data(pad_time_test[0], cutoff)
            
  return pad_train, pad_test, paddings_train, paddings_test, padded_time, padded_time_test, train_y, test_y

# **Create and evaluate model**

In [38]:
def create_and_evaluate_model(args):     
        global trial_nr
        trial_nr += 1
        for cv_iter in range(n_splits):
          dt_test_prefixes_original = dt_prefixes[cv_iter]
          dt_train_prefixes_original = pd.DataFrame()
          for cv_train_iter in range(n_splits): 
              if cv_train_iter != cv_iter:
                  dt_train_prefixes_original = pd.concat([dt_train_prefixes_original, dt_prefixes[cv_train_iter]], axis=0)
       
        
        dt_train_prefixes = dt_test_prefixes_original.copy()
        dt_test_prefixes = dt_test_prefixes_original.copy()
        
        pad_train, pad_test, paddings_train, paddings_test, padded_time, padded_time_test, train_y, test_y = create_data(dt_train_prefixes, dt_test_prefixes)
        cat_cols = cls_encoder_args['dynamic_cat_cols']+cls_encoder_args['static_cat_cols']
        
        numerical_columns = cls_encoder_args['dynamic_num_cols']+cls_encoder_args['static_num_cols']
        numerical_columns.remove('timesincelastevent')
        #create the input layers and embeddings
        embeddings= []
        input_layers = []
        preds_all = []
        test_y_all = []
        score = 0
        dim = 0        
        
        for i in cat_cols:
            cat_weights, index_cat, cat_index, no_cols = create_indexes(i, data)
            i = i.replace(':','_')
            i = i.replace(' ','_')
            input_layer = Input(shape=(cutoff,), name=i)
            embedding = Embedding(cat_weights.shape[0],
                                          cat_weights.shape[1],
                                          weights=[cat_weights],
                                          input_length=no_cols,
                                        name='embed_'+i)(input_layer)
            embeddings.append(embedding)
            input_layers.append(input_layer)
            dim += cat_weights.shape[1]
            
        #static input layers
        
        for j in numerical_columns:
            j = j.replace('(','_')
            j = j.replace(')','_')
            j = j.replace(' ','_')
            j = j.replace(':','_')
            input_layer = Input(shape=(cutoff,1), name=j)
            input_layers.append(input_layer)
            embeddings.append(input_layer)
            dim +=1
         
        #Apply dropout on inputs
        full_embs = concatenate(embeddings, name='full_embedding')
        full_embs = Dropout(args['dropout_rate'])(full_embs)
        time_input_layer = Input(shape=(cutoff,1), name='time_input')
        input_layers.append(time_input_layer)
        time_embs = concatenate([full_embs, time_input_layer], name='allInp')
       
        dim += 1
        l2reg=0.001
        
        #create the model inputs
        model_inputs= []
        model_inputs_test= []
        for i in range(0,len(paddings_train)):
                model_inputs.append(paddings_train[i])
            
        for i in range(0,len(paddings_test)):
                model_inputs_test.append(paddings_test[i])
            
        for i in range(0,len(pad_train)):
                model_inputs.append(reshape_num_data(pad_train[i], cutoff))
                 
        for i in range(0,len(pad_test)):
                model_inputs_test.append(reshape_num_data(pad_test[i], cutoff))
                      
        model_inputs.append(padded_time)
        model_inputs_test.append(padded_time_test)
       
        if cls_method =='LSTM':
                #Compute alpha, visit attention
                alpha = Bidirectional(LSTM(args['lstm_size'], return_sequences=True), name='alpha')
                alpha_out = alpha(time_embs)
                alpha_dense = Dense(1, kernel_regularizer=l2(l2reg))
                alpha_out = TimeDistributed(alpha_dense, name='alpha_dense_0')(alpha_out)
                alpha_out = Softmax(axis=1, name='alpha_softmax')(alpha_out)
                
                #Compute beta, codes attention
                beta = Bidirectional(LSTM(args['lstm_size'], return_sequences=True),   name='beta')
                beta_out = beta(time_embs)
                beta_dense = Dense(dim, activation='tanh', kernel_regularizer=l2(l2reg))
                beta_out = TimeDistributed(beta_dense, name='beta_dense_0')(beta_out)
               
                #Compute context vector based on attentions and embeddings
                c_t = Multiply()([alpha_out, beta_out, time_embs])
                c_t = Lambda(lambda x: backend.sum(x, axis=1))(c_t)

                #Make a prediction
                contexts = Dropout(args['dropout_rate'])(c_t)
                output_layer = Dense(1, activation='sigmoid', name='final_output')(contexts)

        elif cls_method =='CNN':
                #compute alpha
                alpha_dense = Dense(1, kernel_regularizer=l2(l2reg))
                alpha_out = TimeDistributed(alpha_dense, name='alpha_dense_0')(embeddings)
                alpha_out = Softmax(axis=1, name='alpha_softmax')(alpha_out)

                #compute beta           
                beta_dense = Dense(dim, activation='tanh', kernel_regularizer=l2(l2reg))
                beta_out = TimeDistributed(beta_dense, name='beta_dense_0')(embeddings)  

                #conv layer
                conv1 = Conv1D(filters=64, kernel_size=5, activation='tanh', input_shape=(maxlen,no_cols))(alpha_out)
                conv1 = Conv1D(filters=64, kernel_size=5, activation='tanh', input_shape=(maxlen,no_cols))(beta_out)
                
                #Compute context vector based on attentions and embeddings
                c_t = Multiply()([alpha_out, beta_out, time_embs])
                c_t = Lambda(lambda x: backend.sum(x, axis=1))(c_t)

                #Make a prediction
                contexts = Dropout(args['dropout_rate'])(c_t)
                output_layer = Dense(1, activation='sigmoid', name='final_output')(contexts)
                
        #MODEL
        
        model = Model(inputs=[input_layers], outputs=output_layer)

        if args['optimizer']=='RMSprop':
                  opt = RMSprop(learning_rate=args['learning_rate'])
        if args['optimizer']=='Nadam':
                  opt = Nadam(learning_rate=args['learning_rate'])
        if args['optimizer']=='Adam':
                  opt = Adam(learning_rate=args['learning_rate'])
        if args['optimizer']=='SGD':
                  opt = SGD(learning_rate=args['learning_rate'])

        model.compile(loss={'final_output':'binary_crossentropy'}, optimizer= opt)

        model.summary()
           
        early_stopping = EarlyStopping(monitor='val_loss', patience=42)
        model_checkpoint = ModelCheckpoint('output_files/models/model_{epoch:02d}-{val_loss:.2f}.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
        lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
            
      
        
        result = model.fit(model_inputs,
              np.array(train_y),
              callbacks=[early_stopping, lr_reducer, model_checkpoint],
              validation_split = 0.1,
              verbose=2, batch_size=args['batch_size'],
              epochs=100)
            
        # Get the lowest validation loss of the training epochs
        validation_loss = np.amin(result.history['val_loss']) 
        print('Best validation loss of epoch:', validation_loss)
        
        pred = model.predict(model_inputs_test)
        preds_all.extend(pred)
        test_y_all.extend(test_y)
        score += roc_auc_score(test_y_all, preds_all)
        for k, v in args.items():
          fout_all.write("%s;%s;%s;%s;%s;%s;%s\n" % (trial_nr, dataset_name, cls_method, method_name, k, v, score / n_splits))  
         
        fout_all.write("%s;%s;%s;%s;%s\n" % (trial_nr, dataset_name, cls_method, method_name, 0))   
 
        fout_all.flush()
        return {'loss': validation_loss, 
            'status': STATUS_OK, 
            'model': model, 
            'args': args}

# **loop over datasets and classifiers**

In [None]:
for cls_method in classifiers:
  for dataset_name in datasets:
            dataset_name_csv = res[dataset_name]
            print('Dataset:', dataset_name)
            print('Classifier', cls_method)
            print('Encoding', cls_encoding)
            method_name = "%s_%s"%(column_selection, cls_encoding)            
            # read the data
            data = pd.read_csv(dataset_name_csv+'.csv', sep=';')
            if dataset_name in ['bpic2011_f1', 'bpic2011_f2', 'bpic2011_f3', 'bpic2011_f4','bpic2015_1_f2','bpic2015_2_f2','bpic2015_3_f2','bpic2015_4_f2','bpic2015_5_f2','sepsis_cases_1','sepsis_cases_2','sepsis_cases_4']:
              data['time:timestamp'] = pd.to_datetime(data['time:timestamp'])
            if dataset_name in ['bpic2012_accepted', 'bpic2012_cancelled', 'bpic2012_declined']:
              data['Complete Timestamp'] = pd.to_datetime(data['Complete Timestamp'])
            data['timesincemidnight'] = data['timesincemidnight']/60
            data['timesincemidnight'] = round(data['timesincemidnight'],0)
            data['timesincecasestart'] = data['timesincecasestart']/60
            data['timesincecasestart'] = round(data['timesincecasestart'],0)
            data['timesincelastevent'] = data['timesincelastevent']/60
            data['timesincelastevent'] = round(data['timesincelastevent'],0)
            dataset_manager = DatasetManager(dataset_name)
            
            cls_encoder_args = {'case_id_col': dataset_manager.case_id_col, 
                        'static_cat_cols': dataset_manager.static_cat_cols,
                        'static_num_cols': dataset_manager.static_num_cols, 
                        'dynamic_cat_cols': dataset_manager.dynamic_cat_cols,
                        'dynamic_num_cols': dataset_manager.dynamic_num_cols, 
                        'fillna': True}

            # determine min and max (truncated) prefix lengths
            min_prefix_length = 1
            if "traffic_fines" in dataset_name:
                max_prefix_length = 10
            elif "bpic2017" in dataset_name:
                max_prefix_length = min(20, dataset_manager.get_pos_case_length_quantile(data, 0.90))
            else:
                max_prefix_length = min(40, dataset_manager.get_pos_case_length_quantile(data, 0.90))
            maxlen = cutoff = max_prefix_length

            # split into training and test
            train, _ = dataset_manager.split_data_strict(data, train_ratio, split="temporal")
    
    
            # prepare chunks for CV
            dt_prefixes = []
            class_ratios = []
            for train_chunk, test_chunk in dataset_manager.get_stratified_split_generator(train, n_splits=n_splits):
                class_ratios.append(dataset_manager.get_class_ratio(train_chunk))
                # generate data where each prefix is a separate instance
                dt_prefixes.append(dataset_manager.generate_prefix_data(test_chunk, min_prefix_length, max_prefix_length))
            del train
        
            # set up search space
            if cls_method == "LSTM":
                space = {'dropout_rate'       : hp.uniform('dropout_rate',0.01,0.3),
                'lstm_size'      : scope.int(hp.quniform('units',8,256,8)),
                'batch_size' :      scope.int(hp.quniform('batch_size',64,256,8)),
               'optimizer': hp.choice('optimizer',['Nadam', 'Adam', 'SGD', 'RMSprop']),
               'learning_rate': hp.uniform('learning_rate',0.0001,0.01)
                 }
            if cls_method == "CNN":
                space = {'dropout_rate'       : hp.uniform('dropout_rate',0.01,0.3),
                'batch_size' :      scope.int(hp.quniform('batch_size',64,256,8)),
               'optimizer': hp.choice('optimizer',['Nadam', 'Adam', 'SGD', 'RMSprop']),
               'learning_rate': hp.uniform('learning_rate',0.0001,0.01)
                 }

            # optimize parameters
            
            trial_nr = 1
            trials = Trials()
            fout_all = open(os.path.join(params_dir, "param_optim_all_trials_%s_%s_%s.csv" % (cls_method, dataset_name, method_name)), "w")
            rstate = np.random.RandomState(22)
            trials = Trials()
            best = fmin(create_and_evaluate_model, space, algo=tpe.suggest, max_evals=16, trials=trials, rstate = rstate)
            fout_all.close()

            # write the best parameters
            best_params = hyperopt.space_eval(space, best)
            print(best_params)
            outfile = os.path.join(params_dir, "optimal_params_%s_%s_%s.pickle" % (cls_method, dataset_name, method_name))
            textfile = os.path.join(params_dir, "param_optim_all_trials_%s_%s_%s.txt" % (cls_method, dataset_name, method_name))
            
            # write to file
            from google.colab import files
            with open(textfile, "w") as f:
              f.write(str(best_params))
            files.download(textfile)

            from google.colab import files
            with open(outfile, "wb") as fout:
              pickle.dump(best_params, fout)
            files.download(outfile)

Dataset: bpic2011_f1
Classifier LSTM
Encoding embeddings


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp["orig_case_id"] = tmp[self.case_id_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp[self.case_id_col] = tmp[self.case_id_col].apply(lambda x: "%s_%s"%(x, nr_events))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp["prefix_nr"] = nr_events


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Activity_code (InputLayer)     [(None, 36)]         0           []                               
 Producer_code (InputLayer)     [(None, 36)]         0           []                               
 Section (InputLayer)           [(None, 36)]         0           []                               
 Specialism_code.1 (InputLayer)  [(None, 36)]        0           []                               
 group (InputLayer)             [(None, 36)]         0           []                               
 Diagnosis (InputLayer)         [(None, 36)]         0           []                               
 Treatment_code (InputLayer)    [(None, 36)]         0           []                               
 Diagnosis_code (InputLayer)    [(None, 36)]         0           []                         

  layer_config = serialize_layer_fn(layer)



36/36 - 3s - loss: 0.0330 - val_loss: 0.0521 - lr: 0.0051 - 3s/epoch - 81ms/step

Epoch 3/100
36/36 - 3s - loss: 0.0260 - val_loss: 0.0245 - lr: 0.0051 - 3s/epoch - 81ms/step

Epoch 4/100
36/36 - 3s - loss: 0.0531 - val_loss: 0.0512 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 5/100
36/36 - 3s - loss: 0.0365 - val_loss: 0.0423 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 6/100
36/36 - 3s - loss: 0.0193 - val_loss: 0.0094 - lr: 0.0051 - 3s/epoch - 81ms/step

Epoch 7/100
36/36 - 3s - loss: 0.0334 - val_loss: 0.0423 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 8/100
36/36 - 3s - loss: 0.0241 - val_loss: 0.0193 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 9/100
36/36 - 3s - loss: 0.0116 - val_loss: 0.0352 - lr: 0.0051 - 3s/epoch - 79ms/step

Epoch 10/100
36/36 - 3s - loss: 0.0109 - val_loss: 0.0912 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 11/100
36/36 - 3s - loss: 0.0616 - val_loss: 0.0309 - lr: 0.0051 - 3s/epoch - 78ms/step

Epoch 12/100
36/36 - 3s - loss: 0.1217 - val_loss: 0.1031 - lr: 0.0051

  layer_config = serialize_layer_fn(layer)



46/46 - 2s - loss: 1.0284 - val_loss: 1.0312 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 3/100
46/46 - 2s - loss: 1.0281 - val_loss: 1.0320 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 4/100
46/46 - 2s - loss: 1.0278 - val_loss: 1.0329 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 5/100
46/46 - 2s - loss: 1.0274 - val_loss: 1.0337 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 6/100
46/46 - 2s - loss: 1.0271 - val_loss: 1.0344 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 7/100
46/46 - 2s - loss: 1.0268 - val_loss: 1.0350 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 8/100
46/46 - 2s - loss: 1.0266 - val_loss: 1.0357 - lr: 0.0018 - 2s/epoch - 36ms/step

Epoch 9/100
46/46 - 2s - loss: 1.0263 - val_loss: 1.0365 - lr: 0.0018 - 2s/epoch - 37ms/step

Epoch 10/100
46/46 - 2s - loss: 1.0260 - val_loss: 1.0372 - lr: 0.0018 - 2s/epoch - 37ms/step

Epoch 11/100
46/46 - 2s - loss: 1.0257 - val_loss: 1.0380 - lr: 0.0018 - 2s/epoch - 37ms/step

Epoch 12/100
46/46 - 2s - loss: 1.0254 - val_loss: 1.0383 - lr: 8.8414

  layer_config = serialize_layer_fn(layer)



22/22 - 1s - loss: 0.7323 - val_loss: 0.7311 - lr: 3.2237e-04 - 1s/epoch - 63ms/step

Epoch 3/100
22/22 - 1s - loss: 0.4519 - val_loss: 0.5378 - lr: 3.2237e-04 - 1s/epoch - 59ms/step

Epoch 4/100
22/22 - 1s - loss: 0.2713 - val_loss: 0.4385 - lr: 3.2237e-04 - 1s/epoch - 60ms/step

Epoch 5/100
22/22 - 1s - loss: 0.1839 - val_loss: 0.3848 - lr: 3.2237e-04 - 1s/epoch - 60ms/step

Epoch 6/100
22/22 - 1s - loss: 0.1355 - val_loss: 0.3327 - lr: 3.2237e-04 - 1s/epoch - 59ms/step

Epoch 7/100
22/22 - 1s - loss: 0.1041 - val_loss: 0.3010 - lr: 3.2237e-04 - 1s/epoch - 58ms/step

Epoch 8/100
22/22 - 1s - loss: 0.0828 - val_loss: 0.2834 - lr: 3.2237e-04 - 1s/epoch - 59ms/step

Epoch 9/100
22/22 - 1s - loss: 0.0684 - val_loss: 0.2311 - lr: 3.2237e-04 - 1s/epoch - 59ms/step

Epoch 10/100
22/22 - 1s - loss: 0.0571 - val_loss: 0.2315 - lr: 3.2237e-04 - 1s/epoch - 54ms/step

Epoch 11/100
22/22 - 1s - loss: 0.0488 - val_loss: 0.2178 - lr: 3.2237e-04 - 1s/epoch - 60ms/step

Epoch 12/100
22/22 - 1s - loss