# Explainable Outcome Prediction - Hyperparameter Tuning

- Author: David Steiner
- December 2021

In [4]:
import pandas as pd
import math
import keras
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import save_model, model_from_json, Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Activation
from kerastuner.tuners import RandomSearch, Hyperband, BayesianOptimization
from kerastuner.engine.hyperparameters import HyperParameters
from pickle import dump,load

import pandas as pd
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 100)
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
%matplotlib inline

from prep_custom import get_dataset_settings, create_trace_bucket,remove_features,remove_events,split_data_temporal
from prep_custom import replace_missing_cols,prepare_ml_train_test, aggregate_data, group_infrequent_features, cut_trace_before_activity
from prep_custom import scale_data, scale_data, one_hot_encode, prepare_dl_train_test, define_binary_outcome_label
from pred_custom import get_evaluation_metrics, train_model, plot_train_history, get_cnn_clf, get_lstm_clf, get_gru_clf 

## Dataset Selection and Outcome Definition

In [5]:
data, case_id_col, activity_col, timestamp_col, label_col, resource_col, event_categorical_attributes, event_numeric_attributes, case_categorical_attributes, case_numeric_attributes, static_cols, dynamic_cols, cat_cols = get_dataset_settings('BPIC17')
attributes = [event_categorical_attributes, case_categorical_attributes, event_numeric_attributes, case_numeric_attributes]

Categoric Event Attributes: 6 ['concept:name', 'org:resource', 'Action', 'lifecycle:transition', 'Accepted', 'Selected'] 

Numeric Event Attributes: 13 ['CreditScore', 'FirstWithdrawalAmount', 'MonthlyCost', 'NumberOfTerms', 'OfferedAmount', 'timesincelastevent', 'timesincecasestart', 'timesincemidnight', 'event_nr', 'month', 'weekday', 'hour', 'open_cases'] 

Categoric Case Attributes: 3 ['case:ApplicationType', 'case:LoanGoal', 'EventOrigin'] 

Numeric Case Attributes: 1 ['case:RequestedAmount'] 

Dataset Shape (1202267, 26)


In [6]:
# Select dataset and outcome
data_labeled, drop_events_list, dl_attributes = define_binary_outcome_label(data, attributes, outcome_label='BPIC17-LongRunningCases')

Set labels to 1 for Outcome: Long Running Case
label
0    23135
1     8374
Name: case:concept:name, dtype: int64


## Preprocessing

In [7]:
data_labeled_grouped = group_infrequent_features(data_labeled, max_category_levels = 15)
trace_bucket = create_trace_bucket(data_labeled_grouped, 15)
trace_bucket = remove_features(trace_bucket, dl_attributes[0], dl_attributes[1], dl_attributes[2], dl_attributes[3])
trace_bucket = remove_events(data_labeled, trace_bucket, drop_events_list)

train_raw, test_raw = split_data_temporal(data=trace_bucket, train_ratio=0.8)

Binning and Grouping infrequent factor levels to: Other_infrequent
Making predictions after 15 completed events
Making predictions at time after cases have started on average:  57.7 hours/ 2.403 days
Making predictions at time before cases have finished on average:  467.9 hours/ 19.5 days

 Shape: (472433, 26)
Features removed:  1

 Events included:
['A_Create Application' 'A_Submitted' 'W_Handle leads'
 'W_Complete application' 'A_Concept' 'A_Accepted' 'O_Create Offer'
 'O_Created' 'O_Sent (mail and online)' 'W_Call after offers' 'A_Complete'
 'O_Cancelled' 'W_Validate application' 'O_Sent (online only)' 'A_Denied'
 'O_Refused' 'A_Cancelled' 'W_Assess potential fraud'
 'W_Shortened completion ']


In [9]:
#RNN Preprocessing
train, test = scale_data(train_raw, test_raw)
train = one_hot_encode(train)
test = one_hot_encode(test)
train, test = replace_missing_cols(train, test)

X_train, y_train, feature_names = prepare_dl_train_test(train, 15)
X_test, y_test, feature_names = prepare_dl_train_test(test, 15)

#y_train = to_categorical(y_train, 2)
#y_test = to_categorical(y_test, 2)

[] []


In [None]:
#ML Preprocessing
sequence_encoding_setting = dict(case_id_col=case_id_col, 
                          activity_col=activity_col,
                          label_col=label_col,
                          case_numeric_attributes=case_numeric_attributes,
                          case_categorical_attributes=case_categorical_attributes,
                          event_numeric_attributes=event_numeric_attributes,
                          event_categorical_attributes=event_categorical_attributes,
                            d_event_sequence = False,
                            d_event = True,
                            d_event_categorical = True,
                            d_case_categorical =True,
                            one_hot_case_categorical = True,   
                            d_event_numeric = True,
                            d_case_numeric = True)


train = aggregate_data(train_raw, 
                       **sequence_encoding_setting)

test = aggregate_data(test_raw, 
                      **sequence_encoding_setting)

train, test = replace_missing_cols(train, test)

X_train_ml, y_train_ml, X_test_ml, y_test_ml = prepare_ml_train_test(train,test, balanced=True)

In [None]:
#DNN Preprocessing

#Feature Scaling for DNN
from sklearn.preprocessing import StandardScaler

#Split into feature (X) and target (y) variables
x_train_numeric = X_train_ml
y_train_numeric = y_train_ml[label_col].astype(np.int64)
x_test_numeric = X_test_ml
y_test_numeric = y_test_ml[label_col].astype(np.int64)


#Apply standardization on numerical features
num_cols = x_train_numeric.columns.values

for col in num_cols: 
    scale = StandardScaler().fit(x_train_numeric[[col]])   
    x_train_numeric[col] = scale.transform(x_train_numeric[[col]])
    x_test_numeric[col] = scale.transform(x_test_numeric[[col]])

## DL Models

In [22]:
def tune_gru(hp):
    model = Sequential()
    model.add(GRU(hp.Int('input_unit',min_value=32,max_value=512,step=32),return_sequences=True, input_shape=(X_train.shape[1],X_train.shape[2])))
    
    for i in range(hp.Int('n_layers', 1, 2)):
        model.add(GRU(hp.Int(f'lgru_{i}_units',min_value=32,max_value=512,step=32),return_sequences=True))
        
    model.add(GRU(hp.Int('layer_final',min_value=32,max_value=512,step=32)))
    
    model.add(Dropout(hp.Float('Dropout_rate',min_value=0,max_value=0.5,step=0.1)))
    
    model.add(Dense(1, activation=hp.Choice('dense_activation', values=['softmax', 'sigmoid'])))
    
    model.compile(loss='binary_crossentropy', optimizer='sgd',metrics = ['AUC', 'acc'])
    return model

In [26]:
def tune_lstm(hp):
    model = Sequential()
    model.add(LSTM(hp.Int('input_unit',min_value=32,max_value=512,step=32),return_sequences=True, input_shape=(X_train.shape[1],X_train.shape[2])))
    
    for i in range(hp.Int('n_layers', 1, 2)):
        model.add(LSTM(hp.Int(f'lstm_{i}_units',min_value=32,max_value=512,step=32),return_sequences=True))
        
    model.add(LSTM(hp.Int('layer_final',min_value=32,max_value=512,step=32)))
    
    model.add(Dropout(hp.Float('Dropout_rate',min_value=0,max_value=0.5,step=0.1)))
    
    model.add(Dense(1, activation=hp.Choice('dense_activation', values=['softmax', 'sigmoid'])))
    
    model.compile(loss='binary_crossentropy', optimizer='sgd',metrics = ['AUC', 'acc'])
    return model

In [None]:
def tune_cnn(hp):
    model = Sequential()
    

    model.add(Conv1D(hp.Int('input_unit', min_value=1, max_value=16, step=2), 
                     kernel_size=hp.Int('input_kernel', min_value=10, max_value=20, step=2), 
                     padding='same',
                     input_shape=(X_train.shape[1], X_train.shape[2]), 
                     #activation=hp.Choice('input_layer_activation', values=['relu', 'tanh'])))
                     activation='tanh'))

    #for i in range(hp.Int('n_cnn_layers', 1, 2)):    
    i=1
    model.add(Conv1D(hp.Int(f'hidden_{i}_units', min_value=1, max_value=16, step=4), 
                 kernel_size=hp.Int(f'hidden_{i}_kernel', min_value=1, max_value=4, step=1), 
                 input_shape=(X_train.shape[1], X_train.shape[2]), 
                 activation=hp.Choice(f'hidden_{i}_activation', values=['relu', 'tanh'])))
    
    model.add(Dropout(hp.Float('Dropout_rate_input',min_value=0,max_value=0.5,step=0.1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
       
    for i in range(hp.Int('n_layers', 4, 6)):
        model.add(Dropout(hp.Float(f'Dropout_rate_{i}',min_value=0,max_value=0.5,step=0.1)))
        model.add(Dense(hp.Int(f'dense_{i}', min_value=2, max_value=256, step=16)))
        #model.add(Activation(hp.Choice(f'dense_{i}_activation', values=['relu', 'tanh'])))
        model.add(Activation('tanh'))
        
    #model.add(Dense(y_train.shape[1], activation=hp.Choice('final_activation', values=['softmax', 'sigmoid'])))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy', #crossentropy
                  metrics=['AUC', 'acc', 'loss'],
                  optimizer='sgd')
              
    return model

In [None]:
def tune_dnn(hp):
    model = Sequential()
    model.add(Dense(hp.Int('input_unit', min_value=32, max_value=1024, step=64), 
                    input_dim=x_train_numeric.shape[1], 
                    #activation=hp.Choice(f'input_activation', values=['relu', 'tanh'])))
                    activation='relu')) 
    
    for i in range(hp.Int('n_layers', 1, 4)):
        model.add(Dropout(hp.Float(f'layer_{i}_dropout',min_value=0,max_value=0.5,step=0.1)))
        model.add(BatchNormalization())
        model.add(Dense(hp.Int(f'layer_{i}_dense', min_value=32, max_value=1024, step=64), 
                        #hp.Choice(f'layer_{i}_activation', values=['relu', 'tanh'])
                        activation='relu'))
   
    model.add(Dense(1, activation='sigmoid'))
    
    #adam_opt = tf.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['AUC', 'acc'])
    
    return model

## Tuner

In [27]:
tuner= BayesianOptimization(       
        tune_lstm,
        objective='val_acc',
        max_trials=20,
        executions_per_trial=1,
        project_name='tinelstm'
        )

INFO:tensorflow:Reloading Oracle from existing project .\tinelstm\oracle.json


In [None]:
# DNN Tuner
tuner.search(
        x=x_train_numeric,
        y=y_train_numeric,
        epochs=30,
        batch_size=32,
        validation_data=(x_test_numeric,y_test_numeric)
)

In [28]:
# RNN Tuner
tuner.search(
        x=X_train,
        y=y_train,
        epochs=20,
        batch_size=128,
        validation_data=(X_test,y_test)
)

Trial 20 Complete [00h 16m 47s]
val_acc: 0.80085688829422

Best val_acc So Far: 0.8024436831474304
Total elapsed time: 06h 00m 20s
INFO:tensorflow:Oracle triggered exit


### Tuner Results

In [None]:
tuner.search_space_summary()

In [None]:
tuner.results_summary()

In [29]:
tuner.get_best_hyperparameters()[0].values

{'input_unit': 352,
 'n_layers': 1,
 'lstm_0_units': 512,
 'layer_final': 128,
 'Dropout_rate': 0.2,
 'dense_activation': 'sigmoid',
 'lstm_1_units': 416}

In [None]:
tuner.get_best_models()[0].summary()

In [None]:
best_model = tuner.get_best_models(num_models=1)[0]

In [None]:
save_model(best_model,'GRU_v1.h5')