In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd

from datetime import date
from keras import losses, optimizers
from keras.layers import Activation, Dense

from random import randrange
from sklearn.metrics import roc_auc_score
from sklearn.utils import class_weight

from tensorflow import keras
from tensorflow.keras import backend as K, layers, losses, metrics, optimizers
from tensorflow.keras.activations import elu, exponential, hard_sigmoid, linear,\
    relu, sigmoid, softmax, relu, tanh
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.optimizers import Adadelta, Adam, Nadam, RMSprop, SGD
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy,\
    logcosh, mean_absolute_error, mean_squared_error, poisson

from time import time

Using TensorFlow backend.


In [3]:
# Load the pre-split train and test tables from the working directory.
train_dataset = pd.read_csv(r'Workflow_new_SP_DR_shiny_FS_train_dataset.csv')
test_dataset = pd.read_csv(r'Workflow_new_SP_DR_shiny_FS_test_dataset.csv')

# Columns kept out of the model inputs: GRID_ID (presumably a row
# identifier -- confirm), the target 'Label', and the signatory counters.
cols_to_drop = [
    'GRID_ID',
    'Label',
    'Signatory_NO_errors_last_5_days',
    'Signatory_NO_cases_last_5_days',
    'Signatory_NO_errors_last_month',
    'Signatory_NO_cases_month',
]

# Build the feature matrices first: drop() returns new frames, so the source
# frames still contain 'Label' when it is popped below.
train_data, test_data = (
    frame.drop(columns=cols_to_drop)
    for frame in (train_dataset, test_dataset)
)

# pop() removes 'Label' from the source frames and returns it as a Series.
# GRID_ID stays in the source frames (popping it was disabled).
train_labels, test_labels = train_dataset.pop('Label'), test_dataset.pop('Label')

In [4]:
def random_search(train_data,
                  train_labels,
                  test_data,
                  test_labels,
                  path,
                  max_iter=100,
                  max_hours=5,
                  target='Label',
                  min_auc=0.6):
    """Randomly sample dense-network configurations, train and score each one.

    Every iteration draws a configuration (depth, layer widths, activations,
    dropout rate, optimizer, learning rate, batch size, epoch budget), builds
    a ``Sequential`` binary classifier, fits it on the training data with
    early stopping on the validation loss, and records ROC AUC on both the
    training and the test data.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame
        Feature matrices. ``train_data.shape[1]`` sets the input width and
        ``test_data.columns`` is stored with every result row.
    train_labels, test_labels : array-like
        Class labels matching the rows of the feature matrices.
        # assumes labels are encoded 0..k-1 (needed for the class-weight
        # dictionary built below) -- TODO confirm
    path : str
        Prefix prepended to the results file name. It is concatenated as-is,
        so a directory must end with a path separator.
    max_iter : int
        Maximum number of configurations to try.
    max_hours : float
        Wall-clock budget in hours; checked before each iteration starts.
    target : str
        Unused; kept so existing callers keep working.
    min_auc : float
        A model is kept in the returned list only if its test AUC exceeds
        this value.

    Returns
    -------
    (list, pandas.DataFrame)
        ``models_list`` holds the model for each iteration, or ``np.nan`` when
        the model was not kept or the iteration failed. ``results_df`` has one
        row per iteration, so list position ``k`` matches row ``k``.

    Notes
    -----
    Models are filtered by test-set AUC, so the reported test AUC is an
    optimistic estimate for the models that are kept.
    """

    def _pick(options):
        """Return one uniformly sampled element of ``options``."""
        return options[randrange(len(options))]

    # Search space. Local names deliberately avoid shadowing the imported
    # ``optimizers`` / ``losses`` modules.
    learning_rates = np.arange(0.005, 0.1, 0.005)
    batch_sizes = np.array(range(20, 110, 10))
    unit_choices = [16, 32, 64, 128, 256]
    hidden_layer_choices = [1, 2, 3, 4, 5, 6]
    epoch_choices = [100, 150, 200, 250, 300, 350]
    dropout_choices = np.arange(0, 0.5, 0.1)
    optimizer_choices = [Adam, SGD]  # Nadam, RMSprop
    loss_choices = ['binary_crossentropy']  # 'logcosh', 'mean_absolute_error', 'poisson', 'mean_squared_error'
    activation_choices = [relu, tanh, elu]  # exponential
    last_activation_choices = [sigmoid]

    early_stop = EarlyStopping(monitor='val_loss',
                               min_delta=0.01,
                               patience=30,
                               verbose=0,
                               mode='min',
                               restore_best_weights=True)

    # Balanced class weights must come from the data the model is fitted on.
    # Keyword arguments are used because recent scikit-learn releases reject
    # positional ``classes`` / ``y``.
    class_weights = dict(enumerate(
        class_weight.compute_class_weight('balanced',
                                          classes=np.unique(train_labels),
                                          y=train_labels)))

    models_list = []
    results = []
    start_time = time()
    execution_time = 0
    i = 0
    # ``<`` instead of ``!=`` so a negative or non-integer max_iter cannot
    # leave the time budget as the only stopping condition.
    while execution_time < max_hours and i < max_iter:
        # Draw order is kept stable so a seeded ``random`` module reproduces
        # the same sequence of configurations.
        curr_num_hidden_layers = _pick(hidden_layer_choices)
        last_activation = _pick(last_activation_choices)
        loss_function = _pick(loss_choices)
        optimizer_function = _pick(optimizer_choices)
        current_lr = _pick(learning_rates)
        current_epoch = _pick(epoch_choices)
        current_batch_size = _pick(batch_sizes)
        current_dropout = _pick(dropout_choices)

        model = Sequential()
        model.add(Dense(_pick(unit_choices),
                        input_dim=train_data.shape[1],
                        activation=_pick(activation_choices)))
        # Dropout is applied once, directly after the input layer.
        model.add(Dropout(current_dropout))
        for _ in range(curr_num_hidden_layers):
            model.add(Dense(_pick(unit_choices),
                            activation=_pick(activation_choices)))
        model.add(Dense(1,
                        activation=last_activation,
                        kernel_initializer='normal'))
        model.compile(loss=loss_function,
                      optimizer=optimizer_function(lr=current_lr),
                      metrics=['acc'])

        # A failed trial (fit error, diverged model producing NaN scores, ...)
        # is recorded with NaN metrics instead of scoring an untrained model
        # or aborting the whole search before the results are written.
        auc_train = np.nan
        auc_test = np.nan
        try:
            model.fit(train_data,
                      train_labels,
                      epochs=current_epoch,
                      batch_size=current_batch_size,
                      validation_split=0.2,
                      verbose=0,
                      class_weight=class_weights,
                      callbacks=[early_stop])
            auc_test = roc_auc_score(test_labels,
                                     model.predict(test_data).ravel())
            auc_train = roc_auc_score(train_labels,
                                      model.predict(train_data).ravel())
        except Exception as e:
            error = 'Code: {}, Message: {}'.format(type(e).__name__, str(e))
            print(error)

        # for regression use val loss
        # auc = min(history.history['val_loss'])

        # One row per iteration, failed or not, so that row k of the results
        # table always describes models_list[k].
        results.append({'lr': current_lr,
                        'batch_size': current_batch_size,
                        'model_layers': model.get_config(),
                        'epochs': current_epoch,
                        'dropout': current_dropout,
                        'optimizer': str(optimizer_function),
                        'losses': loss_function,
                        'auc_test': auc_test,
                        'auc_train': auc_train,
                        'columns_used': test_data.columns.to_list()})

        # NaN compares False, so failed trials fall into the ``else`` branch.
        if auc_test > min_auc:
            models_list.append(model)
        else:
            models_list.append(np.nan)

        i = i + 1
        execution_time = (time() - start_time) / 3600
        print("Iteration: {}. Time elapsed: {}".format(str(i),
                                                       round(execution_time, 4)))

    results_df = pd.DataFrame(results)
    results_df.to_csv(path + 'results_modelling_' + str(date.today()) + '.csv')
    return models_list, results_df


In [5]:
# Launch the search: at most 400 configurations or 1.2 hours, keeping only
# models whose test AUC exceeds 0.65. The result is (models_list, results_df).
random_search_results_critical_LE = random_search(
    train_data=train_data,
    train_labels=train_labels,
    test_data=test_data,
    test_labels=test_labels,
    path=r'C:/Users/mmandziej001/Desktop/FCU/SCRIPTS/predictive_qc_lion_king/model_training/random_search_results/',
    max_iter=400,
    max_hours=1.2,
    min_auc=0.65,
)

Iteration: 1. Time elapsed: 0.0008
Iteration: 2. Time elapsed: 0.0017
Iteration: 3. Time elapsed: 0.0039
Iteration: 4. Time elapsed: 0.0061
Iteration: 5. Time elapsed: 0.0079
Iteration: 6. Time elapsed: 0.0094
Iteration: 7. Time elapsed: 0.0112
Iteration: 8. Time elapsed: 0.0122
Iteration: 9. Time elapsed: 0.0139
Iteration: 10. Time elapsed: 0.0152
Iteration: 11. Time elapsed: 0.0173
Iteration: 12. Time elapsed: 0.0187
Iteration: 13. Time elapsed: 0.0205
Iteration: 14. Time elapsed: 0.0235
Iteration: 15. Time elapsed: 0.0266
Iteration: 16. Time elapsed: 0.0312
Iteration: 17. Time elapsed: 0.0362
Iteration: 18. Time elapsed: 0.0392
Iteration: 19. Time elapsed: 0.0412
Iteration: 20. Time elapsed: 0.0426
Iteration: 21. Time elapsed: 0.0439
Iteration: 22. Time elapsed: 0.0462
Iteration: 23. Time elapsed: 0.0494
Iteration: 24. Time elapsed: 0.0525
Iteration: 25. Time elapsed: 0.0545
Iteration: 26. Time elapsed: 0.0581
Iteration: 27. Time elapsed: 0.0605
Iteration: 28. Time elapsed: 0.0666
I

Iteration: 226. Time elapsed: 0.5769
Iteration: 227. Time elapsed: 0.5806
Iteration: 228. Time elapsed: 0.5825
Iteration: 229. Time elapsed: 0.585
Iteration: 230. Time elapsed: 0.5927
Iteration: 231. Time elapsed: 0.5991
Iteration: 232. Time elapsed: 0.6006
Iteration: 233. Time elapsed: 0.603
Iteration: 234. Time elapsed: 0.6063
Iteration: 235. Time elapsed: 0.6097
Iteration: 236. Time elapsed: 0.6122
Iteration: 237. Time elapsed: 0.6168
Iteration: 238. Time elapsed: 0.6189
Iteration: 239. Time elapsed: 0.6219
Iteration: 240. Time elapsed: 0.6244
Iteration: 241. Time elapsed: 0.6283
Iteration: 242. Time elapsed: 0.6312
Iteration: 243. Time elapsed: 0.6347
Iteration: 244. Time elapsed: 0.6401
Iteration: 245. Time elapsed: 0.6424
Iteration: 246. Time elapsed: 0.6456
Iteration: 247. Time elapsed: 0.6476
Iteration: 248. Time elapsed: 0.6511
Iteration: 249. Time elapsed: 0.6573
Iteration: 250. Time elapsed: 0.6619
Iteration: 251. Time elapsed: 0.6656
Iteration: 252. Time elapsed: 0.6682
Ite

In [6]:
# Second name for the same (models_list, results_df) tuple; no copy is made.
random_search_results_lion = random_search_results_critical_LE

In [7]:
# Inspect slot 46 of the returned model list (element 0 of the result tuple).
# random_search stores np.nan in a slot when that iteration's test AUC did
# not exceed min_auc, so a nan here means the model was not kept.
random_search_results_lion[0][46]

nan

In [25]:
# Select the retained model with the highest recorded test AUC rather than a
# hard-coded list position: the search is unseeded, so a fixed index points
# at a different (possibly discarded, i.e. nan) entry on every run.
search_models, search_results = random_search_results_lion

# Discarded slots hold np.nan, which is a float; kept slots hold a model.
retained_positions = [position
                      for position, candidate in enumerate(search_models)
                      if not isinstance(candidate, float)]
if not retained_positions:
    raise ValueError('random search kept no model above min_auc')

# Row k of the results table describes search_models[k].
best_position = search_results.loc[retained_positions, 'auc_test'].idxmax()
best_model = search_models[best_position]
best_model.summary()

In [5]:
# Hand-specified network: two tanh layers (64 -> 16), dropout, two relu
# layers (32 -> 32) and a single sigmoid output unit.
model = Sequential([
    Dense(64, input_dim=train_data.shape[1], activation=tanh),
    Dense(16, activation=tanh),
    Dropout(0.1),
    Dense(32, activation=relu),
    Dense(32, activation=relu),
    # Output layer: one sigmoid unit for the binary label.
    Dense(1, activation=sigmoid, kernel_initializer='normal'),
])

# Plain SGD with a small learning rate; accuracy is tracked during training.
model.compile(
    optimizer=SGD(lr=0.005),
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [27]:
# Print the input shape of every layer of the selected model.
for layer in best_model.layers:
    try:
        print(layer.input_shape)
    except (AttributeError, RuntimeError) as err:
        # Only the "shape not available" failures are handled, and the layer
        # is named, instead of labelling every possible error as 'Dropout'
        # and swallowing interrupts with a bare except.
        print('{}: input shape unavailable ({})'.format(layer.name, err))

(None, 40)
(None, 64)
(None, 64)
(None, 128)
(None, 256)
(None, 64)


In [28]:
# Persist the selected model; the .h5 suffix selects the HDF5 format.
# NOTE(review): absolute, user-specific path -- consider a configurable
# output directory.
best_model.save(
    r'C:/Users/mmandziej001/Desktop/FCU/SCRIPTS/predictive_qc_lion_king/'
    r'model_training/random_search_results/final_model_DR.h5'
)