In [1]:
# Import all packages and library

# Import package to scan hyperparameter
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Import package to reprocess the data
import numpy as np
import pandas as pd
import random
import json

# Import properties from keras
from keras import models
from keras.layers import Dense, Dropout, Activation
from keras import regularizers

# Import keras items
from keras.optimizers import Adam, Adadelta, SGD
from keras.activations import relu, sigmoid
from keras.losses import binary_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.wrappers.scikit_learn import KerasClassifier

Using TensorFlow backend.


In [2]:
# Get all of the data and reprocess them

# Get the reprocessed data from .npy file
x_train = np.load('../r-scripts/getting-data-current/data-sets/x_train.npy')
y_train = np.load('../r-scripts/getting-data-current/data-sets/y_train.npy')

x_dev = np.load('../r-scripts/getting-data-current/data-sets/x_val.npy')
y_dev = np.load('../r-scripts/getting-data-current/data-sets/y_val.npy')

x_test = np.load('../r-scripts/getting-data-current/data-sets/x_test.npy')
y_test = np.load('../r-scripts/getting-data-current/data-sets/y_test.npy')

# This Section is used to shuffle the data

# Aggregates elements
data_training = list(zip(x_train, y_train))
data_development = list(zip(x_dev, y_dev))
data_testing = list(zip(x_test, y_test))

# Shuffle the aggragated element on the list
random.shuffle(data_training)
random.shuffle(data_development)
random.shuffle(data_testing)

# Combine data training dan data development become one list of data train

data_train = data_training + data_development

# Split the shuffled data
x_train, y_train = zip(*data_train)
x_test, y_test = zip(*data_testing)

# Unpack the tuples
x_train = np.array(list(x_train))
y_train = np.array(list(y_train))
x_test = np.array(list(x_test))
y_test = np.array(list(y_test))

# Reshape the datasets
x_train = x_train.reshape(615, 4034 * 20)
x_test = x_test.reshape(150, 4034 * 20)

In [3]:
# Define the model and function

# Define the model base
def build_fc_model(# Hyperparameters as parts of model designs (building blocks)
	                 input_num_hidden_units = 2,
                   num_hidden_layers = [0],
                   activation_function = 'relu',

                   # Hyperparameters as part of optimization and regularization of the models
                   l2_rate = 0.001,
                   input_dropout_rates = 0.5,
                   dropout_rates = 0.5,
                   optim_methods = 'Adam',
                   batch_norm = "yes"
                   ):

	# Add the input layer
    model = models.Sequential()
    model.add(Dense(input_num_hidden_units,
              activation = 'relu',
              kernel_regularizer = regularizers.l2(l2_rate),
              input_dim = x_train.shape[1]))
    model.add(Dropout(input_dropout_rates))

    # Add the hidden layers
    for num in range(len(num_hidden_layers)):
        if num_hidden_layers[num] == 0:
            continue
        else:
            model.add(Dense(num_hidden_layers[num]))

            # Add batch normaization before adding the activation layers
            if batch_norm == "yes":
            	model.add(BatchNormalization())
            else:
            	continue

            model.add(Activation(activation_function))
            model.add(Dropout(dropout_rates))

    # Add the output layer
    model.add(Dense(1,
              activation = 'sigmoid'))

    # Compile the model defined
    model.compile(optimizer = optim_methods,
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])

    # Print the summary of the model
    print(model.summary())

    return model

In [4]:
# Pass the model design to KerasClassifier() wrapper -------------------------------

model = KerasClassifier(build_fn = build_fc_model,
	                         verbose = 1)

# Define the parameters that will be tuned randomly
keras_param_options = {
                       # Hyperparameters as parts of model designs (building blocks)
                       'input_num_hidden_units': [3],
                       'num_hidden_layers': [[1, 3]],
                       'activation_function': ['relu'],
                       # Hyperparameters as part of optimization and regularization of the models
                       'optim_methods' : ['SGD'],
                       'l2_rate':[0.01],
                       'input_dropout_rates': [0.5],
                       'dropout_rates': [0.5],
                       'batch_norm' : ['no'],
                       # Fitting parameters
                       'batch_size': [8],
                       'epochs': [30],
                       'shuffle': [True]
                      }

# Using RandomizedSearchCV to find the best model randomly
random_search = RandomizedSearchCV(model,
                                   param_distributions = keras_param_options,
                                   return_train_score=True,
                                   n_iter = 1,
                                   cv = 5,
                                   verbose = 10)

In [5]:
# Define the function to calculate sensitivity and specificity
def sensitivity_specificity(predictions, y_test, mode='binary'):
    if mode == 'binary':
        # Determine positive class predictions
        index = predictions > 0.5
        predictions = np.zeros(predictions.shape)
        predictions[index] = 1
        # No need to modify y_test since it consists of zeros and ones already
    else:
        y_test = y_test
        predictions = np.argmax(predictions, axis=-1)

    # In the binary classification case as we create, we can extract tn, fp, fn, tp as follows
    tn, fp, fn, tp = confusion_matrix(y_test, predictions, labels = [0, 1]).ravel()
    
    # accuracy = (TP+TN)/(TP+TN+FP+FN)
    acc = (tp + tn)/(tp + tn + fp + fn)

    # Sensitivity = TP / (TP + FN)
    sensitivity = tp / (tp + fn)

    # Specificity = TN / (TN + FP)
    specificity = tn / (tn + fp)

    # Precision = TP / (TP + FP)
    precision = tp / (tp + fp)

    # Return sensitivity, specificity, precision
    return(acc, sensitivity, specificity, precision)

In [7]:
# fit to the training data
random_search.fit(x_train, y_train)
print('Best score obtained: {0}'.format(random_search.best_score_))
print('Parameters:')
for param, value in random_search.best_params_.items():
    print('\t{}: {}'.format(param, value))


# predict the random search using the test data
print('Predict uisng test data using random_search:')
y_pred_random_search = random_search.predict(x_test)
acc_pred_random_search = accuracy_score(y_test, y_pred_random_search)
print('acc y_pred_random_search:', acc_pred_random_search)


# validator.best_estimator_ returns sklearn-wrapped version of best model.
# validator.best_estimator_.model returns the (unwrapped) keras model
print('Evaluate the best model on training data:')
best_model = random_search.best_estimator_.model
metric_names = best_model.metrics_names
metric_values = best_model.evaluate(x_train, y_train)
for metric, value in zip(metric_names, metric_values):
    print(metric, ': ', value)

# predict the test data using the best model obtained
print('Evaluate the best model on test data:')
metric_values_on_test_data = best_model.evaluate(x_test, y_test)
for metric, value in zip(metric_names, metric_values_on_test_data):
    print(metric, ': ', value)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] shuffle=True, optim_methods=SGD, num_hidden_layers=[1, 3], l2_rate=0.01, input_num_hidden_units=3, input_dropout_rates=0.5, epochs=30, dropout_rates=0.5, batch_size=8, batch_norm=no, activation_function=relu 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 3)                 242043    
_________________________________________________________________
dropout_7 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 4         
_________________________________________________________________
dense_27 (Dense)             (None, 3)                 6         
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 4         
Total params: 242,057
Trainable params: 242,057
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   29.6s remaining:    0.0s


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_29 (Dense)             (None, 3)                 242043    
_________________________________________________________________
dropout_8 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 4         
_________________________________________________________________
dense_31 (Dense)             (None, 3)                 6         
_________________________________________________________________
dense_32 (Dense)             (None, 1)                 4         
Total params: 242,057
Trainable params: 242,057
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   59.4s remaining:    0.0s


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_33 (Dense)             (None, 3)                 242043    
_________________________________________________________________
dropout_9 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_34 (Dense)             (None, 1)                 4         
_________________________________________________________________
dense_35 (Dense)             (None, 3)                 6         
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 4         
Total params: 242,057
Trainable params: 242,057
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  1.5min remaining:    0.0s


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_37 (Dense)             (None, 3)                 242043    
_________________________________________________________________
dropout_10 (Dropout)         (None, 3)                 0         
_________________________________________________________________
dense_38 (Dense)             (None, 1)                 4         
_________________________________________________________________
dense_39 (Dense)             (None, 3)                 6         
_________________________________________________________________
dense_40 (Dense)             (None, 1)                 4         
Total params: 242,057
Trainable params: 242,057
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12

[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  1.9min remaining:    0.0s


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_41 (Dense)             (None, 3)                 242043    
_________________________________________________________________
dropout_11 (Dropout)         (None, 3)                 0         
_________________________________________________________________
dense_42 (Dense)             (None, 1)                 4         
_________________________________________________________________
dense_43 (Dense)             (None, 3)                 6         
_________________________________________________________________
dense_44 (Dense)             (None, 1)                 4         
Total params: 242,057
Trainable params: 242,057
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.4min finished


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Best score obtained: 0.6764227644699375
Parameters:
	shuffle: True
	optim_methods: SGD
	num_hidden_layers: [1, 3]
	l2_rate: 0.01
	input_num_hidden_units: 3
	input_dropout_rates: 0.5
	epochs: 30
	dropout_rates: 0.5
	batch_size: 8
	batch_norm: no
	activation_function: relu
Predict uisng test data using random_search:
acc y_pred_random_search: 0.6266666666666667
Evaluate the best model on training data:
loss :  0.12089832242184538
acc :  0.9967479674796748
Evaluate the best model on test data:
loss :  0.8929051224390666
acc :  0.6266666674613952
