In [6]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from keras.metrics import BinaryAccuracy, AUC
# Function to create model, required for KerasClassifier

## DL Model Creation

In [7]:
def create_model():
    """Build and compile the binary classifier handed to KerasClassifier.

    Architecture: 8 input features -> Dense(12, relu) -> Dense(1, sigmoid).
    The model is compiled with binary cross-entropy loss, the Adam optimizer
    and accuracy as the tracked metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Layers are listed in forward order; the first one fixes the input width.
    stack = [
        Dense(12, input_shape=(8,), activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

## Data Preprocessing

In [8]:
# Fix the random seeds for reproducibility.
# TensorFlow and NumPy keep separate global generators: seeding only TensorFlow
# leaves NumPy's global RNG (the fallback for scikit-learn objects created with
# random_state=None) unseeded, so both are seeded here.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)
# Load the dataset. ndmin=2 keeps the result two-dimensional even when the file
# holds a single row, so the column slicing below always works.
dataset = np.loadtxt("data.csv", delimiter=",", ndmin=2)
# Split into input (X) and output (Y) variables:
# columns 0-7 are the inputs, column 8 is the target.
X = dataset[:, 0:8]
Y = dataset[:, 8]

## Model Instantiation

In [9]:
from keras import backend as K


def recall_m(y_true, y_pred):
    """Recall of the predictions: true positives / actual positives.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed, and ``K.epsilon()`` is
    added to the denominator so that a batch without positives does not
    divide by zero.

    Args:
        y_true: ground-truth labels (assumed to be 0/1 -- TODO confirm).
        y_pred: model outputs (assumed to be probabilities -- TODO confirm).

    Returns:
        The recall value as a backend tensor.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())


def precision_m(y_true, y_pred):
    """Precision of the predictions: true positives / predicted positives.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed, and ``K.epsilon()`` is
    added to the denominator so that a batch without predicted positives does
    not divide by zero.

    Args:
        y_true: ground-truth labels (assumed to be 0/1 -- TODO confirm).
        y_pred: model outputs (assumed to be probabilities -- TODO confirm).

    Returns:
        The precision value as a backend tensor.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` is added to the denominator so the result stays finite
    when precision and recall are both zero.

    Args:
        y_true: ground-truth labels, forwarded unchanged to both helpers.
        y_pred: model outputs, forwarded unchanged to both helpers.

    Returns:
        The F1 value as a backend tensor.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    numerator = p * r
    denominator = p + r + K.epsilon()
    return 2*(numerator/denominator)

In [10]:
# Wrap the Keras build function in a scikit-learn compatible estimator so it
# can be used with GridSearchCV / RandomizedSearchCV below.
# NOTE(review): create_model() already compiles the network with
# metrics=['accuracy']; confirm whether the metrics given here (and the routed
# metrics__threshold value) are actually applied by the wrapper -- TODO verify.
model = KerasClassifier(
    model=create_model,
    verbose=0,
    metrics=['acc', f1_m, precision_m, recall_m],
    metrics__threshold=0.65,
)


### Results without using Cross Validated Hyperparameter Tuning

In [11]:
# Baseline run with hand-picked hyperparameters (no search): 10 epochs,
# batches of 40, with 30% of the data passed as the validation split.
result = model.fit(
    X,
    Y,
    validation_split=0.3,
    epochs=10,
    verbose=0,
    batch_size=40,
)

In [12]:
# Score the baseline model on the same X, Y it was fitted on.
# Despite its name, `predict` holds a single score, not predictions.
predict = model.score(X, Y)
print(predict)

0.5598958333333334


## Hyperparameter tuning using GridSearchCV

In [13]:
# Search space: every combination of these batch sizes and epoch counts
# (6 x 3 = 18 candidates).
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = {'batch_size': batch_size, 'epochs': epochs}

# Exhaustive search with 3-fold cross-validation, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X, Y)

In [14]:
# Report the winning configuration first, then every candidate that was tried.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Mean and standard deviation of the CV test score, aligned with the
# parameter dictionaries they belong to.
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Displayed as the cell's value: the search object's score on the same X, Y
# that were passed to fit.
f"The final score is: {grid_result.score(X,Y)}"

Best: 0.691406 using {'batch_size': 10, 'epochs': 50}
0.600260 (0.032578) with: {'batch_size': 10, 'epochs': 10}
0.691406 (0.020915) with: {'batch_size': 10, 'epochs': 50}
0.669271 (0.009744) with: {'batch_size': 10, 'epochs': 100}
0.546875 (0.066826) with: {'batch_size': 20, 'epochs': 10}
0.677083 (0.014731) with: {'batch_size': 20, 'epochs': 50}
0.662760 (0.024774) with: {'batch_size': 20, 'epochs': 100}
0.554688 (0.072940) with: {'batch_size': 40, 'epochs': 10}
0.677083 (0.040386) with: {'batch_size': 40, 'epochs': 50}
0.665365 (0.017566) with: {'batch_size': 40, 'epochs': 100}
0.558594 (0.052505) with: {'batch_size': 60, 'epochs': 10}
0.636719 (0.030758) with: {'batch_size': 60, 'epochs': 50}
0.625000 (0.071032) with: {'batch_size': 60, 'epochs': 100}
0.619792 (0.057439) with: {'batch_size': 80, 'epochs': 10}
0.596354 (0.027126) with: {'batch_size': 80, 'epochs': 50}
0.640625 (0.025315) with: {'batch_size': 80, 'epochs': 100}
0.600260 (0.066317) with: {'batch_size': 100, 'epochs': 

'The final score is: 0.7421875'

### Hyperparameter tuning using RandomizedSearchCV

In [15]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values are drawn from the batch_size / epochs lists defined for the
# grid search (6 x 3 = 18 possible combinations).
parameters = {'batch_size': batch_size, 'epochs': epochs}
# random_state pins which n_iter=10 combinations get sampled; without it the
# evaluated candidates (and therefore the reported best) differ on every run.
randm_src = RandomizedSearchCV(
    estimator=model,
    param_distributions=parameters,
    cv=2,
    n_iter=10,
    n_jobs=-1,
    random_state=seed,
)
rand_result = randm_src.fit(X, Y)

In [16]:
# Report the winning configuration first, then every sampled candidate.
print("Best: %f using %s" % (rand_result.best_score_, rand_result.best_params_))

# Mean and standard deviation of the CV test score, aligned with the
# parameter dictionaries they belong to.
means, stds, params = (
    rand_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Displayed as the cell's value: the search object's score on the same X, Y
# that were passed to fit.
f"The final score is: {rand_result.score(X,Y)}"

Best: 0.687500 using {'epochs': 100, 'batch_size': 20}
0.549479 (0.067708) with: {'epochs': 10, 'batch_size': 10}
0.640625 (0.026042) with: {'epochs': 10, 'batch_size': 20}
0.687500 (0.010417) with: {'epochs': 100, 'batch_size': 20}
0.570312 (0.031250) with: {'epochs': 10, 'batch_size': 40}
0.484375 (0.028646) with: {'epochs': 10, 'batch_size': 100}
0.682292 (0.007812) with: {'epochs': 50, 'batch_size': 10}
0.634115 (0.003906) with: {'epochs': 50, 'batch_size': 60}
0.628906 (0.006510) with: {'epochs': 50, 'batch_size': 40}
0.574219 (0.058594) with: {'epochs': 50, 'batch_size': 100}
0.578125 (0.036458) with: {'epochs': 50, 'batch_size': 20}


'The final score is: 0.6966145833333334'

### Hyperparameter tuning using Nested CV --> KFold

In [17]:
from sklearn.model_selection import KFold

# Splitter: 3 shuffled folds with a fixed random_state so the fold assignment
# is repeatable.
# NOTE(review): this cell runs a single grid search with a KFold splitter; no
# outer cross-validation loop is visible here, so confirm the "Nested CV" label.
cv = KFold(n_splits=3, shuffle=True, random_state=1)

# Search space (same values as the earlier searches).
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Exhaustive search scored by accuracy, using all available cores.
search = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    n_jobs=-1,
    cv=cv,
)
# Run the search; `result` is rebound here to the search outcome.
result = search.fit(X, Y)

In [18]:
# Report the winning configuration first, then every candidate that was tried.
print("Best: %f using %s" % (result.best_score_, result.best_params_))

# Mean and standard deviation of the CV test score, aligned with the
# parameter dictionaries they belong to.
means, stds, params = (
    result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Displayed as the cell's value: the search object's score on the same X, Y
# that were passed to fit.
f"The final score is: {result.score(X,Y)}"

Best: 0.700521 using {'batch_size': 10, 'epochs': 100}
0.578125 (0.059584) with: {'batch_size': 10, 'epochs': 10}
0.662760 (0.004872) with: {'batch_size': 10, 'epochs': 50}
0.700521 (0.029635) with: {'batch_size': 10, 'epochs': 100}
0.559896 (0.028940) with: {'batch_size': 20, 'epochs': 10}
0.661458 (0.008027) with: {'batch_size': 20, 'epochs': 50}
0.691406 (0.013902) with: {'batch_size': 20, 'epochs': 100}
0.562500 (0.031412) with: {'batch_size': 40, 'epochs': 10}
0.651042 (0.012890) with: {'batch_size': 40, 'epochs': 50}
0.669271 (0.012890) with: {'batch_size': 40, 'epochs': 100}
0.628906 (0.058027) with: {'batch_size': 60, 'epochs': 10}
0.585938 (0.088100) with: {'batch_size': 60, 'epochs': 50}
0.678385 (0.009207) with: {'batch_size': 60, 'epochs': 100}
0.602865 (0.090455) with: {'batch_size': 80, 'epochs': 10}
0.638021 (0.031466) with: {'batch_size': 80, 'epochs': 50}
0.647135 (0.047771) with: {'batch_size': 80, 'epochs': 100}
0.561198 (0.152638) with: {'batch_size': 100, 'epochs':

'The final score is: 0.75'

### Hyperparameter Tuning Using Nested CV --> Stratified KFold

In [19]:
from sklearn.model_selection import StratifiedKFold

# Splitter: 2 stratified folds, taken in order (no shuffling, hence no seed).
scv = StratifiedKFold(n_splits=2, random_state=None, shuffle=False)

# Search space (same values as the earlier searches).
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Exhaustive search scored by accuracy, using all available cores.
ssearch = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    n_jobs=-1,
    cv=scv,
)
# Run the search.
hresult = ssearch.fit(X, Y)

In [20]:
# Report the winning configuration first, then every candidate that was tried.
print("Best: %f using %s" % (hresult.best_score_, hresult.best_params_))

# Mean and standard deviation of the CV test score, aligned with the
# parameter dictionaries they belong to.
means, stds, params = (
    hresult.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Displayed as the cell's value: the search object's score on the same X, Y
# that were passed to fit.
f"The final score is: {hresult.score(X,Y)}"

Best: 0.683594 using {'batch_size': 10, 'epochs': 100}
0.626302 (0.076823) with: {'batch_size': 10, 'epochs': 10}
0.671875 (0.005208) with: {'batch_size': 10, 'epochs': 50}
0.683594 (0.037760) with: {'batch_size': 10, 'epochs': 100}
0.598958 (0.013021) with: {'batch_size': 20, 'epochs': 10}
0.631510 (0.014323) with: {'batch_size': 20, 'epochs': 50}
0.679688 (0.018229) with: {'batch_size': 20, 'epochs': 100}
0.550781 (0.050781) with: {'batch_size': 40, 'epochs': 10}
0.643229 (0.013021) with: {'batch_size': 40, 'epochs': 50}
0.678385 (0.011719) with: {'batch_size': 40, 'epochs': 100}
0.524740 (0.014323) with: {'batch_size': 60, 'epochs': 10}
0.632812 (0.007812) with: {'batch_size': 60, 'epochs': 50}
0.649740 (0.003906) with: {'batch_size': 60, 'epochs': 100}
0.578125 (0.085938) with: {'batch_size': 80, 'epochs': 10}
0.611979 (0.007812) with: {'batch_size': 80, 'epochs': 50}
0.580729 (0.041667) with: {'batch_size': 80, 'epochs': 100}
0.623698 (0.029948) with: {'batch_size': 100, 'epochs':

'The final score is: 0.7317708333333334'

### Hyperparameter Tuning Using Nested CV --> Repeated KFolds / Repeated Random Sampling

In [21]:
from sklearn.model_selection import RepeatedKFold

# Splitter: 2 folds repeated 2 times (4 train/test splits per candidate), with
# a fixed random_state so the splits are repeatable.
rcv = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)

# Search space (same values as the earlier searches).
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Exhaustive search scored by accuracy, using all available cores.
rsearch = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    n_jobs=-1,
    cv=rcv,
)
# Run the search.
rresult = rsearch.fit(X, Y)

In [22]:
# Report the winning configuration first, then every candidate that was tried.
print("Best: %f using %s" % (rresult.best_score_, rresult.best_params_))

# Mean and standard deviation of the CV test score, aligned with the
# parameter dictionaries they belong to.
means, stds, params = (
    rresult.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Displayed as the cell's value: the search object's score on the same X, Y
# that were passed to fit.
f"The final score is: {rresult.score(X,Y)}"

Best: 0.699219 using {'batch_size': 10, 'epochs': 100}
0.621094 (0.035920) with: {'batch_size': 10, 'epochs': 10}
0.660807 (0.025805) with: {'batch_size': 10, 'epochs': 50}
0.699219 (0.020874) with: {'batch_size': 10, 'epochs': 100}
0.511068 (0.077282) with: {'batch_size': 20, 'epochs': 10}
0.646484 (0.034419) with: {'batch_size': 20, 'epochs': 50}
0.647135 (0.023474) with: {'batch_size': 20, 'epochs': 100}
0.583333 (0.016053) with: {'batch_size': 40, 'epochs': 10}
0.613281 (0.045124) with: {'batch_size': 40, 'epochs': 50}
0.648438 (0.027683) with: {'batch_size': 40, 'epochs': 100}
0.585286 (0.022202) with: {'batch_size': 60, 'epochs': 10}
0.622396 (0.033197) with: {'batch_size': 60, 'epochs': 50}
0.647786 (0.010478) with: {'batch_size': 60, 'epochs': 100}
0.483073 (0.064279) with: {'batch_size': 80, 'epochs': 10}
0.583984 (0.053095) with: {'batch_size': 80, 'epochs': 50}
0.630208 (0.041175) with: {'batch_size': 80, 'epochs': 100}
0.477865 (0.129182) with: {'batch_size': 100, 'epochs':

'The final score is: 0.74609375'