Implementing Grid Search on keras, based on this <a href="https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/tutorial"> tutorial</a> 

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [2]:
iris = load_iris()
X = iris["data"]
y = iris["target"]

The class variable is very imbalanced.

We split the dataset into train and test. We shuffle the examples as the order is not important here.

In [3]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(y)

n_class_var = len(np.unique(y))

y = keras.utils.to_categorical(y, num_classes=n_class_var)

We scale with the Standard Scaler. That is the features are normalized. Models usually work better when this is the case.

In [4]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

The model's architecture. 

In [19]:
def create_model(optimizer="sgd"):
    model = keras.Sequential(
        [
            keras.layers.Dense(
                8, activation="relu", input_shape=(X.shape[-1],)
            ),
            keras.layers.Dense(12, activation="relu"),
            keras.layers.Dense(8, activation="relu"),
            keras.layers.Dense(n_class_var, activation="softmax")
        ]
    )
    model.compile(
        optimizer=optimizer,
        loss="categorical_crossentropy",
        metrics=['accuracy']
    )
    return model

In [21]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

model = create_model()
model.fit(X_train, y_train, batch_size=1, epochs=30)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


array([2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 0, 2, 1, 2, 2, 0, 0, 0, 1, 2, 0, 0,
       1, 2, 2, 2, 1, 0, 2, 2, 0, 2, 2, 1, 1, 0, 0, 1], dtype=int64)

In [25]:
from sklearn.metrics import accuracy_score

accuracy_score(np.argmax(y_test, axis=1), np.argmax(model.predict(X_test), axis=1))

0.9736842105263158

In [26]:
model = KerasClassifier(build_fn=create_model, verbose=0)
# define the grid search parameters
batch_size = [1, 4, 8]
epochs = [10, 20]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2,verbose=3)
grid_result = grid.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 2 folds for each of 6 candidates, totalling 12 fits
[CV 1/2] END ...........batch_size=1, epochs=10;, score=0.240 total time=   0.9s
[CV 2/2] END ...........batch_size=1, epochs=10;, score=0.333 total time=   2.2s
[CV 1/2] END ...........batch_size=1, epochs=20;, score=0.293 total time=   1.1s
[CV 2/2] END ...........batch_size=1, epochs=20;, score=0.333 total time=   1.1s
[CV 1/2] END ...........batch_size=4, epochs=10;, score=0.000 total time=   0.5s
[CV 2/2] END ...........batch_size=4, epochs=10;, score=0.333 total time=   0.8s
[CV 1/2] END ...........batch_size=4, epochs=20;, score=0.227 total time=   0.6s
[CV 2/2] END ...........batch_size=4, epochs=20;, score=0.333 total time=   0.5s
[CV 1/2] END ...........batch_size=8, epochs=10;, score=0.000 total time=   0.5s
[CV 2/2] END ...........batch_size=8, epochs=10;, score=0.000 total time=   0.6s
[CV 1/2] END ...........batch_size=8, epochs=20;, score=0.107 total time=   0.5s
[CV 2/2] END ...........batch_size=8, epochs=20;,

In [12]:
np.argmax(grid.best_estimator_.predict_proba(X[0:1]), axis=1)

array([0], dtype=int64)

In [14]:
grid.cv_results_

{'mean_fit_time': array([0.82676244, 0.98529136, 0.60112453, 0.70766068, 0.4607178 ,
        0.60006571]),
 'std_fit_time': array([0.10468245, 0.01727211, 0.00650096, 0.05540848, 0.01619232,
        0.03235412]),
 'mean_score_time': array([0.15625989, 0.2225064 , 0.1501168 , 0.16512036, 0.14004064,
        0.14517999]),
 'std_score_time': array([0.01661217, 0.08392417, 0.01148713, 0.00155902, 0.01237082,
        0.01446056]),
 'param_batch_size': masked_array(data=[1, 1, 4, 4, 8, 8],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_epochs': masked_array(data=[10, 20, 10, 20, 10, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'batch_size': 1, 'epochs': 10},
  {'batch_size': 1, 'epochs': 20},
  {'batch_size': 4, 'epochs': 10},
  {'batch_size': 4, 'epochs': 20},
  {'batch_size': 8, 'epochs': 10},
  {'batch_size': 8, 'epochs': 20}],