# Hyperparameter tuning: grid search over batch size and epochs

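* The batch size is the number of training samples used to estimate the gradient for each weight update, so it also determines how often the weights are updated within an epoch.
* An epoch is one full pass of the entire training dataset through the network, batch by batch.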

* Some networks are sensitive to the batch size, such as **LSTM recurrent neural networks and convolutional neural networks**.

In [1]:
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

Using Theano backend.


# Model

In [5]:
def create_model():
    """Build and compile the binary classifier evaluated by the grid search.

    The network takes 8 input features, feeds them through one hidden
    ``Dense`` layer of 12 ReLU units, and ends in a single sigmoid unit.
    It is compiled with binary cross-entropy loss, the Adam optimizer and
    accuracy as the reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Layers are listed in the order the data flows through them.
    layers = [
        Dense(12, input_dim=8, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [7]:
# Print a layer-by-layer summary (output shapes and parameter counts) of a
# freshly built model.
create_model().summary()
# Alternatively, the architecture could be saved as an image using Keras'
# visualisation utilities.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 12)                108       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 13        
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


# fix random seed for reproducibility

In [8]:
# Seed NumPy's global random number generator with a fixed value so that
# repeated runs draw the same sequence of random numbers.
seed = 7
numpy.random.seed(seed)

# load dataset

In [9]:
# Read the comma-separated file into a NumPy array; the path is relative to
# the notebook's working directory.
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")

In [10]:
# Split the loaded array into input features (X) and the target column (Y).
X = dataset[:, :8]  # every row, column indices 0-7 (the first eight columns)
Y = dataset[:, 8]   # every row, column index 8 (the ninth column) only

# Create the model using the KerasClassifier wrapper class

In [11]:
# Wrap the model-building function so scikit-learn can drive the Keras network
# as an estimator; verbose=0 is forwarded to training to silence its output.
model = KerasClassifier(build_fn=create_model, verbose=0)

# define the grid search parameters

In [12]:
# Hyperparameter values to try; the grid search evaluates every batch_size
# together with every epochs value.
# A wider candidate set, noted for reference but not searched here:
#   batch_size: 10, 20, 40, 60, 80, 100
#   epochs:     10, 50, 100
param_grid = {
    "batch_size": [10, 20],
    "epochs": [50, 100],
}

# sklearn GridSearchCV class & fit

In [15]:
%%time
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)#not running paralley on all core
grid_result = grid.fit(X, Y)

Wall time: 10min 13s


# summarize results

In [16]:
# Report the best mean cross-validated score and the parameter combination
# that achieved it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.684896 using {'batch_size': 20, 'epochs': 100}


In [17]:
# Pull the per-combination mean score, score standard deviation and parameter
# dict out of the search results, in that order.
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)

In [22]:
# One line per parameter combination: mean test score, its standard deviation
# in parentheses, then the parameters that produced it.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

0.605469 (0.160964) with: {'batch_size': 10, 'epochs': 50}
0.683594 (0.009568) with: {'batch_size': 10, 'epochs': 100}
0.457031 (0.148335) with: {'batch_size': 20, 'epochs': 50}
0.684896 (0.014382) with: {'batch_size': 20, 'epochs': 100}
