<a href="https://colab.research.google.com/github/Sagar856/Data-Science-Notes/blob/main/NN_Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing the necessary packages
import pandas as pd
import numpy as np
import keras
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the comma-separated Pima Indians diabetes file into a NumPy array
dataset = np.loadtxt(fname='/content/pima-indians-diabetes.data.csv', delimiter=',')

In [None]:
# Columns 0-7 become the model inputs; column 8 is used as the target
X, y = dataset[:, :8], dataset[:, 8]

In [None]:
# Standardization: fit the scaler on X, then rescale X with the fitted scaler.
# NOTE(review): the scaler is fitted on every row, before any cross-validation
# split is made further down in the notebook.
a = StandardScaler()
X_standardized = a.fit(X).transform(X)

In [None]:
# Sanity check: per-column summary statistics of the standardized inputs
pd.DataFrame(data=X_standardized).describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,-8.789266000000001e-17,-9.251859000000001e-18,1.5034270000000003e-17,8.211024000000001e-17,-4.972874e-17,2.960595e-15,1.905883e-15,2.081668e-16
std,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652
min,-1.141852,-3.783654,-3.572597,-1.288212,-0.6928906,-4.060474,-1.189553,-1.041549
25%,-0.8448851,-0.6852363,-0.3673367,-1.288212,-0.6928906,-0.5955785,-0.6889685,-0.7862862
50%,-0.2509521,-0.1218877,0.1496408,0.1545332,-0.4280622,0.0009419788,-0.3001282,-0.3608474
75%,0.6399473,0.6057709,0.5632228,0.7190857,0.4120079,0.5847705,0.4662269,0.6602056
max,3.906578,2.444478,2.734528,4.921866,6.652839,4.455807,5.883565,4.063716


## Tuning of Hyperparameters : Batch Size and Epoch

In [None]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0


In [None]:
# Import packages
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.optimizers import Adam

In [None]:
# Create Model
def create_model():
  """Build and compile the baseline binary classifier.

  The network stacks three Dense layers (12 -> 8 -> 1 units) on top of
  8 input features and is compiled with Adam at a learning rate of 0.01.

  Returns:
    The compiled Sequential model.
  """
  network = Sequential([
      Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'),
      Dense(8, kernel_initializer='uniform', activation='relu'),
      Dense(1, kernel_initializer='uniform', activation='sigmoid'),
  ])
  network.compile(
      loss='binary_crossentropy',
      optimizer=Adam(learning_rate=0.01),
      metrics=['accuracy'],
  )
  return network


In [None]:
# Wrap the Keras builder so scikit-learn can clone and fit it.
# `model=` is used instead of the deprecated `build_fn=` alias, matching the
# later cells of this notebook.
model = KerasClassifier(model=create_model, verbose=0)

# Candidate values for the two training-loop hyperparameters
batch_size = [10, 20, 30]
epochs = [10, 50, 100]

# Make the dictionary of grid search parameters
param_grid = dict(batch_size=batch_size, epochs=epochs)

# Build and fit GridSearchCV: every (batch_size, epochs) pair is cross-validated
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=5)
grid_result = grid.fit(X_standardized, y)

In [None]:
# Report the best configuration, then the mean and std score of every candidate
print('Best: {}, using: {}'.format(grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)

for mean, stdev, param in zip(means, stds, params):
  print('{}, {} with: {}'.format(mean, stdev, param))

Best: 0.7774382480264832, using: {'batch_size': 20, 'epochs': 10}
0.7670401493930906, 0.05517362376184513 with: {'batch_size': 10, 'epochs': 10}
0.7618198794669383, 0.03884341295560071 with: {'batch_size': 10, 'epochs': 50}
0.7475171886936591, 0.04137571772244512 with: {'batch_size': 10, 'epochs': 100}
0.7774382480264832, 0.043013098096244796 with: {'batch_size': 20, 'epochs': 10}
0.7578983108394872, 0.03908615155666879 with: {'batch_size': 20, 'epochs': 50}
0.752652576181988, 0.03572537343226751 with: {'batch_size': 20, 'epochs': 100}
0.7696375519904932, 0.04320354655487485 with: {'batch_size': 30, 'epochs': 10}
0.7579322638146166, 0.05286808179435514 with: {'batch_size': 30, 'epochs': 50}
0.7605042016806722, 0.0429767419202424 with: {'batch_size': 30, 'epochs': 100}


## Tuning of Hyperparameters : Learning rate & Dropout rate

In [None]:
from keras.layers import Dropout

# Defining the model

def create_model(learning_rate, dropout_rate):
  """Build and compile an 8 -> 4 -> 1 classifier with dropout after each hidden layer.

  Args:
    learning_rate: learning rate handed to the Adam optimizer.
    dropout_rate: rate passed to both Dropout layers.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential()
  model.add(Dense(8, input_dim=8, kernel_initializer='normal', activation='relu'))
  model.add(Dropout(dropout_rate))
  # Only the first layer declares the input width; a later layer receives
  # whatever the layer before it outputs, so it must not repeat input_dim.
  model.add(Dense(4, kernel_initializer='normal', activation='relu'))
  model.add(Dropout(dropout_rate))
  model.add(Dense(1, activation='sigmoid'))

  adam = Adam(learning_rate=learning_rate)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  return model

# Define the grid search parameters.
# These lists must be defined in this cell: param_grid below reads them, and on
# a fresh kernel nothing else has created them yet.
learning_rate = [0.001, 0.01, 0.1]
dropout_rate = [0.0, 0.1, 0.2]

# Create model.
# learning_rate / dropout_rate are declared on the wrapper so that they are
# tunable parameters forwarded to create_model. The defaults are single values
# (not the candidate lists) so the wrapper is also usable outside a grid search.
# `model=` replaces the deprecated `build_fn=` alias.
model = KerasClassifier(
    model=create_model,
    verbose=0,
    batch_size=20,
    epochs=10,
    learning_rate=0.01,
    dropout_rate=0.0,
)

# Make a dictionary of grid search parameters
param_grid = dict(learning_rate=learning_rate, dropout_rate=dropout_rate)

# Build and fit the grid search cv
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=5)
grid_result = grid.fit(X_standardized, y)

In [None]:
# Summarize the results: best configuration first, then one line per candidate
print('Best: {}, using: {}'.format(grid_result.best_score_, grid_result.best_params_))
means, stds, params = map(
    grid_result.cv_results_.get,
    ('mean_test_score', 'std_test_score', 'params'),
)
for mean, stdev, param in zip(means, stds, params):
  print('{}, {}, with : {}'.format(mean, stdev, param))

Best: 0.7696545284780579, using: {'dropout_rate': 0.0, 'learning_rate': 0.01}
0.7460572107630932, 0.05530262215024485, with : {'dropout_rate': 0.0, 'learning_rate': 0.001}
0.7696545284780579, 0.05125362698381622, with : {'dropout_rate': 0.0, 'learning_rate': 0.01}
0.7383668618962738, 0.05360408679736712, with : {'dropout_rate': 0.0, 'learning_rate': 0.1}
0.7591885238944063, 0.03702459654717801, with : {'dropout_rate': 0.1, 'learning_rate': 0.001}
0.7371190900602665, 0.06554970624716877, with : {'dropout_rate': 0.1, 'learning_rate': 0.01}
0.7213903743315508, 0.0233238590467078, with : {'dropout_rate': 0.1, 'learning_rate': 0.1}
0.7605042016806722, 0.04545615034625589, with : {'dropout_rate': 0.2, 'learning_rate': 0.001}
0.7657074951192598, 0.04768866236811669, with : {'dropout_rate': 0.2, 'learning_rate': 0.01}
0.6991511756217639, 0.05022039574610123, with : {'dropout_rate': 0.2, 'learning_rate': 0.1}


## Tuning of Hyperparameters : Activation Function & Kernel Initializer

In [None]:
# Defining the model

def create_model(activation_function, kernel_initializer):
  """Build and compile an 8 -> 4 -> 1 classifier with configurable hidden layers.

  Args:
    activation_function: activation used by both hidden Dense layers.
    kernel_initializer: kernel initializer used by both hidden Dense layers.

  Returns:
    The compiled Sequential model (Adam, learning rate 0.01).
  """
  model = Sequential()
  model.add(Dense(8, input_dim=8, kernel_initializer=kernel_initializer, activation=activation_function))
  # Only the first layer declares the input width; the second layer's input
  # is the output of the first, so input_dim is not repeated here.
  model.add(Dense(4, kernel_initializer=kernel_initializer, activation=activation_function))
  model.add(Dense(1, activation='sigmoid'))

  adam = Adam(learning_rate=0.01)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  return model

# Define the grid search parameters (written once and reused below)
activation_function = ['softmax', 'relu', 'tanh', 'linear']
kernel_initializer = ['uniform', 'normal', 'zero']

# Create the model.
# activation_function / kernel_initializer are declared on the wrapper so that
# they are tunable parameters forwarded to create_model. The defaults are single
# values rather than the candidate lists. `model=` replaces the deprecated
# `build_fn=` alias.
model = KerasClassifier(
    model=create_model,
    verbose=0,
    batch_size=20,
    epochs=10,
    activation_function='relu',
    kernel_initializer='uniform',
)

# Make a dictionary of grid search parameters
param_grid = dict(activation_function=activation_function, kernel_initializer=kernel_initializer)

# Build and fit grid search CV
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=0)
grid_result = grid.fit(X_standardized, y)


In [None]:
# Print the best configuration, followed by mean/std score for each candidate
print('Best: {}, using : {}'.format(grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
  print('{}, {}, using:{}'.format(mean, stdev, param))

Best: 0.7748578219166455, using : {'activation_function': 'relu', 'kernel_initializer': 'normal'}
0.7591970121381886, 0.03609889912127852, using:{'activation_function': 'softmax', 'kernel_initializer': 'uniform'}
0.7578983108394873, 0.04340911937408291, using:{'activation_function': 'softmax', 'kernel_initializer': 'normal'}
0.7448688566335624, 0.034719693797489196, using:{'activation_function': 'softmax', 'kernel_initializer': 'zero'}
0.7592224768695357, 0.049790661888329625, using:{'activation_function': 'relu', 'kernel_initializer': 'uniform'}
0.7748578219166455, 0.05332923425325097, using:{'activation_function': 'relu', 'kernel_initializer': 'normal'}
0.6511586452762923, 0.05244525561493349, using:{'activation_function': 'relu', 'kernel_initializer': 'zero'}
0.7565911212970036, 0.03785042390953481, using:{'activation_function': 'tanh', 'kernel_initializer': 'uniform'}
0.7708938120702826, 0.03844673538188243, using:{'activation_function': 'tanh', 'kernel_initializer': 'normal'}
0.65

## Tuning of Hyperparameters : Number of neurons in the hidden layers

In [None]:
# defining the model

def create_model(neuron1, neuron2):
  """Build and compile a classifier whose two hidden layers have configurable widths.

  Args:
    neuron1: number of units in the first hidden Dense layer.
    neuron2: number of units in the second hidden Dense layer.

  Returns:
    The compiled Sequential model (Adam, learning rate 0.01).
  """
  model = Sequential()
  model.add(Dense(neuron1, input_dim=8, kernel_initializer='uniform', activation='tanh'))
  # The second layer's input is the first layer's output (neuron1 values wide),
  # so it must not declare input_dim=8; the width is taken from the previous layer.
  model.add(Dense(neuron2, kernel_initializer='uniform', activation='tanh'))
  model.add(Dense(1, activation='sigmoid'))

  adam = Adam(learning_rate=0.01)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  return model

# Define the grid search parameters (written once and reused below)
neuron1 = [4, 8, 16]
neuron2 = [2, 4, 8]

# Create the model.
# neuron1 / neuron2 are declared on the wrapper so that they are tunable
# parameters forwarded to create_model. The defaults are single layer widths
# rather than the candidate lists.
model = KerasClassifier(
    model=create_model,
    verbose=0,
    batch_size=20,
    epochs=10,
    neuron1=8,
    neuron2=4,
)

# Make a dictionary of grid search parameters
param_grid = dict(neuron1=neuron1, neuron2=neuron2)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=5)
grid_result = grid.fit(X_standardized, y)

In [None]:
# Show the best configuration, then mean/std score for every candidate
print('Best: {}, using : {}'.format(grid_result.best_score_, grid_result.best_params_))
means, stds, params = map(
    grid_result.cv_results_.get,
    ('mean_test_score', 'std_test_score', 'params'),
)
for mean, stdev, param in zip(means, stds, params):
  print('{}, {}, using:{}'.format(mean, stdev, param))

Best: 0.776114081996435, using : {'neuron1': 8, 'neuron2': 8}
0.7630931160342925, 0.043627087599553034, using:{'neuron1': 4, 'neuron2': 2}
0.7696035990153638, 0.037971023630990544, using:{'neuron1': 4, 'neuron2': 4}
0.770927765045412, 0.040816915069101796, using:{'neuron1': 4, 'neuron2': 8}
0.7657159833630423, 0.03746651960427976, using:{'neuron1': 8, 'neuron2': 2}
0.7579067990832697, 0.04319670012975347, using:{'neuron1': 8, 'neuron2': 4}
0.776114081996435, 0.03658783021191213, using:{'neuron1': 8, 'neuron2': 8}
0.761811391223156, 0.04127295746546946, using:{'neuron1': 16, 'neuron2': 2}
0.7748153806977337, 0.03539795507181771, using:{'neuron1': 16, 'neuron2': 4}
0.7760971055088702, 0.03577667982162657, using:{'neuron1': 16, 'neuron2': 8}


## Training the model with Tuned values of Hyperparameters

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Defining the model

def create_model():
  """Build and compile the classifier using the tuned hyperparameter values.

  Two hidden Dense layers of 8 units each ('normal' initializer, relu) feed a
  single sigmoid output; the model is compiled with Adam at learning rate 0.01.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential()
  model.add(Dense(8, input_dim=8, kernel_initializer='normal', activation='relu'))
  # Only the first layer declares the input width; input_dim is not repeated
  # on later layers because their input is the previous layer's output.
  model.add(Dense(8, kernel_initializer='normal', activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  adam = Adam(learning_rate=0.01)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  return model

# Wrap the tuned builder together with the tuned batch size and epoch count

model = KerasClassifier(model=create_model, verbose=0, batch_size=20, epochs=10)

# Train on the full standardized dataset

model.fit(X=X_standardized, y=y)

# Predict on the same rows the model was trained on

y_predict = model.predict(X=X_standardized)

# Accuracy of those predictions (i.e. measured on the training rows)

print(accuracy_score(y_true=y, y_pred=y_predict))


0.7981770833333334


## Hyperparameters all at once

This approach is more thorough, because every combination of hyperparameters is evaluated together, but it is computationally expensive.

In [None]:
def create_model(learning_rate, dropout_rate, activation_function, kernel_initializer, neuron1, neuron2):
  """Build and compile a two-hidden-layer classifier with every tunable exposed.

  Args:
    learning_rate: learning rate handed to the Adam optimizer.
    dropout_rate: rate passed to the Dropout layer after each hidden layer.
    activation_function: activation used by both hidden Dense layers.
    kernel_initializer: kernel initializer used by both hidden Dense layers.
    neuron1: number of units in the first hidden Dense layer.
    neuron2: number of units in the second hidden Dense layer.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential()
  model.add(Dense(neuron1, input_dim=8, kernel_initializer=kernel_initializer, activation=activation_function))
  model.add(Dropout(dropout_rate))
  # The second hidden layer's input is neuron1 values wide, not 8, so it must
  # not declare input_dim; the width is taken from the previous layer.
  model.add(Dense(neuron2, kernel_initializer=kernel_initializer, activation=activation_function))
  model.add(Dropout(dropout_rate))
  model.add(Dense(1, activation='sigmoid'))

  adam = Adam(learning_rate=learning_rate)
  model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
  return model

# Defining the grid search parameters (written once and reused below)

batch_size = [10, 20, 40]
epochs = [15, 50, 100]
learning_rate = [0.03, 0.01, 0.02]
dropout_rate = [0.0, 0.01, 0.02]
activation_function = ['softmax', 'relu', 'tanh', 'linear']
kernel_initializer = ['uniform', 'normal', 'zero']
neuron1 = [7, 8, 9]
neuron2 = [7, 8, 9]

# Create the model.
# Every builder argument is declared on the wrapper so that it is a tunable
# parameter forwarded to create_model. The defaults are single values rather
# than the candidate lists; the grid search overrides them for each candidate.

model = KerasClassifier(
    model=create_model,
    verbose=0,
    batch_size=20,
    epochs=10,
    learning_rate=0.01,
    dropout_rate=0.0,
    activation_function='relu',
    kernel_initializer='normal',
    neuron1=8,
    neuron2=8,
)

# Make the dictionary of grid search parameters

param_grid = dict(
    batch_size=batch_size,
    epochs=epochs,
    learning_rate=learning_rate,
    dropout_rate=dropout_rate,
    activation_function=activation_function,
    kernel_initializer=kernel_initializer,
    neuron1=neuron1,
    neuron2=neuron2,
)

# Build and fit the grid search CV.
# NOTE: this grid has 3*3*3*3*4*3*3*3 = 8748 candidates; with 5-fold
# cross-validation that is 43740 model fits, so expect a very long run.

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(), verbose=10)
grid_result = grid.fit(X_standardized, y)



Fitting 5 folds for each of 8748 candidates, totalling 43740 fits
[CV 1/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=15, kernel_initializer=uniform, learning_rate=0.03, neuron1=7, neuron2=7
[CV 1/5; 1/8748] END activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=15, kernel_initializer=uniform, learning_rate=0.03, neuron1=7, neuron2=7;, score=0.708 total time=   4.4s
[CV 2/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=15, kernel_initializer=uniform, learning_rate=0.03, neuron1=7, neuron2=7
[CV 2/5; 1/8748] END activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=15, kernel_initializer=uniform, learning_rate=0.03, neuron1=7, neuron2=7;, score=0.688 total time=   2.9s
[CV 3/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=15, kernel_initializer=uniform, learning_rate=0.03, neuron1=7, neuron2=7
[CV 3/5; 1/8748] END activation_function=softma

In [None]:
# Report the best configuration, then mean/std score for each candidate
print('Best: {}, using : {}'.format(grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
  print('{}, {}, using:{}'.format(mean, stdev, param))

Best: 0.776114081996435, using : {'neuron1': 8, 'neuron2': 8}
0.7630931160342925, 0.043627087599553034, using:{'neuron1': 4, 'neuron2': 2}
0.7696035990153638, 0.037971023630990544, using:{'neuron1': 4, 'neuron2': 4}
0.770927765045412, 0.040816915069101796, using:{'neuron1': 4, 'neuron2': 8}
0.7657159833630423, 0.03746651960427976, using:{'neuron1': 8, 'neuron2': 2}
0.7579067990832697, 0.04319670012975347, using:{'neuron1': 8, 'neuron2': 4}
0.776114081996435, 0.03658783021191213, using:{'neuron1': 8, 'neuron2': 8}
0.761811391223156, 0.04127295746546946, using:{'neuron1': 16, 'neuron2': 2}
0.7748153806977337, 0.03539795507181771, using:{'neuron1': 16, 'neuron2': 4}
0.7760971055088702, 0.03577667982162657, using:{'neuron1': 16, 'neuron2': 8}
