## Meta -

This file shows sklearn grid search done using sklearn

## Author - Rahul Suresh

In [None]:
#try grid search, 10 fold cross validation

In [1]:
import numpy as np
from sklearn import datasets, svm
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

In [2]:
import sklearn

print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 0.20.2.


In [None]:
#perform grid search for MLP in keras
#keras models can be used in ski-kit learn by using KerasClassifier class

In [3]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

Using TensorFlow backend.


In [4]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
print('Training data shape : ', train_images.shape, train_labels.shape)
 
print('Testing data shape : ', test_images.shape, test_labels.shape)
# Find the unique numbers from the train labels
classes = np.unique(train_labels)
nClasses = len(classes)

Training data shape :  (60000, 28, 28) (60000,)
Testing data shape :  (10000, 28, 28) (10000,)


In [5]:
# Change from matrix to array of dimension 28x28 to array of dimention 784
dimData=np.prod(train_images.shape[1:])
train_data = train_images.reshape(train_images.shape[0], dimData)
test_data = test_images.reshape(test_images.shape[0], dimData)
train_data = train_data.astype('float32')
test_data = test_data.astype('float32')
train_data /= 255
test_data /= 255
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

In [6]:
def create_model_compile(dropout1,dropout2):
    model = Sequential()
    model.add(Dense(512,activation='relu',input_shape=(dimData,), kernel_initializer='glorot_uniform'))
    model.add(Dropout(dropout1))
    model.add(Dense(512,activation='relu',kernel_initializer='glorot_uniform'))
    model.add(Dropout(dropout2))
    model.add(Dense(nClasses,activation='softmax',kernel_initializer='glorot_uniform'))
    model.compile(optimizer='adam', loss='kullback_leibler_divergence', metrics=['accuracy'])
    model.summary()
    return model

In [7]:
model = KerasClassifier(build_fn=create_model_compile, epochs=10, batch_size=256)

In [None]:
#parameter_candidates = [
#  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
#  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
#]

In [None]:
# Create a classifier object with the classifier and parameter candidates
#clf = GridSearchCV(estimator=svm.SVC(), param_grid=parameter_candidates, n_jobs=-1)

# Train the classifier on data1's feature and target data
#clf.fit(data1_features, data1_target)

In [9]:
param_grid = dict(dropout1=[0.2,0.5,0.52],dropout2=[0.2,0.5,0.52])
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(train_data, train_labels_one_hot)



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
param_grid

{'dropout1': [0.2, 0.5, 0.52], 'dropout2': [0.2, 0.5, 0.52]}

In [None]:
#points to note, use smaller subset of train data to get results faster

In [11]:
# View the accuracy score
print('Best score for train_data:', grid.best_score_)

Best score for train_data: 0.9779833333333333


In [12]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.977983 using {'dropout1': 0.52, 'dropout2': 0.2}
0.976867 (0.002098) with: {'dropout1': 0.2, 'dropout2': 0.2}
0.976400 (0.000698) with: {'dropout1': 0.2, 'dropout2': 0.5}
0.976433 (0.000660) with: {'dropout1': 0.2, 'dropout2': 0.52}
0.977700 (0.000991) with: {'dropout1': 0.5, 'dropout2': 0.2}
0.977283 (0.001426) with: {'dropout1': 0.5, 'dropout2': 0.5}
0.976383 (0.001076) with: {'dropout1': 0.5, 'dropout2': 0.52}
0.977983 (0.001674) with: {'dropout1': 0.52, 'dropout2': 0.2}
0.976783 (0.001100) with: {'dropout1': 0.52, 'dropout2': 0.5}
0.976017 (0.002118) with: {'dropout1': 0.52, 'dropout2': 0.52}


In [13]:
grid.score(test_data, test_labels_one_hot)



0.9823

In [None]:
#try to get number of epochs and batch size

In [None]:
#try this in keras
#I feel we need to use things like functions in create model to add layers and num neurons in each layer 
#by giving parameters explicitly

In [None]:
#how many epochs is it trained for?

In [18]:
def create_model_compile2():
    model = Sequential()
    model.add(Dense(512,activation='relu',input_shape=(dimData,), kernel_initializer='glorot_uniform'))
    model.add(Dropout(0.52))
    model.add(Dense(512,activation='relu',kernel_initializer='glorot_uniform'))
    model.add(Dropout(0.2))
    model.add(Dense(nClasses,activation='softmax',kernel_initializer='glorot_uniform'))
    model.compile(optimizer='adam', loss='kullback_leibler_divergence', metrics=['accuracy'])
    model.summary()
    return model

In [14]:
train_data_small= train_data[:2000]
train_labels_one_hot_small=train_labels_one_hot[:2000]

In [19]:
model2 = KerasClassifier(build_fn=create_model_compile2, verbose=1)

In [20]:
batch_size = [10, 20, 40, 80, 100,256,300]
epochs = [10, 50, 100]
param_grid2 = dict(batch_size=batch_size, epochs=epochs)
grid2 = GridSearchCV(estimator=model2, param_grid=param_grid2, n_jobs=-1)
grid_result2 = grid2.fit(train_data_small, train_labels_one_hot_small)



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Ep

In [21]:
# summarize results
print("Best: %f using %s" % (grid_result2.best_score_, grid_result2.best_params_))
means = grid_result2.cv_results_['mean_test_score']
stds = grid_result2.cv_results_['std_test_score']
params = grid_result2.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.918000 using {'batch_size': 10, 'epochs': 100}
0.899500 (0.012846) with: {'batch_size': 10, 'epochs': 10}
0.911500 (0.017986) with: {'batch_size': 10, 'epochs': 50}
0.918000 (0.017880) with: {'batch_size': 10, 'epochs': 100}
0.906500 (0.014034) with: {'batch_size': 20, 'epochs': 10}
0.907000 (0.011769) with: {'batch_size': 20, 'epochs': 50}
0.913500 (0.011336) with: {'batch_size': 20, 'epochs': 100}
0.903500 (0.016923) with: {'batch_size': 40, 'epochs': 10}
0.912500 (0.018662) with: {'batch_size': 40, 'epochs': 50}
0.907500 (0.022547) with: {'batch_size': 40, 'epochs': 100}
0.905000 (0.025091) with: {'batch_size': 80, 'epochs': 10}
0.907500 (0.017381) with: {'batch_size': 80, 'epochs': 50}
0.915000 (0.009309) with: {'batch_size': 80, 'epochs': 100}
0.901000 (0.014059) with: {'batch_size': 100, 'epochs': 10}
0.910000 (0.025259) with: {'batch_size': 100, 'epochs': 50}
0.911000 (0.017154) with: {'batch_size': 100, 'epochs': 100}
0.897000 (0.016476) with: {'batch_size': 256, 'epoch

In [22]:
grid.score(test_data, test_labels_one_hot)



0.9823