<img src="./img/oscon.png" width="400">

# Hands-on with Keras and Scikit-learn

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Dense, Flatten
from keras.models import Sequential
from keras.datasets import mnist
from tensorflow.examples.tutorials.mnist import input_data
import sys
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
def create_model():
    """Build and compile the network that KerasClassifier will train.

    Returns:
        A compiled Sequential model: one softmax Dense layer mapping
        784 input features to 10 classes.
    """
    # Build a fresh network on every call. Returning the module-level
    # `model` would hand back the KerasClassifier wrapper itself.
    net = Sequential()
    net.add(Dense(10, input_shape=(784,), activation="softmax"))
    net.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
    return net

# Wrap the builder so the network can be used as a scikit-learn estimator.
model = KerasClassifier(build_fn=create_model)

#### Default arguments that are passed on to the calls to model.fit()

In [None]:
def create_model():
    """Build and compile the network that KerasClassifier will train.

    Returns:
        A compiled Sequential model: one softmax Dense layer mapping
        784 input features to 10 classes.
    """
    # Build a fresh network on every call. Returning the module-level
    # `model` would hand back the KerasClassifier wrapper itself.
    net = Sequential()
    net.add(Dense(10, input_shape=(784,), activation="softmax"))
    net.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
    return net

# Extra keyword arguments such as `epochs` are kept by the wrapper and used
# as defaults for its calls to fit().
model = KerasClassifier(build_fn=create_model, epochs=20)

#### New arguments can be passed to your custom create_model() function.

In [None]:
def create_model(dropout_rate=0.0):
    """Build and compile a softmax classifier with input dropout.

    Args:
        dropout_rate: Fraction of the 784 input units dropped during
            training; 0.0 disables dropout.

    Returns:
        A compiled Sequential model: Dropout followed by a 10-way softmax
        Dense layer.
    """
    # Build a fresh network on every call. Returning the module-level
    # `model` would hand back the KerasClassifier wrapper itself.
    net = Sequential()
    net.add(Dropout(dropout_rate, input_shape=(784,)))
    net.add(Dense(10, activation="softmax"))
    net.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
    return net

# `dropout_rate` is not a fit() argument, so the wrapper forwards it to
# create_model() because the builder declares a parameter of that name.
model = KerasClassifier(build_fn=create_model, dropout_rate=0.2)

#### Using cross-validation (CV) to evaluate each individual model (3-fold by default)

In [None]:
# Candidate epoch counts. The key must be `epochs`, the name of the fit()
# argument: KerasClassifier rejects names such as `nb_epochs` that are
# neither fit/predict arguments nor parameters of the build function.
param_grid = dict(epochs=[10, 30, 50])
# n_jobs=-1 lets scikit-learn use all available cores.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
# NOTE(review): X and Y are not defined in the visible cells; they are
# presumably the training features and labels. Confirm before running.
grid_result = grid.fit(X, Y)

Once completed, you can access the outcome of the grid search in the result object returned from `grid.fit()`. The `best_score_` attribute holds the best score observed during the search, and `best_params_` holds the combination of parameters that achieved it.

### Batch Size and Number of Epochs

In [None]:
# Candidate mini-batch sizes and epoch counts to search over.
batch_size = [
    10, 20, 40,
    60, 80, 100,
]
epochs = [10, 50, 100]

# Grid keyed by the argument names the values are tried for.
param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
}

### Training Optimization Algorithm

In [None]:
# Optimizer names to compare, one grid-search candidate each.
optimizer = [
    'SGD',
    'RMSprop',
    'Adagrad',
    'Adadelta',
    'Adam',
    'Adamax',
    'Nadam',
]
param_grid = {'optimizer': optimizer}
# n_jobs=-1 lets scikit-learn use all available cores.
grid = GridSearchCV(model, param_grid, n_jobs=-1)

###  Learning Rate and Momentum

In [None]:
# Candidate learning rates and momentum values; the grid is their full
# cross product.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = {
    'learn_rate': learn_rate,
    'momentum': momentum,
}
grid = GridSearchCV(model, param_grid, n_jobs=-1)

### Network Weight Initialization

In [None]:
# Weight-initialisation schemes to compare, one grid-search candidate each.
init_mode = [
    'uniform',
    'lecun_uniform',
    'normal',
    'zero',
    'glorot_normal',
    'glorot_uniform',
    'he_normal',
    'he_uniform',
]
param_grid = {'init_mode': init_mode}
grid = GridSearchCV(model, param_grid, n_jobs=-1)

###  Neuron Activation Function

In [None]:
# Activation functions to compare, one grid-search candidate each.
activation = [
    'softmax',
    'softplus',
    'softsign',
    'relu',
    'tanh',
    'sigmoid',
    'hard_sigmoid',
    'linear',
]
param_grid = {'activation': activation}
grid = GridSearchCV(model, param_grid, n_jobs=-1)

###  Dropout Regularization

In [None]:
# Candidate dropout rates and weight-constraint values; the grid is their
# full cross product.
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
weight_constraint = [1, 2, 3, 4, 5]
param_grid = {
    'dropout_rate': dropout_rate,
    'weight_constraint': weight_constraint,
}
grid = GridSearchCV(model, param_grid, n_jobs=-1)

###  Number of Neurons in the Hidden Layer

In [None]:
# Candidate hidden-layer sizes, one grid-search candidate each.
neurons = [
    1, 5, 10, 15,
    20, 25, 30,
]
param_grid = {'neurons': neurons}
grid = GridSearchCV(model, param_grid, n_jobs=-1)

In [None]:
# Pick a platform-appropriate directory for the MNIST files.
if 'win32' in sys.platform:
    DATA_DIR = "c:\\tmp\\data"
else:
    DATA_DIR = '/tmp/data'

# Read the data set with one-hot labels.
data = input_data.read_data_sets(DATA_DIR, one_hot=True)

# Split into train/test arrays; labels are cast to 32-bit integers.
train_split, test_split = data.train, data.test
x_train = train_split.images
y_train = train_split.labels.astype(np.int32)
x_test = test_split.images
y_test = test_split.labels.astype(np.int32)
print(x_train.shape)

In [None]:
def create_model(optimizer='SGD'):
    """Build and compile a single-layer softmax classifier.

    Args:
        optimizer: Optimizer passed straight to compile().

    Returns:
        The compiled Sequential model (784 inputs, 10 softmax outputs).
    """
    softmax_layer = Dense(10, input_shape=(784,), activation="softmax")
    net = Sequential([softmax_layer])
    net.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Optimizers to compare; each one is passed to create_model() by name.
optimizer = ['SGD', 'adam', 'Adagrad', 'Adadelta']
param_grid = {'optimizer': optimizer}

# Every candidate trains for 10 epochs with mini-batches of 128.
model = KerasClassifier(
    build_fn=create_model,
    epochs=10,
    batch_size=128,
    verbose=1,
)

# Run the cross-validated search on the training data.
grid = GridSearchCV(model, param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)

In [None]:
# Report the best configuration, then one line per candidate with its mean
# and standard deviation of the cross-validation test score.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_table = grid_result.cv_results_
means = cv_table['mean_test_score']
stds = cv_table['std_test_score']
params = cv_table['params']
for row in zip(means, stds, params):
    # `row` is a (mean, std, params) tuple, matching the three placeholders.
    print("%f (%f) with: %r" % row)

In [None]:
# One bar per grid-search candidate, with the cross-validation spread as
# error bars.
# Derive the x positions from the results instead of hard-coding four bars,
# so the plot keeps working when the grid size changes.
positions = list(range(1, len(means) + 1))
# Label each bar with the parameter value(s) of its candidate.
labels = [', '.join(str(value) for value in candidate.values()) for candidate in params]
plt.bar(positions, means, yerr=stds)
plt.xticks(positions, labels)
plt.ylim([0.8,0.95])
plt.ylabel('Accuracy',fontsize=20)
plt.show()

In [None]:
from keras.optimizers import SGD

def create_model(optimizer='SGD',learn_rate=0.01, momentum=0):
    """Build a single-layer softmax classifier compiled with SGD.

    Args:
        optimizer: Accepted but not used; the model is always compiled with
            an SGD instance built from the two arguments below.
        learn_rate: Learning rate passed to SGD as `lr`.
        momentum: Momentum passed to SGD.

    Returns:
        The compiled Sequential model (784 inputs, 10 softmax outputs).
    """
    sgd = SGD(lr=learn_rate, momentum=momentum)
    net = Sequential([Dense(10, input_shape=(784,), activation="softmax")])
    net.compile(
        optimizer=sgd,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [None]:
# Learning rates and momentum values to try; the grid is their full cross
# product.
learn_rate = [0.2, 0.3, 0.5]
momentum = [0.0, 0.2, 0.4]
param_grid = {
    'learn_rate': learn_rate,
    'momentum': momentum,
}

# Every candidate trains for 10 epochs with mini-batches of 128.
model = KerasClassifier(
    build_fn=create_model,
    epochs=10,
    batch_size=128,
    verbose=1,
)
grid = GridSearchCV(model, param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)

In [None]:
# Report the best configuration, then one line per candidate with its mean
# and standard deviation of the cross-validation test score.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_table = grid_result.cv_results_
means = cv_table['mean_test_score']
stds = cv_table['std_test_score']
params = cv_table['params']
for row in zip(means, stds, params):
    # `row` is a (mean, std, params) tuple, matching the three placeholders.
    print("%f (%f) with: %r" % row)

## Hands-on exercise 

### Part 1: test performance against parameters

In [None]:
# Pick a platform-appropriate directory for the MNIST files.
if 'win32' in sys.platform:
    DATA_DIR = "c:\\tmp\\data"
else:
    DATA_DIR = '/tmp/data'

# Read the data set with one-hot labels.
data = input_data.read_data_sets(DATA_DIR, one_hot=True)

# Split into train/test arrays; labels are cast to 32-bit integers.
train_split, test_split = data.train, data.test
x_train = train_split.images
y_train = train_split.labels.astype(np.int32)
x_test = test_split.images
y_test = test_split.labels.astype(np.int32)
print(x_train.shape)

In [None]:
def create_model(optimizer='SGD'):
    """Exercise skeleton: build and compile the model to be grid-searched.

    Args:
        optimizer: Optimizer passed to compile(). Declared as a keyword
            argument so the function does not depend on whatever
            module-level `optimizer` variable exists when it is called.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # TODO (exercise): add the layers of your network below. Any value you
    # want GridSearchCV to tune must also be a keyword argument of this
    # function.
    ####
    ####
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# scikit-learn wrapper around create_model(); every fit uses mini-batches
# of 128.
model = KerasClassifier(
    build_fn=create_model,
    batch_size=128,
    verbose=1,
)

# Candidate values for the exercise.
neurons = [20, 30, 50, 100, 200]
epochs = [4, 6, 8]

# TODO (exercise): fill param_grid with the hyper-parameters to search.
param_grid = {}

grid = GridSearchCV(model, param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)


In [None]:
# Report the best configuration, then one line per candidate with its mean
# and standard deviation of the cross-validation test score.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_table = grid_result.cv_results_
means = cv_table['mean_test_score']
stds = cv_table['std_test_score']
params = cv_table['params']
for row in zip(means, stds, params):
    # `row` is a (mean, std, params) tuple, matching the three placeholders.
    print("%f (%f) with: %r" % row)


# Draw one curve per block of five consecutive results (blue, green, red):
# an error-bar line with black markers on top.
for start, colour in zip((0, 5, 10), 'bgr'):
    block = slice(start, start + 5)
    plt.errorbar(neurons, means[block], stds[block], lw=3, alpha=0.7, color=colour)
    plt.plot(neurons, means[block], 'ok', ms=8)

plt.xlabel('Neurons', fontsize=20)
plt.ylabel('Accuracy', fontsize=20)
plt.title('Acc vs. # neurons')
plt.show()

### Part 2: fit parameters to model

In [None]:
import numpy as np
from sklearn import datasets, linear_model

###
###

# Fit a linear regression of y on x and show the learned coefficients.
regr = linear_model.LinearRegression()
regr.fit(x, y)
print('Coefficients: \n', regr.coef_)

# Scatter the model's predictions against the true values.
predicted = regr.predict(x)
plt.plot(y, predicted, 'o', ms=10, alpha=0.7)

# Dashed reference line between the corners of the current axis limits.
x_limits = plt.xlim()
y_limits = plt.ylim()
plt.plot(x_limits, y_limits, ls="--", c=".3")

plt.xlabel('Real means', fontsize=20)
plt.ylabel('Model prediction', fontsize=20)
plt.show()

### Solution part 1: test performance against parameters

In [None]:
%load ../solutions/keras_sklearn_1.py

### Solution part 2: fit parameters to model

In [None]:
%load ../solutions/keras_sklearn_2.py