In [273]:
## learning the keras API on toy data
import keras
from keras import layers, Sequential, Input
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.pipeline import Pipeline
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.linear_model import ARDRegression
from sklearn.decomposition import PCA

In [248]:
(x_train, y_train), (x_test, y_test) = keras.datasets.boston_housing.load_data(
    path="boston_housing.npz", test_split=0.2, seed=113
)
x_train.shape

(404, 13)

In [249]:
def x_train_pplot(x_train):
    df = pd.DataFrame(x_train)
    sns.pairplot(df)
    plt.show()
# x_train_pplot(x_train)

In [263]:
all_metrics = ['MeanSquaredError', 'RootMeanSquaredError', 'MeanAbsoluteError', 'MeanAbsolutePercentageError', 'MeanSquaredLogarithmicError', 'CosineSimilarity', 'LogCoshError', ]
all_activators = ['relu','sigmoid','softmax','softplus','softsign','tanh','selu','elu','exponential']
all_optimizers = ['SGD','RMSprop','Adam','Adadelta','Adagrad','Adamax','Nadam','Ftrl']
def make_model(activate_fn_1='elu', activate_fn_2='selu', optimize_fn='Adam', metrics_list=all_metrics):
    model = Sequential()
    model.add(Input(shape=(13)))
    model.add(layers.Dense(64, activation=activate_fn_1))
    model.add(layers.Dense(64, activation=activate_fn_1))
    model.add(layers.Dense(32, activation=activate_fn_1))
    model.add(layers.Dense(1, activation=activate_fn_2))
    model.compile(optimizer=optimize_fn, loss='mse', metrics = all_metrics)
    return model
def make_wide_deep_model(activate_fn_1='elu', activate_fn_2='selu', optimize_fn='Adam', metrics_list=all_metrics):
    model = Sequential()
    model.add(Input(shape=(13)))
    model.add(layers.Dense(128, activation=activate_fn_1))
    model.add(layers.Dense(128, activation=activate_fn_1))
    model.add(layers.Dense(64, activation=activate_fn_1))
    model.add(layers.Dense(64, activation=activate_fn_1))
    model.add(layers.Dense(32, activation=activate_fn_1))
    model.add(layers.Dense(1, activation=activate_fn_2))
    model.compile(optimizer=optimize_fn, loss='mse', metrics = all_metrics)
    return model
# all_models_strs = ['make_dense_model','make_deep_model','make_wide_model', 'make_wide_deep_model']
all_models = [make_model, make_wide_deep_model]

In [266]:
# model = make_wide_deep_model()
# print(model.input_spec)
# model.fit(x_train, y_train, batch_size=32, epochs=10)
# print(model.input_spec)
# model.evaluate(x_test, y_test)

In [283]:
## trying out sklearn gridcv optimization
mse = make_scorer(mean_squared_error, greater_is_better=False)
keras_regs = [KerasRegressor(build_fn=model, nb_epoch=100, batch_size=20, verbose=0) for model in all_models]
pipe = Pipeline([('preprocess', None), ('estimator', ARDRegression())])
param_grid = [
    {
        ## base line 'best' for classical regression
        'preprocess': [StandardScaler(), PCA(), ,  None],
        'estimator': [ARDRegression()]
    },
    {
    'preprocess': [PCA(), None],
    'estimator': keras_regs,
#     'estimator__build_fn': all_models,
    'estimator__activate_fn_1': ['elu', 'selu'],#all_activators,
    'estimator__activate_fn_2': ['selu', linear], #all_activators,
#     'estimator__optimize_fn': all_optimizers
    'estimator__optimize_fn': ['Adam'],
    'estimator__nb_epoch': [100],
    'estimator__batch_size': [20,40],
    }
]
grid = GridSearchCV(pipe, param_grid, cv=3, verbose = 3, scoring=mse)

# print(grid.get_params())
# pipe.get_params()

In [284]:
grid.fit(x_train, y_train)

Fitting 3 folds for each of 19 candidates, totalling 57 fits
[CV 1/3] END estimator=ARDRegression(), preprocess=StandardScaler(); total time=   0.0s
[CV 2/3] END estimator=ARDRegression(), preprocess=StandardScaler(); total time=   0.0s
[CV 3/3] END estimator=ARDRegression(), preprocess=StandardScaler(); total time=   0.0s
[CV 1/3] END ....estimator=ARDRegression(), preprocess=PCA(); total time=   0.0s
[CV 2/3] END ....estimator=ARDRegression(), preprocess=PCA(); total time=   0.0s
[CV 3/3] END ....estimator=ARDRegression(), preprocess=PCA(); total time=   0.0s
[CV 1/3] END .....estimator=ARDRegression(), preprocess=None; total time=   0.0s
[CV 2/3] END .....estimator=ARDRegression(), preprocess=None; total time=   0.0s
[CV 3/3] END .....estimator=ARDRegression(), preprocess=None; total time=   0.0s
[CV 1/3] END estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd688d0100>, estimator__activate_fn_1=elu, estimator__activate_fn_2=selu, estimator__batch

[CV 3/3] END estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd5cf487f0>, estimator__activate_fn_1=elu, estimator__activate_fn_2=selu, estimator__batch_size=20, estimator__nb_epoch=100, estimator__optimize_fn=Adam, preprocess=PCA(); total time=   0.8s
[CV 1/3] END estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd5cf487f0>, estimator__activate_fn_1=elu, estimator__activate_fn_2=selu, estimator__batch_size=20, estimator__nb_epoch=100, estimator__optimize_fn=Adam, preprocess=None; total time=   0.9s
[CV 2/3] END estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd5cf487f0>, estimator__activate_fn_1=elu, estimator__activate_fn_2=selu, estimator__batch_size=20, estimator__nb_epoch=100, estimator__optimize_fn=Adam, preprocess=None; total time=   0.8s
[CV 3/3] END estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd5cf487f0>, estimator__activate_

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('preprocess', None),
                                       ('estimator', ARDRegression())]),
             param_grid=[{'estimator': [ARDRegression()],
                          'preprocess': [StandardScaler(), PCA(), None]},
                         {'estimator': [<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd688d0100>,
                                        <tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor object at 0x7ffd5cf487f0>],
                          'estimator__activate_fn_1': ['elu', 'selu'],
                          'estimator__activate_fn_2': ['selu'],
                          'estimator__batch_size': [20, 40],
                          'estimator__nb_epoch': [100],
                          'estimator__optimize_fn': ['Adam'],
                          'preprocess': [PCA(), None]}],
             scoring=make_scorer(mean_squared_error, greater_is_better=False),
      

In [285]:
est = grid.best_estimator_
print('best score {}, scorer {}'.format( grid.best_score_, grid.scorer_))
grid_pred = est.predict(x_test)
print('mean_squared_error', mean_squared_error(y_test, grid_pred))
grid.best_params_

best score -26.131199302028516, scorer make_scorer(mean_squared_error, greater_is_better=False)
mean_squared_error 22.70486893093589


{'estimator': ARDRegression(), 'preprocess': StandardScaler()}

In [286]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_estimator,param_preprocess,param_estimator__activate_fn_1,param_estimator__activate_fn_2,param_estimator__batch_size,param_estimator__nb_epoch,param_estimator__optimize_fn,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.017896,0.006215,0.000606,2e-05,ARDRegression(),StandardScaler(),,,,,,"{'estimator': ARDRegression(), 'preprocess': S...",-23.75754,-22.951558,-31.684499,-26.131199,3.940538,1
1,0.008512,0.002466,0.00075,8e-05,ARDRegression(),PCA(),,,,,,"{'estimator': ARDRegression(), 'preprocess': P...",-24.492426,-22.166154,-32.404226,-26.354269,4.382113,2
2,0.008913,0.001307,0.000448,0.000115,ARDRegression(),,,,,,,"{'estimator': ARDRegression(), 'preprocess': N...",-24.698112,-22.963606,-33.072201,-26.911306,4.413585,3
3,0.853586,0.167995,0.08281,0.015773,<tensorflow.python.keras.wrappers.scikit_learn...,PCA(),elu,selu,20.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-151.4738,-91.866091,-190.562011,-144.633967,40.581676,12
4,0.796445,0.117215,0.070407,0.001311,<tensorflow.python.keras.wrappers.scikit_learn...,,elu,selu,20.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-92.366265,-228.727508,-107.484043,-142.859272,61.030879,11
5,0.712531,0.025785,0.073934,0.006667,<tensorflow.python.keras.wrappers.scikit_learn...,PCA(),elu,selu,40.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-179.003798,-127.651446,-271.061088,-192.572111,59.327654,17
6,0.747638,0.061252,0.071863,0.003156,<tensorflow.python.keras.wrappers.scikit_learn...,,elu,selu,40.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-278.308705,-96.306308,-94.816355,-156.477122,86.150085,14
7,0.6929,0.03934,0.070941,0.000948,<tensorflow.python.keras.wrappers.scikit_learn...,PCA(),selu,selu,20.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-167.651772,-140.468113,-105.071059,-137.730315,25.62171,10
8,0.665143,0.00057,0.071753,0.002142,<tensorflow.python.keras.wrappers.scikit_learn...,,selu,selu,20.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-121.048142,-58.034572,-731.531465,-303.53806,303.728436,18
9,1.041541,0.529904,0.070145,0.002179,<tensorflow.python.keras.wrappers.scikit_learn...,PCA(),selu,selu,40.0,100.0,Adam,{'estimator': <tensorflow.python.keras.wrapper...,-231.512968,-118.041885,-220.74721,-190.100688,51.142472,16


from different models:
{'estimator': make_wide_deep_model,
 'estimator__activate_fn_1': 'elu',
 'estimator__activate_fn_2': 'selu',
 'estimator__batch_size': 20,
 'estimator__nb_epoch': 100,
 'estimator__optimize_fn': 'Adam',
 'preprocess': None}
from adagrad, adam, elu,selu:
{'estimator': <tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor at 0x7ffd3f492a90>,
 'estimator__activate_fn_1': 'elu',
 'estimator__activate_fn_2': 'selu',
 'estimator__batch_size': 20,
 'estimator__nb_epoch': 100,
 'estimator__optimize_fn': 'Adam',
 'preprocess': None}
{'estimator__activate_fn_1': 'selu',
 'estimator__activate_fn_2': 'selu',
 'estimator__optimize_fn': 'Adam'}
fixed adagrad
{'estimator__activate_fn_1': 'selu',
 'estimator__activate_fn_2': 'selu',
 'estimator__optimize_fn': 'Adagrad'}
fixed activate
{'estimator__activate_fn_1': 'elu',
 'estimator__activate_fn_2': 'selu',
 'estimator__optimize_fn': 'Adagrad'}
fixed adam
{'estimator__activate_fn_1': 'elu',
 'estimator__activate_fn_2': 'selu',
 'estimator__optimize_fn': 'Adam'}