In [1]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor


from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [13]:
# Version check

import sklearn
import keras
import tensorflow as tf

print('Pandas version :' , pd.__version__)
print('NumPy version :' , np.__version__)
print('Sci-kit learn version :' , sklearn.__version__)
print('Keras version :' , keras.__version__)
print('With tensorflow backend, version : ', tf.__version__ )

Pandas version : 0.23.0
NumPy version : 1.14.3
Sci-kit learn version : 0.19.1
Keras version : 2.2.4
With tensorflow backend, version :  1.11.0


In [2]:
#Load data
from keras.datasets import boston_housing
(x_train,y_train),(x_test,y_test) = boston_housing.load_data()

In [3]:
x_train.shape, x_test.shape

((404, 13), (102, 13))

In [4]:
#To perform K-fold cross validation
#Concatnate train and test set into one single dataset


x_train = np.concatenate((x_train,x_test), axis=0)
y_train = np.concatenate((y_train,y_test), axis=0)

In [5]:
x_train.shape, y_train.shape

((506, 13), (506,))

In [6]:
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
                'TAX', 'PTRATIO', 'B', 'LSTAT']

key = ['Per capita crime rate.',
'The proportion of residential land zoned for lots over 25,000 square feet.',
'The proportion of non-retail business acres per town.',
'Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).',
'Nitric oxides concentration (parts per 10 million).',
'The average number of rooms per dwelling.',
'The proportion of owner-occupied units built before 1940.',
'Weighted distances to five Boston employment centers.',
'Index of accessibility to radial highways.',
'Full-value property-tax rate per $10,000.',
'Pupil-teacher ratio by town.',
'1000 * (Bk - 0.63) ** 2 where Bk is the proportion of Black people by town.',
'Percentage lower status of the population.']

col_index = dict(zip(column_names, key))
col_index

{'CRIM': 'Per capita crime rate.',
 'ZN': 'The proportion of residential land zoned for lots over 25,000 square feet.',
 'INDUS': 'The proportion of non-retail business acres per town.',
 'CHAS': 'Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).',
 'NOX': 'Nitric oxides concentration (parts per 10 million).',
 'RM': 'The average number of rooms per dwelling.',
 'AGE': 'The proportion of owner-occupied units built before 1940.',
 'DIS': 'Weighted distances to five Boston employment centers.',
 'RAD': 'Index of accessibility to radial highways.',
 'TAX': 'Full-value property-tax rate per $10,000.',
 'PTRATIO': 'Pupil-teacher ratio by town.',
 'B': '1000 * (Bk - 0.63) ** 2 where Bk is the proportion of Black people by town.',
 'LSTAT': 'Percentage lower status of the population.'}

In [7]:
df = pd.DataFrame(x_train, columns=column_names)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72
1,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11
2,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26
3,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01
4,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65


In [8]:
#Small model to establish baseline

def baseline_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


In [9]:
# Larger models below let you tune your model and determine where overfitting occurs,
# Then you can adjust the model hyperparameters (number of layers/neurons per layer accordingly)

def larger_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    return model


In [10]:
def wider_model():
    model = Sequential()
    model.add(Dense(20, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    return model



In [11]:
def wider_deeper_model():
    model = Sequential()
    model.add(Dense(25, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(18, kernel_initializer='normal', activation='relu'))
    model.add(Dense(7, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    return model



In [12]:
#define a list variable holding each model function

all_models = [baseline_model, larger_model, wider_model, wider_deeper_model ]


type(all_models[1])

function

In [14]:
#set seed for reproducability 
seed = 7

# Add a data Scaler and the keras regressor containing our model function to a list of estimators

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)))
    
#add our estimator list to a Sklearn pipeline

pipeline = Pipeline(estimators)
 
#initialize instance of k-fold validation from sklearn api

kfold = KFold(n_splits=5, random_state=seed)

In [16]:
#for each model

for i in all_models:
    
    #initializae fixed random weights
    np.random.seed(seed)
    
    #pass pipeline instance, training data and labels, and k-fold crossvalidator instance to evaluate score

    results = cross_val_score(pipeline, x_train, y_train, cv=kfold)

    #Print name of model being evaluated    
    print('Model Type:', i)
    
    #The results variable contains the mean squared errors for each of our 5 cross validation runs.
    print('MSE per fold:')
    print(results)
    
    #Print mean and sd of all five folds, per model
    print("Average MSE of all 5 runs: %.2f, with standard dev: (%.2f)" % (-1*(results.mean()), results.std()))

Model Type: <function baseline_model at 0x000001454959C950>
MSE per fold:
[-14.16954346 -12.72713017 -17.84744423 -14.52326518 -17.4097627 ]
Average MSE of all 5 runs: 15.34, with standard dev: (1.97)
Model Type: <function larger_model at 0x000001454959CB70>
MSE per fold:
[-11.07775911 -12.70752338 -17.85225084 -14.55760158 -17.3656806 ]
Average MSE of all 5 runs: 14.71, with standard dev: (2.61)
Model Type: <function wider_model at 0x000001454959C730>
MSE per fold:
[-10.58079611 -12.73620375 -17.84509979 -14.44858215 -17.42180785]
Average MSE of all 5 runs: 14.61, with standard dev: (2.76)
Model Type: <function wider_deeper_model at 0x000001454959CE18>
MSE per fold:
[-14.14551844 -12.73632747 -17.87115498 -14.50908846 -17.36813697]
Average MSE of all 5 runs: 15.33, with standard dev: (1.97)
