In [1]:
# import the required packages
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
import pandas as pd

Using TensorFlow backend.


In [2]:
# Load the dataset
X = pd.read_csv('data/traffic_volume_feats.csv')
y = pd.read_csv('data/traffic_volume_target.csv')

# Print the sizes of input data and output data
print("Input data size = ", X.shape)
print("Output size = ", y.shape)
# Print the range for output
print(f"Output Range = ({y['Volume'].min()}, { y['Volume'].max()})")

Input data size =  (10000, 10)
Output size =  (10000, 1)
Output Range = (0.000000, 584.000000)


In [3]:
# Create the function that returns the keras model 1
def build_model_1(optimizer='adam'):
    # create model 1
    model = Sequential()
    model.add(Dense(10, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


# Create the function that returns the keras model 2
def build_model_2(optimizer='adam'):
    # create model 2
    model = Sequential()
    model.add(Dense(10, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Create the function that returns the keras model 3
def build_model_3(optimizer='adam'):
    # create model 3
    model = Sequential()
    model.add(Dense(10, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [4]:
# define a seed for random number generator so the result will be reproducible
seed = 1
np.random.seed(seed)
# determine the number of folds for k-fold cross validation
n_folds = 5
# define the list to store cross validation scores
results =[]
# define the possible options for the model
models = [build_model_1, build_model_2, build_model_3]
# loop over models
for i in range(len(models)):
    # build the Scikit-Learn interface for the keras model
    regressor = KerasRegressor(build_fn=models[i], epochs=100, batch_size=50, verbose=0)
    # build the pipeline of transformations so for each fold trainind set will be scaled 
    # and test set will be scaled accordingly.
    model = make_pipeline(StandardScaler(), regressor)
    # define the cross validation iterator
    kfold = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # perform the k-fold cross validation. 
    # store the scores in result
    result = cross_val_score(model, X, y, cv=kfold)
    # add the scores to the results list 
    results.append(result)

In [5]:
# Print cross validation score for each model
for i in range(len(models)):
    print("Model ", i+1," test error rate = ", abs(results[i].mean()))

Model  1  test error rate =  25.384291048049924
Model  2  test error rate =  25.217141685485842
Model  3  test error rate =  25.38113227844238


In [6]:
# define a seed for random number generator so the result will be reproducible
np.random.seed(seed)
# determine the number of folds for k-fold cross validation
n_folds = 5
# define the list to store cross validation scores
results =[]
# define possible options for epochs and batch_size
epochs = [80, 100]
batches = [50, 25]
# loop over all possible pairs of epochs, batch_size
for i in range(len(epochs)):
    for j in range(len(batches)):
        # build the Scikit-Learn interface for the keras model
        regressor = KerasRegressor(build_fn=build_model_2, epochs=epochs[i], batch_size=batches[j], verbose=0)
        # build the pipeline of transformations so for each fold trainind set will be scaled 
        # and test set will be scaled accordingly.
        model = make_pipeline(StandardScaler(), regressor)
        # define the cross validation iterator
        kfold = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
        # perform the k-fold cross validation. 
        # store the scores in result
        result = cross_val_score(model, X, y, cv=kfold)
        # add the scores to the results list 
        results.append(result)

In [7]:
# Print cross validation score for each possible pair of epochs, batch_size
c = 0
for i in range(len(epochs)):
    for j in range(len(batches)):
        print("batch_size = ", batches[j],", epochs = ", epochs[i], ", Test error rate = ", abs(results[c].mean()))
        c += 1

batch_size =  50 , epochs =  80 , Test error rate =  25.273476047515867
batch_size =  25 , epochs =  80 , Test error rate =  25.308155863285062
batch_size =  50 , epochs =  100 , Test error rate =  25.082583103179935
batch_size =  25 , epochs =  100 , Test error rate =  25.255000259876248


In [8]:
# define a seed for random number generator so the result will be reproducible
np.random.seed(seed)
# determine the number of folds for k-fold cross validation
n_folds = 5
# define the list to store cross validation scores
results =[]
# define the possible options for the optimizer
optimizers = ['adam', 'sgd', 'rmsprop']
# loop over optimizers
for i in range(len(optimizers)):
    optimizer=optimizers[i]
    # build the Scikit-Learn interface for the keras model
    regressor = KerasRegressor(build_fn=build_model_2, epochs=100, batch_size=50, verbose=0)
    # build the pipeline of transformations so for each fold trainind set will be scaled 
    # and test set will be scaled accordingly.
    model = make_pipeline(StandardScaler(), regressor)
    # define the cross validation iterator
    kfold = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # perform the k-fold cross validation. 
    # store the scores in result
    result = cross_val_score(model, X, y, cv=kfold)
    # add the scores to the results list 
    results.append(result)

In [9]:
# Print cross validation score for each optimizer
for i in range(len(optimizers)):
    print("optimizer=", optimizers[i]," test error rate = ", abs(results[i].mean()))

optimizer= adam  test error rate =  25.229352216720578
optimizer= sgd  test error rate =  25.175298171043394
optimizer= rmsprop  test error rate =  25.208842034339902
