### Neural Networks

In [None]:
import os
import glob
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import MinMaxScaler
from tensorflow import set_random_seed

from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import ModelCheckpoint

import utils
from constants import DATA_DIR

In [2]:
# Seed NumPy's global RNG and TensorFlow's RNG with the same fixed value,
# presumably so that weight initialisation and shuffling are repeatable.
np.random.seed(23)
set_random_seed(23)

# Silence every warning for the rest of the session (this also hides
# deprecation notices from the libraries imported above).
warnings.filterwarnings("ignore")

In [3]:
def root_mean_squared_error(y_true, y_pred):
    """Keras-compatible loss: root of the mean squared residual.

    Built only from Keras backend ops (``K``) so it can be passed as the
    ``loss`` argument of ``model.compile``.
    """
    squared_residuals = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_residuals))


def get_modelcheckpoint_path(model_num):
    """Build the checkpoint file-path template for one model.

    The returned string is meant to be handed to ``ModelCheckpoint``; it
    still contains the ``{epoch:02d}`` and ``{val_loss:.5f}`` placeholders,
    so each saved file records the epoch and validation loss in its name.
    The name is prefixed with the current date and time (minute resolution).

    Args:
        model_num: Identifier of the model; selects the
            ``Models/NN/Model_<model_num>/`` sub-directory of ``DATA_DIR``.

    Returns:
        The path template, e.g.
        ``<DATA_DIR>/Models/NN/Model_1/20190101-10h00m-model-epoch_{epoch:02d}-rmse_{val_loss:.5f}.hdf5``.
    """
    parent = DATA_DIR + "/Models/NN/Model_{}/".format(model_num)
    # Make sure the target directory exists: without it the first attempt
    # to write a checkpoint fails on a fresh DATA_DIR.
    os.makedirs(parent, exist_ok=True)

    timestamp = pd.Timestamp.now().strftime('%Y%m%d-%Hh%Mm')
    child = timestamp + "-model-epoch_{epoch:02d}-rmse_{val_loss:.5f}.hdf5"
    return parent + child


def get_weights_path_and_epoch(model_num):
    """Find the saved checkpoint with the lowest validation loss for a model.

    Scans ``<DATA_DIR>/Models/NN/Model_<model_num>/*.hdf5`` and reads the
    epoch and validation loss back out of each file name, which is expected
    to end in ``epoch_<epoch>-rmse_<loss>.hdf5`` (the template produced by
    ``get_modelcheckpoint_path``). Files that do not follow this naming are
    ignored. Ties on loss go to the earliest epoch.

    Args:
        model_num: Identifier of the model whose checkpoints are searched.

    Returns:
        A ``(filepath, epoch)`` tuple for the best checkpoint, where
        ``epoch`` is the epoch number recorded in the file name.

    Raises:
        ValueError: If no checkpoint with a parseable name is found.
    """
    import re  # local import so the block does not depend on file-level imports

    # Parse by pattern rather than by fixed slice positions: a fixed-width
    # slice misreads losses >= 10 (e.g. "12.50000" would be read as 2.5).
    name_pattern = re.compile(r"epoch_(\d+)-rmse_(-?\d+(?:\.\d+)?)\.hdf5$")

    candidates = []
    for filepath in glob.glob(DATA_DIR + "/Models/NN/Model_{}/*.hdf5".format(model_num)):
        match = name_pattern.search(filepath)
        if match is None:
            continue
        # (loss, epoch, path) so that min() orders by loss first.
        candidates.append((float(match.group(2)), int(match.group(1)), filepath))

    if not candidates:
        raise ValueError(
            "No checkpoint files found for model {} under {}".format(
                model_num, DATA_DIR + "/Models/NN/Model_{}/".format(model_num)))

    # The epoch comes from the file name, not from the file's position in
    # the glob result, whose order is arbitrary.
    best_loss, best_epoch, best_path = min(candidates)
    print('Model {} | Lowest Valid Error: {} at Epoch {}'.format(model_num, best_loss, best_epoch))
    return (best_path, best_epoch)


def plot_learning_process(hist_list):
    """Show one train-vs-validation loss figure per training history.

    Each element of ``hist_list`` must expose a ``history`` mapping with
    ``'loss'`` and ``'val_loss'`` series. Figures are displayed one after
    another with ``plt.show()``; nothing is returned.
    """
    for training_run in hist_list:
        plt.subplot(1, 1, 1)
        # Training curve first, validation second, matching the legend order.
        for series_name in ('loss', 'val_loss'):
            plt.plot(training_run.history[series_name])
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.show()


def run_models():
    """Train models 1-3 in turn and collect what each ``fit`` call returns.

    Every model is wrapped in a ``KerasRegressor`` (100 epochs, batch size
    32, 20% validation split, shuffling on) and fitted on the module-level
    ``X`` and ``y`` with a ``ModelCheckpoint`` callback whose file names are
    produced by ``get_modelcheckpoint_path``. ``save_best_only`` is off, so
    the callback is not restricted to improving epochs.

    Returns:
        A list with the three ``fit`` results, ordered model 1, 2, 3.
    """
    histories = []
    for model_num in (1, 2, 3):
        regressor = KerasRegressor(
            build_fn=create_model(model_num),
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            shuffle=True,
            verbose=1,
        )

        checkpoint_cb = ModelCheckpoint(
            get_modelcheckpoint_path(model_num),
            monitor='val_loss',
            save_best_only=False,
            verbose=0,
            mode='min',
        )

        histories.append(regressor.fit(X, y, callbacks=[checkpoint_cb]))

    return histories


def create_model(model_num):
    """Return the zero-argument builder for one of the predefined networks.

    The returned function is suitable as ``build_fn`` for ``KerasRegressor``.
    When called it builds, compiles (RMSE loss, Adam optimizer), prints a
    summary of, and returns a ``Sequential`` model. All networks start with a
    ReLU layer as wide as the input (``X.shape[1]``, read from the
    module-level ``X`` at build time) and end with a single linear unit:

    * model 1: hidden ReLU layers 64 -> 32
    * model 2: hidden ReLU layers 64 -> 128 -> 32, then Dropout(0.2)
    * model 3: hidden ReLU layers 128 -> 256 -> 64, then Dropout(0.2)

    Args:
        model_num: 1, 2 or 3.

    Returns:
        The builder function (``model_1``, ``model_2`` or ``model_3``).

    Raises:
        ValueError: If ``model_num`` is not one of the known model numbers.
    """
    def build_network(hidden_units, dropout_rate=None):
        # Shared recipe for all three architectures; only the hidden layer
        # widths and the optional dropout before the output layer differ.
        n_features = X.shape[1]
        model = Sequential()
        model.add(Dense(n_features, input_dim=n_features, activation='relu'))
        for units in hidden_units:
            model.add(Dense(units, activation='relu'))
        if dropout_rate is not None:
            model.add(Dropout(dropout_rate))
        model.add(Dense(1))
        model.compile(loss=root_mean_squared_error, optimizer='adam')
        model.summary()
        return model

    # Kept as plain named zero-argument functions so they can be passed
    # directly as build_fn.
    def model_1():
        return build_network((64, 32))

    def model_2():
        return build_network((64, 128, 32), dropout_rate=0.2)

    def model_3():
        return build_network((128, 256, 64), dropout_rate=0.2)

    builders = {1: model_1, 2: model_2, 3: model_3}
    if model_num not in builders:
        # Fail here instead of returning a string that would only break
        # later, when something tries to call it as a builder.
        raise ValueError(
            "invalid model_num: {!r} (expected one of {})".format(
                model_num, sorted(builders)))
    return builders[model_num]

In [4]:
# Load the features X and target y through the project helper. Both are read
# as module-level globals by run_models(), and X.shape[1] sets the input width
# of every network built by create_model().
# NOTE(review): 'quad' presumably selects a feature-set variant — confirm in
# utils.load_full_dataset.
X, y = utils.load_full_dataset('quad')

In [None]:
# Train all three models for 100 epochs each, saving checkpoints along the way.
# Takes about 1 hr in total.
hist_list = run_models()

# Plot training vs. validation loss for each model.
# All models seem to start overfitting at an early stage.
plot_learning_process(hist_list)

In [None]:
# For each model, look up the epoch of its best saved checkpoint and
# cross-validate a freshly built regressor trained for that many epochs.
for i in range(1, 4):
    # Only the epoch count is reused below; weights_path is not loaded here.
    weights_path, epochs = get_weights_path_and_epoch(i)
    model = KerasRegressor(build_fn=create_model(i),
                           epochs=epochs,
                           batch_size=32,
                           validation_split=0.2,
                           shuffle=True,
                           verbose=0)
    
    # NOTE(review): utils.cross_val / utils.summarize_errors are project
    # helpers not shown here; presumably they run 5-fold CV and report
    # MAE/RMSE mean and SD — confirm against utils.
    errors = utils.cross_val(model, X, y, n_folds=5)
    utils.summarize_errors(errors, verbose=1)

Model 1 | Lowest Valid Error: 9.04445 at Epoch 28


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 41)                1722      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2688      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 6,523
Trainable params: 6,523
Non-trainable params: 0
_________________________________________________________________


In [None]:
### Neural Networks (Model 1) trained to Epoch 28
- MAE - Mean: 7.24544, SD: 0.04222
- RMSE - Mean: 9.50363, SD: 0.06278