## Fine-Tuning Neural Network Model

This notebook assumes the data has been split into a training and a test set. If not, run get_data.ipynb first.

In [1]:
import pandas as pd

# Locations of the pre-split training and test data.
TRAINING_FILEPATH = 'data/training_set.csv'
TEST_FILEPATH = 'data/test_set.csv'

# Read both splits the same way, using the stored 'index' column as the row index.
training_set, test_set = (
    pd.read_csv(csv_path, index_col='index')
    for csv_path in (TRAINING_FILEPATH, TEST_FILEPATH)
)

In [2]:
from preprocessing_utils import stratify_split_imdb

# Hold out part of the training data as a validation set for model evaluation;
# 5-fold cross-validation would be too time-consuming for neural networks.
VALIDATION_FRACTION = 0.2
small_training_set, validation_set = stratify_split_imdb(training_set, test_size=VALIDATION_FRACTION)

In [3]:
# Show the (rows, columns) shape of the reduced training set and of the validation set.
print(*(split.shape for split in (small_training_set, validation_set)))

(8672, 44) (2168, 44)


In [4]:
from preprocessing_utils import FeaturePreprocessor, separate_features_targets

# Separate each split into its features and its targets.
train_X_raw, train_y = separate_features_targets(small_training_set)
val_X_raw, val_y = separate_features_targets(validation_set)

# Fit the preprocessing (feature combinations, power transform, one-hot type)
# on the training features only, then apply the fitted transformer to the
# validation features.
preprocessor = FeaturePreprocessor(
    add_combinations=True,
    powertransform_num=True,
    onehot_type=True,
)
train_X = preprocessor.fit_transform(train_X_raw)
val_X = preprocessor.transform(val_X_raw)

In [5]:
# TODO: create a base neural network model
# TODO: train the base model
#   TODO: use tensorboard to visualize training and validation error
# TODO: fine-tune the model using random search and grid search
# TODO: use early stopping to train the best model

In [6]:
import tensorflow as tf
import tensorflow.keras as keras

def create_neuralnet(input_shape, n_hidden=1, n_neurons=32, hidden_activation='relu', optimizer='sgd'):
    """
    Build and compile a fully connected regression network.

    The network is a Sequential stack of `n_hidden` Dense hidden layers
    followed by a single linear output unit. It is compiled with a mean
    squared error loss and reports the root mean squared error under the
    metric name 'rmse' (so validation runs expose 'val_rmse').

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, e.g. `train_X.shape[1:]`.
    n_hidden : int
        Number of hidden Dense layers. 0 yields a purely linear model.
    n_neurons : int
        Number of units in every hidden layer.
    hidden_activation : str or callable
        Activation used by every hidden layer.
    optimizer : str or keras optimizer instance
        Optimizer handed to `model.compile`. Defaults to 'sgd'; pass an
        optimizer instance to control the learning rate.

    Returns
    -------
    The compiled Sequential model.

    Raises
    ------
    ValueError
        If `n_hidden` is negative.
    """
    if n_hidden < 0:
        raise ValueError(f"n_hidden must be non-negative, got {n_hidden}")

    model = keras.models.Sequential()

    # declare the input explicitly so the number of Dense hidden layers is
    # exactly n_hidden (including the n_hidden=0 case)
    model.add(keras.Input(shape=input_shape))

    # hidden layers
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation=hidden_activation))

    # output layer: one unit, no activation (regression target)
    model.add(keras.layers.Dense(1))

    # compile the model
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

    return model

In [7]:
from train_utils import get_fit_logdir

# TensorBoard callback writing to the log directory returned by get_fit_logdir()
logdir = get_fit_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(logdir)

# baseline network: one hidden layer of 32 ReLU units
base_neuralnet = create_neuralnet(
    input_shape=train_X.shape[1:],
    n_hidden=1,
    n_neurons=32,
    hidden_activation='relu',
)

# fit for 150 epochs, reporting validation metrics alongside the training ones
base_fit_options = dict(
    batch_size=32,
    epochs=150,
    validation_data=(val_X, val_y),
    callbacks=[tensorboard_cb],
)
base_neuralnet.fit(train_X, train_y, **base_fit_options)

- loss: 0.7092 - rmse: 0.8421 - val_loss: 0.7329 - val_rmse: 0.8561
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/

<tensorflow.python.keras.callbacks.History at 0x7f24a4355710>

In [8]:
from train_utils import evaluate_keras_model

# Training and validation RMSE of the base model, shown as the cell output.
base_eval_data = (train_X, train_y, val_X, val_y)
base_neuralnet_errors = evaluate_keras_model(base_neuralnet, *base_eval_data)
base_neuralnet_errors

Unnamed: 0,train_rmse,val_rmse
0,0.737455,0.813141


In [9]:
import os
import time
import numpy as np
from train_utils import TENSORBOARD_LOGS_DIR
from train_utils import evaluate_keras_model
from sklearn.model_selection import ParameterGrid


def neuralnet_random_search(param_grid, n_iter, train_X, train_y, val_X, val_y, verbose=False, random_state=None):
    """
    Randomized hyperparameter search for neural network models.

    Expands `param_grid` into every parameter combination, draws at most
    `n_iter` of them without replacement and, for each one, builds a model
    with `create_neuralnet`, fits it and scores it with
    `evaluate_keras_model`. Every fit logs to its own TensorBoard directory
    (`fit_<i>`) under a timestamped folder inside TENSORBOARD_LOGS_DIR.

    Parameters
    ----------
    param_grid : dict
        Maps hyperparameter names to lists of candidate values. Must provide
        'n_hidden', 'n_neurons', 'hidden_activation', 'batch_size' and
        'epochs'.
    n_iter : int
        Maximum number of combinations to fit. If the grid holds fewer
        combinations, all of them are fitted.
    train_X, train_y
        Training features and targets passed to `model.fit`.
    val_X, val_y
        Validation features and targets, used as `validation_data` and for
        ranking the models.
    verbose : bool
        When True, print progress and let Keras print its per-epoch log.
    random_state : int or None
        Seed for the draw of combinations. None (the default) draws from
        NumPy's global random state.

    Returns
    -------
    dict with the keys
        'params'        : list with one dict per fit (the hyperparameters plus
                          'train_rmse', 'val_rmse' and 'logdir'),
        'best_val_rmse' : lowest validation RMSE seen (inf if nothing was fit),
        'best_index'    : position of the best fit in 'params',
        'best_model'    : the fitted model with the lowest validation RMSE,
        'best_params'   : hyperparameters of that model.
    """
    candidates = list(ParameterGrid(param_grid))

    # a dedicated RandomState makes the draw reproducible without reseeding
    # the global generator; without a seed keep using the global one
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    chosen = rng.permutation(len(candidates))[:n_iter]
    sampled_params = [candidates[j] for j in chosen]

    base_log_dir = os.path.join(TENSORBOARD_LOGS_DIR, "rndsearch_" + time.strftime("%Y_%m_%d-%H_%M_%S"))
    search_results = {
        'params': [],
        'best_val_rmse': float('inf'),
        'best_index': None,
        'best_model': None,
        'best_params': None
    }

    if verbose:
        # report the number of fits that will really run, which can be
        # smaller than n_iter when the grid is small
        print(f"Fitting {len(sampled_params)} models\n")

    for i, params in enumerate(sampled_params):
        # generate tensorboard logs
        logdir = os.path.join(base_log_dir, f"fit_{i}")
        tensorboard_cb = keras.callbacks.TensorBoard(logdir)
        cb = [tensorboard_cb]

        # create the model
        model = create_neuralnet(input_shape=train_X.shape[1:],
                                 n_hidden=params['n_hidden'],
                                 n_neurons=params['n_neurons'],
                                 hidden_activation=params['hidden_activation'])

        if verbose:
            print(f"Fit {i}\t{params}")
            print(f"Logging to {logdir}")

        # fit the model to the data
        model.fit(train_X, train_y,
                  batch_size=params['batch_size'],
                  epochs=params['epochs'],
                  validation_data=(val_X, val_y),
                  callbacks=cb,
                  verbose=(1 if verbose else 0))

        # evaluate the model
        errors = evaluate_keras_model(model, train_X, train_y, val_X, val_y)
        train_rmse = errors['train_rmse'][0]
        val_rmse = errors['val_rmse'][0]

        # update search results (copy so the sampled parameter dict stays untouched)
        param_entry = params.copy()
        param_entry['train_rmse'] = train_rmse
        param_entry['val_rmse'] = val_rmse
        param_entry['logdir'] = logdir
        search_results['params'].append(param_entry)

        # check if this is the best model so far
        if val_rmse < search_results['best_val_rmse']:
            search_results['best_val_rmse'] = val_rmse
            search_results['best_index'] = i
            search_results['best_model'] = model
            search_results['best_params'] = params

    return search_results

In [10]:
# Candidate values for every hyperparameter explored by the random search.
neuralnet_rndsearch_grid = dict(
    n_hidden=[1, 2, 4],
    n_neurons=[16, 32],
    hidden_activation=['relu', 'sigmoid'],
    epochs=[150],
    batch_size=[4, 8, 16, 32, 64, 128],
)

# Fit 15 randomly chosen combinations and keep the per-fit results.
rndsearch_results = neuralnet_random_search(
    neuralnet_rndsearch_grid, 15,
    train_X, train_y,
    val_X, val_y,
    verbose=True,
)

- loss: 0.6957 - rmse: 0.8341 - val_loss: 0.6971 - val_rmse: 0.8350
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/

In [12]:
import os
from train_utils import MODELS_DIR

# Persist the random-search model that reached the lowest validation RMSE.
best_rndsearch_model = rndsearch_results['best_model']
filepath = os.path.join(MODELS_DIR, 'rndsearch_neuralnet.h5')
best_rndsearch_model.save(filepath)

In [23]:
# One row per fitted configuration, ordered by batch size, then n_hidden, then n_neurons.
rndsearch_table = pd.DataFrame(rndsearch_results['params'])
rndsearch_table.sort_values(by=['batch_size', 'n_hidden', 'n_neurons'])

Unnamed: 0,batch_size,epochs,hidden_activation,logdir,n_hidden,n_neurons,train_rmse,val_rmse
7,8,150,relu,train_logs/tensorboard/rndsearch_2020_05_29-06...,1,16,0.744343,0.804577
10,8,150,sigmoid,train_logs/tensorboard/rndsearch_2020_05_29-06...,1,32,0.752611,0.798635
3,8,150,sigmoid,train_logs/tensorboard/rndsearch_2020_05_29-06...,2,16,0.781877,0.805149
6,8,150,relu,train_logs/tensorboard/rndsearch_2020_05_29-06...,4,32,0.515488,0.920577
1,16,150,sigmoid,train_logs/tensorboard/rndsearch_2020_05_29-06...,1,16,0.792237,0.810036
14,16,150,relu,train_logs/tensorboard/rndsearch_2020_05_29-06...,1,32,0.732304,0.811502
8,16,150,relu,train_logs/tensorboard/rndsearch_2020_05_29-06...,2,16,0.724609,0.832788
11,16,150,sigmoid,train_logs/tensorboard/rndsearch_2020_05_29-06...,4,16,0.79185,0.804324
5,16,150,sigmoid,train_logs/tensorboard/rndsearch_2020_05_29-06...,4,32,0.782377,0.797977
4,32,150,relu,train_logs/tensorboard/rndsearch_2020_05_29-06...,1,32,0.740553,0.806655


In [None]:
# Good hyperparameters (decided by examining error plots for different models)
# - batch_size=64
# - n_hidden=2
# - n_neurons=16
# - hidden_activation='relu'

In [27]:
# Import the callback from tensorflow.keras rather than the standalone keras
# package: the model is built with tensorflow.keras, and callbacks from the
# two packages are not guaranteed to be interchangeable.
from tensorflow.keras.callbacks import EarlyStopping

# generate tensorboard logs
logdir = get_fit_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(logdir)

# early stopping callback: stop once val_rmse has not improved by at least
# min_delta for `patience` consecutive epochs, then restore the best weights
earlystopping_cb = EarlyStopping(monitor='val_rmse', mode='min', patience=20, min_delta=0.00005, restore_best_weights=True)

# build a model with good hyperparameters
good_neuralnet = create_neuralnet(input_shape=train_X.shape[1:], n_hidden=2, n_neurons=16, hidden_activation='relu')

# train the model; epochs=1000 is only an upper bound because early stopping
# is expected to end training sooner
good_neuralnet.fit(train_X, train_y, batch_size=64, epochs=1000,
                   validation_data=(val_X, val_y), callbacks=[tensorboard_cb, earlystopping_cb])

- rmse: 0.8200 - val_loss: 0.7049 - val_rmse: 0.8396
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/

<tensorflow.python.keras.callbacks.History at 0x7f246443f240>

In [28]:
# Training and validation RMSE of the model trained with early stopping.
good_neuralnet_errors = evaluate_keras_model(good_neuralnet, train_X, train_y, val_X, val_y)
good_neuralnet_errors

Unnamed: 0,train_rmse,val_rmse
0,0.75653,0.794093


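This is the best model so far: its validation RMSE (0.794) is lower than that of the base model (0.813) and of the best random-search configuration shown above (0.798).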

In [34]:
from sklearn.metrics import mean_squared_error

# Apply the already-fitted preprocessor to the test features (no refitting).
test_X_raw, test_y = separate_features_targets(test_set)
test_X = preprocessor.transform(test_X_raw)

# RMSE of the model's predictions on the test set
test_pred_y = good_neuralnet.predict(test_X)
test_mse = mean_squared_error(test_y, test_pred_y)
test_rmse = np.sqrt(test_mse)
print(test_rmse)

0.781313807262355


In [35]:
# Keep the early-stopped network as the final model and write it to MODELS_DIR.
best_model_filename = 'best_neuralnet.h5'
best_neuralnet = good_neuralnet
filepath = os.path.join(MODELS_DIR, best_model_filename)
best_neuralnet.save(filepath)