In [1]:
# Load Packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import plotly.express as px

import warnings

warnings.filterwarnings("ignore")

from typing import List

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from scikeras.wrappers import KerasRegressor
from keras.constraints import MaxNorm

import keras

import os, sys
rootpath = ".."
sys.path.insert(0, f"{os.getcwd()}/{rootpath}/base_models")
import model_prep

step_back = 6  # window size = 6*5 = 30 mins

2023-11-06 11:51:29.040281: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(dropout_rate = 0.0, weight_constraint = 2.0, lstmcells = 32, activation = 'tanh', optimizer = "Adamax"):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [3]:
batch_size = [32, 64, 72, 128] # selecting 32
epochs = [50, 100, 200]  # selecting 200
param_grid = dict(batch_size=batch_size, epochs=epochs)
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

# dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# weight_constraint = [1.0, 2.0, 3.0, 4.0, 5.0]
# neurons = [6, 16, 32, 64, 128]
# lstmcells = [1, 6, 16, 32, 64]
# activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
# param_grid = dict(model__dropout_rate=dropout_rate, model__weight_constraint=weight_constraint)
# param_grid = dict(model__activation=activation)

2023-11-06 11:51:36.698868: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Best: 0.988032 using {'batch_size': 32, 'epochs': 200}
0.929549 (0.041001) with: {'batch_size': 32, 'epochs': 50}
0.983051 (0.002779) with: {'batch_size': 32, 'epochs': 100}
0.988032 (0.001095) with: {'batch_size': 32, 'epochs': 200}
0.861671 (0.001889) with: {'batch_size': 64, 'epochs': 50}
0.967328 (0.015166) with: {'batch_size': 64, 'epochs': 100}
0.986758 (0.001037) with: {'batch_size': 64, 'epochs': 200}
0.829893 (0.003596) with: {'batch_size': 72, 'epochs': 50}
0.972844 (0.003459) with: {'batch_size': 72, 'epochs': 100}
0.986628 (0.000901) with: {'batch_size': 72, 'epochs': 200}
0.458497 (0.002709) with: {'batch_size': 128, 'epochs': 50}
0.889980 (0.028496) with: {'batch_size': 128, 'epochs': 100}
0.983451 (0.000938) with: {'batch_size': 128, 'epochs': 200}
GridSearchCV(cv=3,
             estimator=KerasRegressor(build_fn=<function run_grid_search.<locals>.create_model at 0x7faf7086a670>, verbose=0),
             param_grid={'batch_size': [32, 64, 72, 128],
                      

In [10]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(dropout_rate = 0.0, weight_constraint = 2.0, lstmcells = 32, activation = 'tanh'):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", batch_size=32, epochs=200, verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [11]:
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax'] # selecting Adam
param_grid = dict(optimizer=optimizer)
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

Best: 0.989732 using {'optimizer': 'Adam'}
-0.005965 (0.007950) with: {'optimizer': 'SGD'}
0.988903 (0.001481) with: {'optimizer': 'RMSprop'}
-0.758298 (0.003863) with: {'optimizer': 'Adagrad'}
-0.766484 (0.012862) with: {'optimizer': 'Adadelta'}
0.989732 (0.001529) with: {'optimizer': 'Adam'}
0.988291 (0.000985) with: {'optimizer': 'Adamax'}
GridSearchCV(cv=3,
             estimator=KerasRegressor(batch_size=32, build_fn=<function run_grid_search.<locals>.create_model at 0x7faf744c1a60>, epochs=200, loss='mse', verbose=0),
             param_grid={'optimizer': ['SGD', 'RMSprop', 'Adagrad', 'Adadelta',
                                       'Adam', 'Adamax']})


In [14]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(activation, dropout_rate = 0.0, weight_constraint = 2.0, lstmcells = 32, optimizer = "Adam"):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [15]:
activation = ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'] # selecting relu
param_grid = dict(model__activation=activation)
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

Best: 0.978805 using {'model__activation': 'relu'}
0.978805 (0.001613) with: {'model__activation': 'relu'}
-0.823315 (0.017714) with: {'model__activation': 'tanh'}
-0.921345 (0.028926) with: {'model__activation': 'sigmoid'}
-0.915546 (0.024585) with: {'model__activation': 'hard_sigmoid'}
0.977459 (0.001476) with: {'model__activation': 'linear'}
GridSearchCV(cv=3,
             estimator=KerasRegressor(build_fn=<function run_grid_search.<locals>.create_model at 0x7faf73a039d0>, loss='mse', verbose=0),
             param_grid={'model__activation': ['relu', 'tanh', 'sigmoid',
                                               'hard_sigmoid', 'linear']})


In [18]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(lstmcells, activation="relu", dropout_rate = 0.0, weight_constraint = 2.0, optimizer = "Adam"):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [19]:
param_grid = dict(model__lstmcells=[16, 32, 64, 128]) # selecting 64
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

Best: 0.981176 using {'model__lstmcells': 64}
0.976398 (0.002598) with: {'model__lstmcells': 16}
0.977888 (0.002823) with: {'model__lstmcells': 32}
0.981176 (0.000516) with: {'model__lstmcells': 64}
0.980304 (0.002244) with: {'model__lstmcells': 128}
GridSearchCV(cv=3,
             estimator=KerasRegressor(build_fn=<function run_grid_search.<locals>.create_model at 0x7faf3187daf0>, loss='mse', verbose=0),
             param_grid={'model__lstmcells': [16, 32, 64, 128]})


In [21]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(dropout_rate, weight_constraint, lstmcells=64, activation="relu", optimizer = "Adam"):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [22]:
weight_constraint = [1.0, 2.0, 3.0, 4.0, 5.0] # select 0.0
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] # select 4.0
param_grid = dict(model__dropout_rate=dropout_rate, model__weight_constraint=weight_constraint)
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

Best: 0.980991 using {'model__dropout_rate': 0.0, 'model__weight_constraint': 4.0}
0.980350 (0.001786) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 1.0}
0.980659 (0.000954) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 2.0}
0.977632 (0.003685) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 3.0}
0.980991 (0.001114) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 4.0}
0.979406 (0.003307) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 5.0}
0.977514 (0.000792) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 1.0}
0.978909 (0.000751) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 2.0}
0.977338 (0.001237) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 3.0}
0.980686 (0.000716) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 4.0}
0.979798 (0.000870) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 5.0}
0.979005 (0.001795) with: {'model__dr

In [27]:
def run_grid_search(building_name, tower_number, season, param_grid, use_delta=True, train_percentage=0.75, shuffle_seed=42):
    features = ['FlowEvap', 'PerHumidity', 'TempAmbient', 'TempCondIn',
       'TempCondOut', 'TempEvapIn', 'TempEvapOut', 'TempWetBulb',
       'PerFreqConP', 'Tonnage','DayOfWeek', 'HourOfDay', 'PerFreqFan']
    target = 'EnergyConsumption'

    """
    1. Convert data into a model-compatible shape
    """

    lstm_df, _ = model_prep.create_preprocessed_lstm_df(
        building_name=building_name,
        tower_number=tower_number,
        features=features,
        target=target,
        season=season,
        use_delta=use_delta,
        step_back=6
    )
    if not season:
        season = "allyear"

    """
    2. Split data into training and testing sets
    """

    X = lstm_df.drop(f"{target}(t)", axis=1)  # drop target column
    y = lstm_df[f"{target}(t)"]  # only have target column

    # split into input and outputs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=(1 - train_percentage), shuffle=True, random_state=shuffle_seed
    )

    # scale feature data
    scaler = MinMaxScaler().fit(X_train)
    X_train[X_train.columns] = scaler.transform(X_train)
    X_test[X_test.columns] = scaler.transform(X_test)

    """
    3. Get timestepped data as a 3D vector
    """
    vec_X_train = model_prep.df_to_3d(
        lstm_dtframe=X_train, num_columns=len(features) + 1, step_back=step_back
    )
    vec_X_test = model_prep.df_to_3d(
        lstm_dtframe=X_test, num_columns=len(features) + 1, step_back=step_back
    )

    vec_y_train = y_train.values
    vec_y_test = y_test.values

    """
    4. Create and Train model
    """
    # Create a function that builds the Keras model
    def create_model(step_back, dropout_rate=0.0, weight_constraint=4.0, lstmcells=64, activation="relu", optimizer = "Adam"):
        model = keras.models.Sequential()
        model.add(
            keras.layers.LSTM(
                lstmcells,
                input_shape=(vec_X_train.shape[1], vec_X_train.shape[2]),
                kernel_constraint=MaxNorm(weight_constraint),
                recurrent_dropout=dropout_rate,
                activation=activation
            )
        )
        model.add(keras.layers.Dense(1))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    # Create a KerasClassifier
    model = KerasRegressor(build_fn=create_model, loss="mse", verbose=0)

    # Create GridSearchCV and perform the grid search
    grid_search = GridSearchCV(model, param_grid, cv=3)
    grid_result = grid_search.fit(vec_X_train, vec_y_train)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    print(grid_result)

In [28]:
param_grid = dict(model__step_back=[1, 3, 6, 12, 24]) # selecting step_back = 6

# Call the run_grid_search function with the modified param_grid
run_grid_search(building_name="ESB", tower_number=1, season="summer", param_grid=param_grid)

1
1
1
3
3
3
6
6
6
12
12
12
24
24
24
6
Best: 0.980671 using {'model__step_back': 6}
0.979950 (0.001641) with: {'model__step_back': 1}
0.980481 (0.000253) with: {'model__step_back': 3}
0.980671 (0.001003) with: {'model__step_back': 6}
0.980418 (0.002116) with: {'model__step_back': 12}
0.979959 (0.000832) with: {'model__step_back': 24}
GridSearchCV(cv=3,
             estimator=KerasRegressor(build_fn=<function run_grid_search.<locals>.create_model at 0x7faf56a7aee0>, loss='mse', verbose=0),
             param_grid={'model__step_back': [1, 3, 6, 12, 24]})


```
batch size: 32
epochs: 200
optimizer: Adam
dropout rate: 0.0
weight constraints: 4.0
step_back: 6
lstm cells: 64
activation: relu
```