In [None]:
import numpy as np

# Read the preprocessed input (X) and target (Y) arrays for the train and test splits from disk
train_X = np.load('data/processed/train_X.npy')
train_Y = np.load('data/processed/train_Y.npy')
test_X = np.load('data/processed/test_X.npy')
test_Y = np.load('data/processed/test_Y.npy')

In [None]:
# Gridsearch for keras LSTM regression model with hyperparameters
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, InputLayer
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from keras.callbacks import EarlyStopping
import tensorflow as tf

# Seed NumPy and TensorFlow so weight initialisation and shuffling are repeatable between runs
np.random.seed(42)
# TensorFlow 2.x exposes the global seed as tf.random.set_seed; 1.x only provides
# tf.set_random_seed. Pick whichever the installed version offers.
if hasattr(tf, 'random') and hasattr(tf.random, 'set_seed'):
    tf.random.set_seed(42)
else:
    tf.set_random_seed(42)

def create_model(dropout_rate=0.0, LSTM_1_neurons=0, LSTM_2_neurons=0, loss='mae', optimizer='adam', batch_size=32, epochs=100):
    ''' Build and compile a stacked two-layer LSTM regression network.

    Architecture: input -> LSTM (returns the full sequence) -> Dropout ->
    LSTM (returns only the last output) -> Dropout -> linear Dense output layer.
    The input shape and the number of output units are read from the
    module-level ``train_X`` and ``train_Y`` arrays.

    Parameters:
        dropout_rate:   Dropout fraction applied after each LSTM layer.
        LSTM_1_neurons: Number of units in the first LSTM layer.
        LSTM_2_neurons: Number of units in the second LSTM layer.
        loss:           Loss passed to ``model.compile``.
        optimizer:      Optimizer passed to ``model.compile``.
        batch_size:     Not used when building the model; kept for interface compatibility.
        epochs:         Not used when building the model; kept for interface compatibility.

    Returns:
        The compiled ``Sequential`` model.
    '''
    model = Sequential()
    # Input is (timesteps, features), taken from axes 1 and 2 of train_X
    model.add(InputLayer(input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(LSTM(LSTM_1_neurons, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(LSTM_2_neurons, return_sequences=False))
    model.add(Dropout(dropout_rate))
    # Size the output from the LAST axis of train_Y: identical to shape[2] for a 3-D
    # target array, and also valid for a 2-D (samples, outputs) target array.
    # NOTE(review): the final LSTM has return_sequences=False, so the network emits one
    # vector per sample - confirm that train_Y's layout matches this.
    model.add(Dense(train_Y.shape[-1], activation='linear'))
    # Use the requested loss/optimizer; hard-coding them here would make any search
    # over these two hyperparameters a no-op.
    model.compile(loss=loss, optimizer=optimizer)
    return model

# Wrap the model builder so scikit-learn can drive it.
# epochs must be given to the wrapper: it is forwarded to fit(), which otherwise trains
# for Keras' default of a single epoch and leaves early stopping (patience=10) pointless.
model = KerasRegressor(build_fn=create_model, epochs=100, verbose=2)

# Hyperparameter candidates. Keys are routed by the wrapper either to create_model
# (architecture, loss, optimizer) or to fit() (batch_size, validation_split, callbacks).
param_grid = {
    'dropout_rate': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    # 0 units is not a usable LSTM size, so the neuron grids start at 10
    'LSTM_1_neurons': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'LSTM_2_neurons': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'optimizer': ['adam', 'rmsprop'],
    'loss': ['mae', 'mse'],
    'batch_size': [32, 64, 128, 256, 512],
    # Hold out part of each training set so that 'val_loss' exists for EarlyStopping to monitor
    'validation_split': [0.1],
    # fit() expects a LIST of callbacks, so the single candidate value is itself a list
    'callbacks': [[EarlyStopping(monitor='val_loss', patience=10, min_delta=0.0001, restore_best_weights=True)]]
}

# Stack the training samples followed by the test samples along the sample axis,
# giving the cross-validation one combined pair of arrays
X = np.concatenate([train_X, test_X])
Y = np.concatenate([train_Y, test_Y])

# Grid search for the best model, always using the same split: train on the original
# training samples and score on the original test samples.
# PredefinedSplit needs one entry per sample of the joined arrays (training rows first,
# then test rows): -1 keeps a sample in the training set for every split, 0 assigns it
# to the single validation fold.
test_fold = np.concatenate((np.full(len(train_X), -1), np.zeros(len(test_X), dtype=int)))
# NOTE(review): n_jobs=-1 runs Keras fits in parallel worker processes - confirm this
# is stable with the installed TensorFlow backend.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=PredefinedSplit(test_fold=test_fold), n_jobs=-1)