In [1]:
import pandas as pd
import numpy as np
import pickle
import time
import os.path

from numpy import array
from datetime import timedelta  
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM, GRU
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.decomposition import PCA
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from fbprophet import Prophet
from scipy.stats import boxcox
from scipy.special import inv_boxcox
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResults
from pmdarima import auto_arima
from datetime import timedelta 

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

import plotly
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode

Using TensorFlow backend.


In [2]:
timestamp = "timestamp"
predictionColumn = "cpuusage_ps"

In [3]:
# split a multivariate sequence into samples
# https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/
def split_sequences(sequences, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out-1
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

def split_train_test(x, y, pred_horizon):
    x_train = x[:-1]
    x_test = x[-1:]
    y_train = y[:-1]
    y_test = y[-1:]

    return x_train, x_test, y_train, y_test

def split_train_validation(x, y, split):
    x_train = x[:-split]
    x_val = x[-split:]
    y_train = y[:-split]
    y_val = y[-split:]
    
    return x_train, x_val, y_train, y_val


In [14]:
def create_lstm(numberOfHiddenLayers=3, numberOfCells=50, activation="relu", n_history=1, pred_horizon=1, numberOfFeatures=1):
    model = Sequential()
    model.add(LSTM(numberOfCells, activation=activation, return_sequences=True, input_shape=(n_history, numberOfFeatures)))
    for i in range(numberOfHiddenLayers-1):
        model.add(LSTM(numberOfCells, activation=activation, return_sequences=True))
        
    model.add(LSTM(numberOfCells, activation=activation, return_sequences=False))    
    model.add(Dense(pred_horizon, activation=activation))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

def create_gru(numberOfHiddenLayers=3, numberOfCells=50, activation="relu", n_history=1, pred_horizon=1, numberOfFeatures=1):
    model = Sequential()
    model.add(GRU(numberOfCells, activation=activation, return_sequences=True, input_shape=(n_history, numberOfFeatures)))
    for i in range(numberOfHiddenLayers-1):
        model.add(GRU(numberOfCells, activation=activation, return_sequences=True))
        
    model.add(GRU(numberOfCells, activation=activation, return_sequences=False))    
    model.add(Dense(pred_horizon, activation=activation))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

def create_mlp(numberOfHiddenLayers=3, numberOfNeurons=50, activation="relu", n_history=1, pred_horizon=1, n_input=1):
    model = Sequential()
    model.add(Dense(numberOfNeurons, activation=activation, input_dim=n_input))
    
    for i in range(numberOfHiddenLayers):
        model.add(Dense(numberOfNeurons, activation=activation))
        
    model.add(Dense(pred_horizon))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

def create_cnn(numberOfHiddenConvPoolLayers = 1, filters=[75], kernelSize = 3, poolSize = 2, activation = "relu",
               numberOfLstmLayers = 1, numberOfLstmCells=50, lstmActivation = "tanh", 
               n_history=1, pred_horizon=1, numberOfFeatures = 1):
    model = Sequential()
    model.add(Conv1D(filters=filters[0], kernel_size=kernelSize, activation=activation, input_shape=(n_history, numberOfFeatures)))
    #model.add(MaxPooling1D(pool_size=poolSize))
    
    for i in range(numberOfHiddenConvPoolLayers - 1):
        model.add(Conv1D(filters=filters[i], kernel_size=kernelSize, activation=activation))
        #model.add(MaxPooling1D(pool_size=poolSize))
    
    for j in range(numberOfLstmLayers-1):
        model.add(LSTM(numberOfLstmCells, activation=lstmActivation,return_sequences=True))
    if (numberOfLstmLayers > 0):
        model.add(LSTM(numberOfLstmCells, activation=lstmActivation))
        
    model.add(Dense(pred_horizon))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

In [5]:
def lstm_data_preprocessing(df, n_history, pred_horizon, performPCA):
    if not df.index.name == timestamp:
        dataset = df.set_index(timestamp)
    else:
        dataset = df
    
    y = dataset[predictionColumn].copy()
    x = dataset.drop(columns=predictionColumn)
    
    scalerX = StandardScaler()
    scalerX.fit(x)
    x = scalerX.transform(x)
    scalerY = StandardScaler()
   # .reshape(-1, 1) # needed for standardScaler
    scalerY.fit(y.values.reshape(-1,1))
    y = scalerY.transform(y.values.reshape(-1,1))
    
    if(performPCA):
        pcaTransformer = PCA(0.95) # keep 95% variance
        pcaTransformer.fit(x)
        x = pcaTransformer.transform(x)
        print(''' *** PCA Result***\n Started with %d features, reduced to %d features''' 
              % (len(df.columns)-1, pcaTransformer.n_components_))
        
    transformed_df = pd.DataFrame().from_records(x)
    transformed_df[predictionColumn] = y
    
    transformed_df.reset_index(inplace=True)
    x, y = split_sequences(transformed_df.values, n_steps_in=n_history, n_steps_out=pred_horizon)
    return x, y, scalerX, scalerY

def train_lstm(df, n_history, pred_horizon, paramDict, performPCA=False):
    # data split / preprocessing
    preprocessingResultFile = "grid_lstm_preprocessingResult.pkl"
    if not os.path.isfile(preprocessingResultFile):
        print("Preprocessing data...")
        # data split / preprocessing
        x, y, scalerX, scalerY = lstm_data_preprocessing(df, n_history=n_history, pred_horizon=pred_horizon, performPCA=performPCA)
        print("SHAPES: ", x.shape, y.shape)
        numberOfFeatures = x.shape[2]
        x_train, x_test, y_train, y_test = split_train_test(x, y, pred_horizon)
        print("Shapes: xtr, xte, ytr, yte: ", x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        preprocessingResult = [x_train, x_test, y_train, y_test, scalerX, scalerY]
        with(open(preprocessingResultFile, "wb")) as pkl:
            pickle.dump(preprocessingResult, pkl)
    else:
        with(open(preprocessingResultFile, "rb")) as pkl:
            x_train, x_test, y_train, y_test, scalerX, scalerY = pickle.load(pkl)
        numberOfFeatures = x_train.shape[2]
            
    paramDict["numberOfFeatures"] = [numberOfFeatures];

    model = KerasRegressor(build_fn = create_lstm)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
#     mc = ModelCheckpoint('grid_lstm_multistep_multivariate.h5', monitor='val_loss' , mode='min', verbose=1, save_best_only=True)
    grid = GridSearchCV(estimator=model, param_grid=paramDict, cv = 5, n_jobs=8)
    grid_result = grid.fit(x_train, y_train, validation_split=0.1, epochs= 200, callbacks=[es], shuffle=False)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    fitTimes = grid_result.cv_results_['mean_fit_time']
    for mean, stdev, param, fitTime in zip(means, stds, params, fitTimes):
        print("%f (%f) with: %r, fitted in %f" % (mean, stdev, param, fitTime))

    print('''*** Model fitted ***''')
    return None

In [6]:
def gru_data_preprocessing(df, n_history, pred_horizon, performPCA):
    if not df.index.name == timestamp:
        dataset = df.set_index(timestamp)
    else:
        dataset = df
    
    y = dataset[predictionColumn].copy()
    x = dataset.drop(columns=predictionColumn)
    
    scalerX = StandardScaler()
    scalerX.fit(x)
    x = scalerX.transform(x)
    scalerY = StandardScaler()
   # .reshape(-1, 1) # needed for standardScaler
    scalerY.fit(y.values.reshape(-1,1))
    y = scalerY.transform(y.values.reshape(-1,1))
    
    if(performPCA):
        pcaTransformer = PCA(0.95) # keep 95% variance
        pcaTransformer.fit(x)
        x = pcaTransformer.transform(x)
        print(''' *** PCA Result***\n Started with %d features, reduced to %d features''' 
              % (len(df.columns)-1, pcaTransformer.n_components_))
        
    transformed_df = pd.DataFrame().from_records(x)
    transformed_df[predictionColumn] = y
    
    transformed_df.reset_index(inplace=True)
    x, y = split_sequences(transformed_df.values, n_steps_in=n_history, n_steps_out=pred_horizon)
    return x, y, scalerX, scalerY

def gru_split_train_test(x, y, pred_horizon):
    x_train = x[:-pred_horizon]
    x_test = x[-pred_horizon:]
    y_train = y[:-pred_horizon]
    y_test = y[-pred_horizon:]
    
    return x_train, x_test, y_train, y_test

def train_gru(df, n_history, pred_horizon, paramDict, performPCA=False):
    preprocessingResultFile = "grid_gru_preprocessingResult.pkl"
    if not os.path.isfile(preprocessingResultFile):
        print("Preprocessing data...")
        # data split / preprocessing
        x, y, scalerX, scalerY = gru_data_preprocessing(df, n_history=n_history, pred_horizon=pred_horizon, performPCA=performPCA)
        print("SHAPES: ", x.shape, y.shape)
        numberOfFeatures = x.shape[2]
        x_train, x_test, y_train, y_test = split_train_test(x, y, pred_horizon)
        print("Shapes: xtr, xte, ytr, yte: ", x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        preprocessingResult = [x_train, x_test, y_train, y_test, scalerX, scalerY]
        
        with(open(preprocessingResultFile, "wb")) as pkl:
            pickle.dump(preprocessingResult, pkl)
    else:
        with(open(preprocessingResultFile, "rb")) as pkl:
            x_train, x_test, y_train, y_test, scalerX, scalerY = pickle.load(pkl)
        numberOfFeatures = x_train.shape[2]
   
    paramDict["numberOfFeatures"] = [numberOfFeatures];    
        
    model = KerasRegressor(build_fn = create_gru)

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
#     mc = ModelCheckpoint('grid_gru_multistep_multivariate.h5', monitor='val_loss' , mode='min', verbose=1, save_best_only=True)
    grid = GridSearchCV(estimator=model, param_grid=paramDict, cv = 5, n_jobs=8)
    grid_result = grid.fit(x_train, y_train, validation_split=0.1, epochs= 100, callbacks=[es], shuffle=False)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    fitTimes = grid_result.cv_results_['mean_fit_time']
    for mean, stdev, param, fitTime in zip(means, stds, params, fitTimes):
        print("%f (%f) with: %r, fitted in %f" % (mean, stdev, param, fitTime))

    print('''*** Model fitted ***''')
    return None


In [7]:
def mlp_data_preprocessing(df, n_history, pred_horizon, performPCA):
    if not df.index.name == timestamp:
        dataset = df.set_index(timestamp)
    else:
        dataset = df
         
    y = dataset[predictionColumn].copy()
    x = dataset.drop(columns=predictionColumn)
    
    scalerX = StandardScaler()
    scalerX.fit(x)
    x = scalerX.transform(x)
    scalerY = StandardScaler()
   # .reshape(-1, 1) # needed for standardScaler
    scalerY.fit(y.values.reshape(-1,1))
    y = scalerY.transform(y.values.reshape(-1,1))
    
    if(performPCA):
        pcaTransformer = PCA(0.95) # keep 95% variance
        pcaTransformer.fit(x)
        x = pcaTransformer.transform(x)
        print(''' *** PCA Result***\n Started with %d features, reduced to %d features''' 
              % (len(df.columns)-1, pcaTransformer.n_components_))
        
    transformed_df = pd.DataFrame().from_records(x)
    transformed_df[predictionColumn] = y
    
    transformed_df.reset_index(inplace=True)
    x, y = split_sequences(transformed_df.values, n_steps_in=n_history, n_steps_out=pred_horizon)
    return x, y, scalerX, scalerY


def train_mlp(df, n_history, pred_horizon, paramDict, performPCA=True):
    preprocessingResultFile = "grid_mlp_preprocessingResult.pkl"
    if not os.path.isfile(preprocessingResultFile):
        print("Preprocessing data...")
        x, y, scalerX, scalerY = mlp_data_preprocessing(df, n_history=n_history, pred_horizon=pred_horizon, performPCA=performPCA)
        print("SHAPES: ", x.shape, y.shape)
        x_train, x_test, y_train, y_test = split_train_test(x, y, pred_horizon)
        print("Shapes: xtr, xte, ytr, yte: ", x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        n_input = x_train.shape[1] * x_train.shape[2]
        x_train = x_train.reshape((x_train.shape[0], n_input))
        n_neurons = int((n_input + pred_horizon))
        preprocessingResult = [n_input, x_train, x_test, y_train, y_test, scalerX, scalerY]
        with(open(preprocessingResultFile, "wb")) as pkl:
            pickle.dump(preprocessingResult, pkl)
    else:
        with(open(preprocessingResultFile, "rb")) as pkl:
            n_input, x_train, x_test, y_train, y_test, scalerX, scalerY = pickle.load(pkl)
        n_neurons = int((n_input + pred_horizon))
        
    model = KerasRegressor(build_fn = create_mlp)
    
    
    paramDict["numberOfNeurons"] = [n_neurons]
    paramDict["n_input"] = [n_input]
    
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
#     mc = ModelCheckpoint('grid_mlp_multistep_multivariate.h5', monitor='val_loss' , mode='min', verbose=1, save_best_only=True)
    grid = GridSearchCV(estimator=model, param_grid=paramDict, cv = 5, n_jobs=8)
    grid_result = grid.fit(x_train, y_train, validation_split=0.1, epochs= 100, callbacks=[es], shuffle=False)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    fitTimes = grid_result.cv_results_['mean_fit_time']
    for mean, stdev, param, fitTime in zip(means, stds, params, fitTimes):
        print("%f (%f) with: %r, fitted in %f" % (mean, stdev, param, fitTime))

    print('''*** Model fitted ***''')
    return None

In [8]:
def cnn_data_preprocessing(df, n_history, pred_horizon, performPCA):
    if not df.index.name == timestamp:
        dataset = df.set_index(timestamp)
    else:
        dataset = df
    
    y = dataset[predictionColumn].copy()
    x = dataset.drop(columns=predictionColumn)
    
    scalerX = StandardScaler()
    scalerX.fit(x)
    x = scalerX.transform(x)
    scalerY = StandardScaler()
   # .reshape(-1, 1) # needed for standardScaler
    scalerY.fit(y.values.reshape(-1,1))
    y = scalerY.transform(y.values.reshape(-1,1))
    
    if(performPCA):
        pcaTransformer = PCA(0.95) # keep 95% variance
        pcaTransformer.fit(x)
        x = pcaTransformer.transform(x)
        print(''' *** PCA Result***\n Started with %d features, reduced to %d features''' 
              % (len(df.columns)-1, pcaTransformer.n_components_))
        
    transformed_df = pd.DataFrame().from_records(x)
    transformed_df[predictionColumn] = y
    
    transformed_df.reset_index(inplace=True)
    x, y = split_sequences(transformed_df.values, n_steps_in=n_history, n_steps_out=pred_horizon)
    return x, y, scalerX, scalerY

def train_cnn(df, pred_horizon, n_history,paramDict, performPCA=True):
    preprocessingResultFile = "grid_cnn_preprocessingResult.pkl"
    if not os.path.isfile(preprocessingResultFile):
        print("Preprocessing data...")
        x, y, scalerX, scalerY = cnn_data_preprocessing(df, n_history=n_history, pred_horizon=pred_horizon, performPCA=performPCA)
        print("SHAPES: ", x.shape, y.shape)
        numberOfFeatures = x.shape[2]
        x_train, x_test, y_train, y_test = split_train_test(x, y, pred_horizon)
        print("Shapes: xtr, xte, ytr, yte: ", x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        preprocessingResult = [x_train, x_test, y_train, y_test, scalerX, scalerY]
        with(open(preprocessingResultFile, "wb")) as pkl:
            pickle.dump(preprocessingResult, pkl)
    else:
        with(open(preprocessingResultFile, "rb")) as pkl:
            x_train, x_test, y_train, y_test, scalerX, scalerY = pickle.load(pkl)
        numberOfFeatures = x_train.shape[2]
      
    paramDict["numberOfFeatures"] = [numberOfFeatures];
    model = KerasRegressor(build_fn = create_cnn)

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
#     mc = ModelCheckpoint('grid_cnn_multistep_multivariate.h5', monitor='val_loss' , mode='min', verbose=1, save_best_only=True)
    grid = GridSearchCV(estimator=model, param_grid=paramDict, cv = 5, n_jobs=8)
    grid_result = grid.fit(x_train, y_train, validation_split=0.1, epochs= 100, callbacks=[es], shuffle=False)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    print("grid.cv_results_\n\n")
    print(grid.cv_results_)
    print("grid.best_estimator\n\n")
    print(grid.best_estimator_)
    print("grid.best_params\n\n")
    print(grid.best_params_)
    
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    fitTimes = grid_result.cv_results_['mean_fit_time']
    for mean, stdev, param, fitTime in zip(means, stds, params, fitTimes):
        print("%f (%f) with: %r, fitted in %f" % (mean, stdev, param, fitTime))

    print('''*** Model fitted ***''')   
    return None


In [9]:
# Declaration
skip_csv_rows = 0
measureInterval = 15 #min
daysToPredict = 5
pred_horizon = (60//measureInterval) * 24 * daysToPredict #5 days (4*24*5), timestep = 15min
hours_history = 8
n_history = (60//measureInterval)*hours_history 

# Read data from pickle file
with open("./4week_transformed_droppedErrors_filled.pkl", "rb") as pickleFile:
    df = pickle.load(pickleFile)

In [10]:
df = df.reset_index()

In [11]:
df[predictionColumn] = pd.to_numeric(df[predictionColumn])

In [None]:
paramCellCount = [50, 75]
paramHiddenLayers = [4,6,8]
paramActivation = ["tanh"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenLayers = paramHiddenLayers, numberOfCells = paramCellCount, activation = paramActivation, 
                 n_history = paramHistory, pred_horizon = paramPredHorizon)
starttime = time.time()
train_lstm(df.copy().reset_index(), n_history=n_history, pred_horizon=pred_horizon, paramDict = paramDict)
print("training lstm took ", time.time() - starttime)

In [16]:
paramCellCount = [50]
paramHiddenLayers = [4,5,7]
paramActivation = ["relu"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenLayers = paramHiddenLayers, numberOfCells = paramCellCount, activation = paramActivation, 
                 n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for activation function relu vs tanh. Best: relu
starttime = time.time()
train_gru(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training GRU took ", time.time() - starttime)

 *** PCA Result***
 Started with 182 features, reduced to 60 features
SHAPES:  (2279, 32, 61) (2279, 480)
Shapes: xtr, xte, ytr, yte:  (1799, 32, 61) (480, 32, 61) (1799, 480) (480, 480)
Train on 1619 samples, validate on 180 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.87217, saving model to grid_gru_multistep_multivariate.h5
Epoch 2/100

Epoch 00002: val_loss improved from 0.87217 to 0.87144, saving model to grid_gru_multistep_multivariate.h5
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.87144
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.87144
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.87144
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.87144
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.87144
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.87144
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.87144
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.87144
Epoch 11/100

Epo

In [18]:
paramCellCount = [50]
paramHiddenLayers = [4,5,7]
paramActivation = ["relu"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenLayers = paramHiddenLayers, numberOfCells = paramCellCount, activation = paramActivation, 
                 n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for cell count. Best: 50
starttime = time.time()
train_gru(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training GRU took ", time.time() - starttime)

 *** PCA Result***
 Started with 182 features, reduced to 60 features
SHAPES:  (2279, 32, 61) (2279, 480)
Shapes: xtr, xte, ytr, yte:  (1799, 32, 61) (480, 32, 61) (1799, 480) (480, 480)



A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.



Train on 1619 samples, validate on 180 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.87413, saving model to grid_gru_multistep_multivariate.h5
Epoch 2/100

Epoch 00002: val_loss improved from 0.87413 to 0.87236, saving model to grid_gru_multistep_multivariate.h5
Epoch 3/100

Epoch 00003: val_loss improved from 0.87236 to 0.87146, saving model to grid_gru_multistep_multivariate.h5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.87146
Epoch 5/100

Epoch 00005: val_loss improved from 0.87146 to 0.87078, saving model to grid_gru_multistep_multivariate.h5
Epoch 6/100

Epoch 00006: val_loss improved from 0.87078 to 0.86879, saving model to grid_gru_multistep_multivariate.h5
Epoch 7/100

Epoch 00007: val_loss improved from 0.86879 to 0.86720, saving model to grid_gru_multistep_multivariate.h5
Epoch 8/100

Epoch 00008: val_loss improved from 0.86720 to 0.85594, saving model to grid_gru_multistep_multivariate.h5
Epoch 9/100

Epoch 00009: val_loss improved from 0.85

In [22]:
paramCellCount = [50]
paramHiddenLayers = [4,5,7]
paramActivation = ["relu"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenLayers = paramHiddenLayers, numberOfCells = paramCellCount, activation = paramActivation, 
                 n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for hidden layer count. Best: 5
starttime = time.time()
train_gru(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training GRU took ", time.time() - starttime)

 *** PCA Result***
 Started with 182 features, reduced to 60 features
SHAPES:  (2279, 32, 61) (2279, 480)
Shapes: xtr, xte, ytr, yte:  (1799, 32, 61) (480, 32, 61) (1799, 480) (480, 480)
Train on 1619 samples, validate on 180 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.87099, saving model to grid_gru_multistep_multivariate.h5
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.87099
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.87099
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.87099
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.87099
Epoch 6/100

Epoch 00006: val_loss improved from 0.87099 to 0.87012, saving model to grid_gru_multistep_multivariate.h5
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.87012
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.87012
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.87012
Epoch 10/100

Epoch 00010: val_loss improved from 0.87012 to 0.86787, saving model

In [None]:
paramHiddenLayers = [3]
paramActivation = ["sigmoid", "tanh", "relu"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenLayers = paramHiddenLayers, activation = paramActivation, 
                 n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for: activation func relu vs tanh vs sigmoid. Best: relu
starttime = time.time()
train_mlp(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training mlp took ", time.time() - starttime)

In [15]:
paramNumberOfHiddenConvPoolLayers = [2]
paramFilters = [(150,40)]# tuples instead of 2d array [[],[]] because of a bug: https://github.com/keras-team/keras/issues/13586
paramKernelSize = [2]
paramPoolSize = [2]
paramActivation = ["tanh"]
paramNumberOfLstmLayers = [1]
paramNumberOfLstmCells = [75]
paramLstmActivation = ["tanh"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenConvPoolLayers = paramNumberOfHiddenConvPoolLayers, filters=paramFilters, kernelSize = paramKernelSize,
                poolSize = paramPoolSize, activation=paramActivation,
                numberOfLstmLayers = paramNumberOfLstmLayers, numberOfLstmCells=paramNumberOfLstmCells, lstmActivation=paramLstmActivation,
                n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for: KernelSize, PoolSize Best: kernelSize=2, poolSize=2
starttime = time.time()
train_cnn(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training cnn took ", time.time() - starttime)

Train on 2050 samples, validate on 228 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


In [12]:
paramNumberOfHiddenConvPoolLayers = [2, 3]
paramFilters = [(150,40)]# tuples instead of 2d array [[],[]] because of a bug: https://github.com/keras-team/keras/issues/13586
paramKernelSize = [2]
paramPoolSize = [2]
paramActivation = ["tanh"]
paramNumberOfLstmLayers = [0,1,2]
paramNumberOfLstmCells = [75]
paramLstmActivation = ["tanh"]
paramHistory = [n_history]
paramPredHorizon = [pred_horizon]

paramDict = dict(numberOfHiddenConvPoolLayers = paramNumberOfHiddenConvPoolLayers, filters=paramFilters, kernelSize = paramKernelSize,
                poolSize = paramPoolSize, activation=paramActivation,
                numberOfLstmLayers = paramNumberOfLstmLayers, numberOfLstmCells=paramNumberOfLstmCells, lstmActivation=paramLstmActivation,
                n_history = paramHistory, pred_horizon = paramPredHorizon)
# Grid Search for: KernelSize, PoolSize Best: kernelSize=2, poolSize=2
starttime = time.time()
train_cnn(df.copy().reset_index(), pred_horizon=pred_horizon, n_history=n_history, paramDict = paramDict)
print("training cnn took ", time.time() - starttime)

Train on 2050 samples, validate on 228 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 00019: early stopping
Best: -1.126539 using {'activation': 'tanh', 'filters': (150, 40), 'kernelSize': 2, 'lstmActivation': 'tanh', 'n_history': 32, 'numberOfFeatures': 61, 'numberOfHiddenConvPoolLayers': 3, 'numberOfLstmCells': 75, 'numberOfLstmLayers': 1, 'poolSize': 2, 'pred_horizon': 480}
grid.cv_results_


{'mean_fit_time': array([  0.52848301,  72.21127391, 115.60849996,   2.55459943,
        76.89535131,  72.1524456 ]), 'std_fit_time': array([1.22646488e-02, 3.48962682e+01, 1.33110249e+01, 1.25500622e+00,
       1.96818277e+00, 1.05034176e+01]), 'mean_score_time': array([0.        , 0.24240174, 0.20100451, 0.        , 0.15679832,
       0.10539932]), 'std_score_time': array([0.        , 0.0511