[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Humboldt-WI/bads/blob/master/tutorials/9_nb_feature_engineering.ipynb) 

In [45]:
# Mount Google Drive at /content/drive; the data paths configured further down
# point below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
!pip install ax-platform



### Load Data and Set Parameters

In [47]:
# Import standard Python libraries
import numpy as np
import pandas as pd
import time
import json
import os
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
import warnings
from pickle import load
warnings.filterwarnings("ignore")

%matplotlib inline  
plt.rcParams["figure.figsize"] = (12,6)

from numpy import array
from ax.service.ax_client import AxClient
from ax.utils.notebook.plotting import render, init_notebook_plotting
import keras
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, LSTM, Concatenate
from tensorflow.keras.layers import Dropout, Bidirectional, Flatten
from tensorflow.keras.layers import Conv1D, Conv2D
from tensorflow.keras.layers import MaxPooling1D, Embedding
from tensorflow.keras.layers import RepeatVector, Reshape
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
#from sklearn.neural_network import MLPRegressor
from tensorflow.keras.layers import Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance

#### Helper Functions

In [48]:
def get_accuracy(y_real, y_pred):
  """Share of observations whose scaled deviation is at most 5.

  The deviation measure is ``|y_real - y_pred| / 100 * |y_real|``. Entries for
  which the measure is exactly 0 are replaced by 5, so they still count as hits.

  NOTE(review): a percentage error would normally divide by ``|y_real|``
  instead of multiplying by it -- confirm that this scaling is intended.

  Returns:
      Number of hits divided by ``len(y_real)``.
  """
  abs_error = np.abs(y_real - y_pred)
  scaled_dev = (abs_error / 100) * np.abs(y_real)
  scaled_dev = np.where(scaled_dev == 0, 5, scaled_dev)
  # A hit is every entry that is NOT above the threshold (NaN counts as a hit).
  hits = np.logical_not(scaled_dev > 5)
  return hits.sum() / len(y_real)

def get_accuracy_90(y_real, y_pred):
  """Share of observations whose scaled deviation is at most 5.

  Uses the same measure, ``|y_real - y_pred| / 100 * |y_real|``, and the same
  threshold (5) as ``get_accuracy``; zero deviations are set to 5 and count as hits.

  NOTE(review): the name suggests a different tolerance than ``get_accuracy``,
  but the threshold here is also 5 -- confirm which value is intended.

  Returns:
      Number of hits divided by ``len(y_real)``.
  """
  threshold = 5
  scaled_dev = np.abs(y_real - y_pred) / 100 * np.abs(y_real)
  scaled_dev = np.where(scaled_dev == 0, threshold, scaled_dev)
  n_hits = np.where(scaled_dev > threshold, 0, 1).sum()
  return n_hits / len(y_real)

def get_wmape(y_real, y_pred):
  """Weighted MAPE: summed absolute error divided by the summed absolute actuals."""
  actual = np.array(y_real)
  forecast = np.array(y_pred)
  total_abs_error = np.abs(actual - forecast).sum()
  return total_abs_error / np.abs(actual).sum()


def get_mase_onestep(y_real, y_pred, y_train):
    """
    Mean absolute scaled error (MASE) with a one-step naive benchmark.

    See "Another look at measures of forecast accuracy", Rob J Hyndman

    parameters:
        y_real: observed values of the evaluation period (array or float)
        y_pred: forecasts for y_real (same size as y_real)
        y_train: series used for scaling; needs ``.shape`` and at least two values

    returns:
        mean(|y_real - y_pred|) divided by the mean absolute first difference of y_train
    """
    naive_steps = np.abs(np.diff(y_train))
    # in-sample error of the "repeat the previous value" forecast
    scale = naive_steps.sum() / (y_train.shape[0] - 1)
    return np.abs(y_real - y_pred).mean() / scale

def get_mase(actual: np.ndarray, predicted: np.ndarray, naive: np.ndarray):
    """
    Mean Absolute Scaled Error.

    Ratio of the model's MAE to the MAE of a benchmark forecast ``naive``
    (the original note describes the benchmark as a 24 h naive forecast).
    """
    model_mae = mean_absolute_error(actual, predicted)
    benchmark_mae = mean_absolute_error(actual, naive)
    return model_mae / benchmark_mae


In [49]:
# Formulas to get all evaluation metrics for flattened and unflattened predictions
def evaluate_predictions_flat(actual, predicted, test_y=None, testing=True):
    """Compute and print all evaluation metrics for column-vector predictions.

    Args:
        actual: true values, 2-D with exactly one column.
        predicted: forecasts, 2-D with exactly one column.
        test_y: benchmark (naive) forecast passed to ``get_mase``; the default
            ``None`` is not usable by ``get_mase``, so callers must supply it.
        testing: if True the call acts as a sanity check and raises unless the
            wMAPE rounds to 0 (i.e. ``predicted`` reproduces ``actual``).

    Returns:
        ``[rmse, mae, wmape, mase, nrmse, accuracy, accuracy_90]``.

    Raises:
        AssertionError: on a wrong input shape or a failed sanity check.
    """
    if (actual.shape[1] != 1) or (predicted.shape[1] != 1):
        raise AssertionError('Second dimension of actual and predicted array must be 1')

    # sqrt(MSE) instead of `squared=False`, which recent scikit-learn no longer accepts.
    rmse_flat = float(np.sqrt(mean_squared_error(actual, predicted)))
    # Plain float() replaces np.asscalar, which no longer exists in current NumPy.
    value_range = np.max(actual) - np.min(actual)
    nrmse_flat = float(rmse_flat / value_range)
    mae_flat = mean_absolute_error(actual, predicted)
    wmape_flat = get_wmape(actual, predicted)
    mase_flat = get_mase(actual, predicted, test_y)
    acc_flat = get_accuracy(actual, predicted)
    acc_90_flat = get_accuracy_90(actual, predicted)

    print('RMSE/MAE/wMAPE/MASE/NRMSE/ACCURACY')
    print(f'{rmse_flat:.2f},{mae_flat:.2f},{wmape_flat:.2f},{mase_flat:.2f}, {nrmse_flat:.2f}, {acc_flat:.2f}')

    if testing and round(wmape_flat, 0) != 0:
        raise AssertionError(f'Sanity check failed: wMAPE {wmape_flat:.2f} does not round to 0')

    return [rmse_flat, mae_flat, wmape_flat, mase_flat, nrmse_flat, acc_flat, acc_90_flat]

def evaluate_predictions_windows(actual, predicted, test_y=None, testing=True):
    """Compute the evaluation metrics separately for every forecast window.

    Args:
        actual: true values, one window per row.
        predicted: forecasts, same layout as ``actual``.
        test_y: benchmark (naive) forecasts, indexed per window; must be
            supplied because it is indexed inside the loop.
        testing: if True the call acts as a sanity check and raises unless the
            mean wMAPE over all windows (invalid values masked) rounds to 0.

    Returns:
        ``[rmse, mae, wmape, mase, nrmse, accuracy, accuracy_90]`` where every
        entry is a list with one value per window.

    Raises:
        AssertionError: if the sanity check fails.
    """
    wmapes_24 = []
    rmse_24 = []
    nrmse_24 = []
    mae_24 = []
    mase_24 = []
    acc_24 = []
    acc_90_24 = []

    for i in range(actual.shape[0]):
        window_true, window_pred = actual[i], predicted[i]

        # sqrt(MSE) instead of `squared=False`, which recent scikit-learn no longer accepts.
        rmses = np.sqrt(mean_squared_error(window_true, window_pred))

        rmse_24.append(rmses)
        nrmse_24.append(rmses / (max(window_true) - min(window_true)))
        mae_24.append(mean_absolute_error(window_true, window_pred))
        wmapes_24.append(get_wmape(window_true, window_pred))
        mase_24.append(get_mase(window_true, window_pred, test_y[i]))
        acc_24.append(get_accuracy(window_true, window_pred))
        acc_90_24.append(get_accuracy_90(window_true, window_pred))

    # Windows with an all-zero target yield inf/nan wMAPE; mask them before averaging.
    mean_wmape = np.mean(np.ma.masked_invalid(wmapes_24))
    if testing and round(mean_wmape, 0) != 0:
        raise AssertionError('Sanity check failed: mean window wMAPE does not round to 0')

    return [rmse_24, mae_24, wmapes_24, mase_24, nrmse_24, acc_24, acc_90_24]

In [60]:
# calculate differences to original data for many models:
def print_originalvalues_results(result_list, y_test_control, naive, originals=True):
  """Evaluate several prediction arrays against the same ground truth.

  Relies on the module-level scaler ``preproc_target``.

  Args:
      result_list: sequence of prediction arrays (scaled model outputs); each
          one is flattened to a column vector before evaluation.
      y_test_control: ground truth on the original scale, one column.
      naive: benchmark forecast on the original scale, one column.
      originals: if True the predictions are inverse-transformed and compared
          on the original scale; otherwise truth and benchmark are transformed
          and everything is compared on the scaled level.

  Returns:
      Tuple of seven lists ``(rmse, mae, wmape, mase, nrmse, accuracy,
      accuracy_90)`` with one entry per element of ``result_list``.
  """
  if originals:
    reference, benchmark = y_test_control, naive
  else:
    # Truth and benchmark do not change between models: transform them once.
    reference = preproc_target.transform(y_test_control)
    benchmark = preproc_target.transform(naive)

  # one list per metric, in the order returned by evaluate_predictions_flat
  metric_columns = [[] for _ in range(7)]

  for prediction in result_list:
    prediction = prediction.reshape((-1, 1))
    if originals:
      prediction = preproc_target.inverse_transform(prediction)
    metric_list = evaluate_predictions_flat(reference, prediction, benchmark, testing=False)

    for column, value in zip(metric_columns, metric_list):
      column.append(value)

  rmse_lst, mae_lst, wmape_lst, mase_lst, nrmse_lst, accur_lst, accur_90_lst = metric_columns
  return rmse_lst, mae_lst, wmape_lst, mase_lst, nrmse_lst, accur_lst, accur_90_lst

In [51]:
# Fill an empty dataframe with the time-lag observations of every variable as features
def get_tlags_rolling_features(df, col_names, tlag):
    """Build rolling windows of length ``tlag`` for the given columns.

    For every column the lags ``t-(tlag-1) ... t-1`` are added in front of the
    current value, the first ``tlag-1`` rows and any rows containing NaN are
    dropped, and the index is reset.

    Returns:
        (dataframe, nparray): the lagged frame and the same values arranged as
        an array of shape ``(rows, tlag, len(col_names))``.
    """
    lagged = pd.DataFrame()
    for name in col_names:
        series = df[name]
        for lag in reversed(range(1, tlag)):
            lagged[name + '_t-' + str(lag)] = series.shift(lag).values[:]
        lagged[name] = series

    lagged = lagged[(tlag - 1):].dropna().reset_index(drop=True)

    n_features = len(col_names)
    windows = np.empty((lagged.shape[0], tlag, n_features))
    for feat_idx in range(n_features):
        first_col = feat_idx * tlag
        # each variable occupies `tlag` consecutive columns of the lagged frame
        windows[:, :, feat_idx] = lagged.values[:, first_col:first_col + tlag]
    return lagged, windows

In [52]:
def get_msvr_gamma(xtrain, relational=True, factor=10):
  """Return the kernel coefficient gamma for the MSVR.

  With ``relational`` equal to True gamma is ``1 / (n_features * xtrain.var())``,
  otherwise it is the fixed value ``1 / factor``.
  """
  if relational == True:
    n_features = xtrain.shape[1]
    return 1 / (n_features * xtrain.var())

  return 1 / factor

#### REFIT

#### Specify Experiment Parameters

In [53]:
# TODO: check the reshape dimension used when loading the original data
# TODO: fix the deformed seq2seq decoder output (related to the feature specification)

In [54]:
# Load Data
dataset = 'refit' # 'ampds' , 'pecansd', 'greend'
dimension_var = 'many' # define whether it will be the baseline or a feature run-through
ylag = 24 # define number of time steps to predict
target = 'Fridge' # Specify the target Appliance
feature_group = 'appliances'
path_to_data_folder = '/content/drive/MyDrive/'
tune_model = 'lstm' # 'lstm', 'ffnn'

# dataset name -> (folder relative to path_to_data_folder, size of the last axis
# that the flat text dump is reshaped to)
_DATASET_LAYOUT = {
  'refit': ('data/REFIT/', 113),
  'ampds': ('data/AMPds/', 113),
  'pecansd': ('data/PecanSD/', 86),
  'greend': ('data/GreenD/building0/', 86),
}

if dataset not in _DATASET_LAYOUT:
  raise AssertionError('Please specify a valid dataset')

_relative_folder, reshape_dimension = _DATASET_LAYOUT[dataset]
data_url = f'{path_to_data_folder}{_relative_folder}'

df = pd.read_csv(f'{data_url}traindata_scaled.csv')

# The text file holds the windowed data flattened; restore (windows, ylag, features).
nparray = np.loadtxt(f'{data_url}traindata_scaled.txt').reshape((-1, ylag, reshape_dimension))
print(nparray.shape)

# train and valid set params
split_ratio = 0.2
val_split = int(np.floor(len(df)*split_ratio))
# NOTE(review): the "-1" presumably skips one leading non-feature column of the
# CSV that is absent from the array -- confirm against the data files.
target_idx = list(df.columns).index(target)-1

# Datasets that share the same column layout.
wide_layout = dataset in ('refit', 'ampds')       # last axis of size 113
narrow_layout = dataset in ('pecansd', 'greend')  # last axis of size 86

# FE Selection Features: col_idx holds the feature columns fed to the models.
if feature_group == 'baseline':
  # target series only
  col_idx = np.r_[target_idx:target_idx+1]
  dimension_var = 'one'

elif feature_group == 'appliances':
  # appliances only
  if wide_layout:
    col_idx = np.r_[0:5]
  elif narrow_layout:
    col_idx = np.r_[0:4]

elif feature_group == 'weather':
  if wide_layout:
    col_idx = np.r_[target_idx:target_idx+1,5:8]
  elif narrow_layout:
    col_idx = np.r_[target_idx:target_idx+1,4:7]

elif feature_group == 'sinecosine':
  if wide_layout:
    col_idx = np.r_[target_idx:target_idx+1,90:91, 95:103]
  elif narrow_layout:
    col_idx = np.r_[target_idx:target_idx+1, 65:66, 70:78]

elif feature_group == 'lon_loff':
  if wide_layout:
    col_idx = np.r_[target_idx:target_idx+1, 103+target_idx:105+target_idx+1]
  elif narrow_layout:
    col_idx = np.r_[target_idx:target_idx+1, 78+target_idx:80+target_idx+1]

# This branch used to repeat the key 'lon_loff' and could never be reached.
# Its index set covers every column except four, so it gets a key of its own.
# NOTE(review): the key name 'all' is a guess -- rename if another was intended.
elif feature_group == 'all':
  if wide_layout:
    col_idx = np.r_[0:91, 95:113]
  elif narrow_layout:
    col_idx = np.r_[0:66, 70:86]

elif feature_group == 'weather_sinecosine':
  # Union of the 'weather' and 'sinecosine' columns (the weather columns were
  # missing before, which made this group identical to 'sinecosine').
  if wide_layout:
    col_idx = np.r_[target_idx:target_idx+1, 5:8, 90:91, 95:103]
  elif narrow_layout:
    col_idx = np.r_[target_idx:target_idx+1, 4:7, 65:66, 70:78]

# NOTE(review): the four groups below only set a flag and leave col_idx
# undefined, so the later feature sub-selection fails for them.
elif feature_group =='autoreg_features':
  autoreg_features = True
elif feature_group =='interact_features':
  interact_features = True
elif feature_group =='vest_features':
  vest_features = True
elif feature_group =='taken_features':
  taken_features = True
else:
  raise AssertionError('Please specify a valid feature group')


# Model params all models:
epochs = 10
loss = 'mse'
# `learning_rate` is the supported keyword; the old alias `lr` is deprecated.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)



(9330, 24, 113)


In [55]:
# Inputs are the windows, targets are the target column of the window that
# starts `ylag` steps later; the last `val_split` windows form the validation set.
Xtrain_arrayfe = nparray[:-val_split]
ytrain_arrayfe = nparray[ylag:-val_split+ylag, :, target_idx]

Xvalid_arrayfe = nparray[-val_split:-ylag]
yvalid_arrayfe = nparray[-val_split+ylag:, :, target_idx]

# get scalars and test transformation process for evaluation
# (pickle executes code on load -- only open scaler files you created yourself)
with open(f'{data_url}std_scaler_{target}.pkl', 'rb') as scaler_file:
  preproc_target = load(scaler_file)
df_tr = pd.read_csv(f'{data_url}original_traindata_imputed_outlreplaced.csv')

# inverse transform our predictions:
# every ylag-th window gives non-overlapping windows, i.e. one continuous series
y_valid_fridge = yvalid_arrayfe[::ylag, :].reshape((-1,1))
y_valid_fridge = preproc_target.inverse_transform(y_valid_fridge)

#inverse transform test values
Xvalid_fridge = Xvalid_arrayfe[::ylag,:,target_idx].reshape((-1,1))
Xvalid_fridge = preproc_target.inverse_transform(Xvalid_fridge)

# get part of original data that we use
# NOTE(review): the tail offset is dataset specific, see the values in the trailing comment.
df_valid_true = df_tr[target].iloc[-len(y_valid_fridge)-11:-11].values.reshape((-1,1)) # refit = -3/ smart -13/ ampds -13

# calculate difference to original data (sanity check: raises unless the
# inverse-transformed targets reproduce the original series)
rmse, mae, mape_sin_nan, mase, nrmse, acc, acc_90 = evaluate_predictions_flat(df_valid_true, y_valid_fridge, test_y= Xvalid_fridge)

RMSE/MAE/wMAPE/MASE/NRMSE/ACCURACY
0.00,0.00,0.00,0.00, 0.00, 1.00


In [56]:
# Load Test Data
Xtest_array = np.loadtxt(f'{data_url}testdata_scaled.txt')
# same window length as the training data (was a hard-coded 24)
Xtest_array = Xtest_array.reshape((-1,ylag,reshape_dimension))
# each window is the input for predicting the target column of the next window
Xtest = Xtest_array[:-1, :, :].copy()
ytest = Xtest_array[1:, :, target_idx].copy()

# Check for Testdata:
df_t = pd.read_csv(f'{data_url}original_testdata_imputed_outlreplaced.csv')

y_test_fridge = ytest.reshape((-1,1))
y_test_fridge = preproc_target.inverse_transform(y_test_fridge)
y_test_fridge_wind = y_test_fridge.reshape(ytest.shape)

#inverse transform test values
Xtest_fridge = Xtest[:,:,target_idx].reshape((-1,1))
Xtest_fridge = preproc_target.inverse_transform(Xtest_fridge)
Xtest_fridge_wind = Xtest_fridge.reshape(Xtest[:,:,0].shape)

# get part of original data that we use
# NOTE(review): the tail offset is dataset specific, see the trailing comment.
df_test_true = df_t[target].iloc[-len(y_test_fridge)-24:-24].values.reshape((-1,1)) # refit -24  / Ampds -24
df_test_true_wind = df_test_true.reshape(Xtest[:,:,0].shape)

# calculate difference to original data (sanity check: raises unless the
# inverse-transformed targets reproduce the original series)
rmse, mae, mape_sin_nan, mase, nrmse, acc, acc_90 = evaluate_predictions_flat(df_test_true, y_test_fridge, test_y = Xtest_fridge)

RMSE/MAE/wMAPE/MASE/NRMSE/ACCURACY
0.00,0.00,0.00,0.00, 0.00, 1.00


#### Data for the Experiments

In [57]:
# Keep only the columns of the chosen feature group.
# The arrays are sliced from the full data every time, so re-running this cell
# gives the same result instead of selecting columns from an already reduced array.
Xtrain_arrayfe = nparray[:-val_split][:, :, col_idx]
Xvalid_arrayfe = nparray[-val_split:-ylag][:, :, col_idx]
Xtest = Xtest_array[:-1][:, :, col_idx]

# Input XGBoost, MSVR and FFNN: one flat feature vector per window
X_train = Xtrain_arrayfe.reshape((Xtrain_arrayfe.shape[0],-1))
X_valid = Xvalid_arrayfe.reshape((Xvalid_arrayfe.shape[0],-1))
X_test = Xtest.reshape((Xtest.shape[0],-1))

print(f'Shape of train and val array: {Xtrain_arrayfe.shape}, {Xvalid_arrayfe.shape}')
print(f'Shape of train and val y array: {ytrain_arrayfe.shape}, {yvalid_arrayfe.shape}')
print(f'Shape of test data X-array and y-array: {Xtest.shape}, {ytest.shape}') 
print(X_train.shape)
print(X_test.shape)

Shape of train and val array: (7446, 24, 5), (1860, 24, 5)
Shape of train and val y array: (7446, 24), (1860, 24)
Shape of test data X-array and y-array: (112, 24, 5), (112, 24)
(7446, 120)
(112, 120)


#### SVR

##### Code

In [58]:
import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

class MSVR():
    """Kernel regressor with several outputs that share one set of support vectors.

    ``fit`` repeatedly solves a regularised linear system for the coefficient
    matrix ``Beta`` on the samples whose error norm is at least ``epsilon`` and
    backtracks whenever the objective increases. ``predict`` multiplies the
    kernel matrix between new and training samples with ``Beta``.

    Attributes set by ``fit``: ``xTrain`` (copy of the training inputs),
    ``Beta`` (n_samples x n_outputs coefficients), ``NSV`` (size of the final
    active index set).
    """

    def __init__(self, kernel='rbf', degree=3, gamma=None, coef0=0.0, tol=0.001, C=1.0, epsilon=0.1):
        """Store the hyper-parameters.

        kernel, degree, gamma and coef0 are forwarded to
        ``sklearn.metrics.pairwise.pairwise_kernels``; tol is the relative
        improvement below which fitting stops; C weights the loss term;
        epsilon is the radius inside which errors are not penalised.
        """
        super(MSVR, self).__init__()
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.tol = tol
        self.C = C
        self.epsilon = epsilon
        self.Beta = None
        self.NSV = None
        self.xTrain = None

    def fit(self, x, y):
        """Estimate ``Beta`` from inputs ``x`` (2-D) and targets ``y`` (2-D).

        Returns nothing; results are stored on the instance.
        """
        self.xTrain = x.copy()
        C = self.C
        epsi = self.epsilon
        tol = self.tol

        n_m = np.shape(x)[0]  # num of samples
        n_d = np.shape(x)[1]  # input data dimensionality (not used below)
        n_k = np.shape(y)[1]  # output data dimensionality (output variables)

        # H = kernelmatrix(ker, x, x, par)
        H = pairwise_kernels(x, x, metric=self.kernel, filter_params=True,
                             degree=self.degree, gamma=self.gamma, coef0=self.coef0)

        self.Beta = np.zeros((n_m, n_k))

        #E = prediction error per output (n_m * n_k)
        E = y - np.dot(H, self.Beta)
        #RSE: Euclidean norm of the error of every sample (n_m * 1)
        u = np.sqrt(np.sum(E**2, 1, keepdims=True))

        #RMSE (collected per iteration, but never returned or stored)
        RMSE = []
        RMSE_0 = np.sqrt(np.mean(u**2))
        RMSE.append(RMSE_0)

        #points for which prediction error is larger than epsilon
        i1 = np.where(u > epsi)[0]

        #set initial values of alphas a (n_m * 1)
        # NOTE(review): divides by u; rows with u == 0 give nan/inf, but only a[i1] is read.
        a = 2 * C * (u - epsi) / u

        #L (n_m * 1)
        L = np.zeros(u.shape)

        # we modify only entries for which  u > epsi. with the sq slack
        L[i1] = u[i1]**2 - 2 * epsi * u[i1] + epsi**2

        #Lp is the quantity to minimize (sq norm of parameters + slacks)
        Lp = []
        BetaH = np.dot(np.dot(self.Beta.T, H), self.Beta)
        Lp_0 = np.sum(np.diag(BetaH), 0) / 2 + C * np.sum(L)/2
        Lp.append(Lp_0)

        eta = 1
        k = 1       # index of the current iteration in Lp
        hacer = 1   # loop flag ("hacer" = "do"); set to 0 by any stopping criterion
        val = 1     # set to -1 when the active set becomes empty; not read afterwards

        while(hacer):
            # keep the state of the previous iteration for backtracking
            Beta_a = self.Beta.copy()
            E_a = E.copy()
            u_a = u.copy()
            i1_a = i1.copy()

            # kernel sub-matrix of the active samples plus diag(1/a) and a tiny ridge
            M1 = H[i1][:, i1] + \
                np.diagflat(1/a[i1]) + 1e-10 * np.eye(len(a[i1]))

            #compute betas
            #       sal1 = np.dot(np.linalg.pinv(M1),y[i1])  # inverse or generalised (Moore-Penrose) inverse -- is M1 guaranteed to be invertible?
            sal1 = np.dot(np.linalg.inv(M1), y[i1])

            eta = 1
            self.Beta = np.zeros(self.Beta.shape)
            self.Beta[i1] = sal1.copy()

            #error
            E = y - np.dot(H, self.Beta)
            #RSE
            u = np.sqrt(np.sum(E**2, 1)).reshape(n_m, 1)
            i1 = np.where(u >= epsi)[0]

            L = np.zeros(u.shape)
            L[i1] = u[i1]**2 - 2 * epsi * u[i1] + epsi**2

            #%recompute the loss function
            BetaH = np.dot(np.dot(self.Beta.T, H), self.Beta)
            Lp_k = np.sum(np.diag(BetaH), 0) / 2 + C * np.sum(L)/2
            Lp.append(Lp_k)

            #Loop where we keep alphas and modify betas
            # (backtracking: shrink the step eta by 10 until the objective no longer increases)
            while(Lp[k] > Lp[k-1]):
                eta = eta/10
                i1 = i1_a.copy()

                self.Beta = np.zeros(self.Beta.shape)
                #%the new betas are a combination of the current (sal1)
                #and of the previous iteration (Beta_a)
                self.Beta[i1] = eta*sal1 + (1-eta)*Beta_a[i1]

                E = y - np.dot(H, self.Beta)
                u = np.sqrt(np.sum(E**2, 1)).reshape(n_m, 1)

                i1 = np.where(u >= epsi)[0]

                L = np.zeros(u.shape)
                L[i1] = u[i1]**2 - 2 * epsi * u[i1] + epsi**2
                BetaH = np.dot(np.dot(self.Beta.T, H), self.Beta)
                Lp_k = np.sum(np.diag(BetaH), 0) / 2 + C * np.sum(L)/2
                Lp[k] = Lp_k

                #stopping criterion 1
                # step became negligible: restore the previous iterate and force both loops to end
                if(eta < 1e-16):
                    Lp[k] = Lp[k-1] - 1e-15
                    self.Beta = Beta_a.copy()

                    u = u_a.copy()
                    i1 = i1_a.copy()

                    hacer = 0

            #here we modify the alphas and keep betas
            a_a = a.copy()
            # NOTE(review): a sample with u == epsi gets a == 0, i.e. 1/a == inf in M1 on the next pass.
            a = 2 * C * (u - epsi) / u

            RMSE_k = np.sqrt(np.mean(u**2))
            RMSE.append(RMSE_k)

            # stopping criterion 2: relative decrease of the objective below tol
            if((Lp[k-1]-Lp[k])/Lp[k-1] < tol):
                hacer = 0

            k = k + 1

            #stopping criterion #algorithm does not converge. (val = -1)
            if(len(i1) == 0):
                hacer = 0
                self.Beta = np.zeros(self.Beta.shape)
                val = -1

        self.NSV = len(i1)

    def predict(self, x):
        """Return ``K(x, xTrain) @ Beta`` -- one row per sample, one column per output."""
        H = pairwise_kernels(x, self.xTrain, metric=self.kernel, filter_params=True,
                             degree=self.degree, gamma=self.gamma, coef0=self.coef0)
        yPred = np.dot(H, self.Beta)
        return yPred

    # def score(self,x):

##### Predictions

In [59]:
# params:
gamma_range= [1/100, 1/1000, 1 /(X_train.shape[1] * X_train.var())]
epsilon_range= [1,0.1, 0.01, 0.001, 0.0001]
c_range = [100, 10, 1, 0.1, 0.01, 0.001]

# Lowest RMSE seen so far. Start at +inf and keep a candidate only if it is
# strictly better (the old test `>= 1000` never fired, so best_params stayed undefined).
msvr_best = np.inf
best_params = None

# NOTE(review): candidates are ranked on the test set; using the validation
# split (X_valid / yvalid_arrayfe) would avoid tuning on the test data.
for ga in gamma_range:
  for ep in epsilon_range:
    for cr in c_range:
      msvr = MSVR(kernel = 'rbf', gamma = ga, epsilon=ep, degree=2, C=cr) 
      msvr.fit(X_train, ytrain_arrayfe)
      yhat = msvr.predict(X_test)
      # flatten to one column first: the scaler was fitted on a single column
      y_pred_fridge = preproc_target.inverse_transform(yhat.reshape((-1,1)))
      print(y_pred_fridge.shape)
      msvr_tuning_results = evaluate_predictions_flat(df_test_true, y_pred_fridge, test_y = Xtest_fridge, testing=False)
      if msvr_tuning_results[0] < msvr_best:
        best_params = {'Gamma': ga, 'epsilon': ep, 'c_value': cr}
        msvr_best = msvr_tuning_results[0]

print(best_params)

(2688, 1)
RMSE/MAE/wMAPE/MASE/NRMSE/ACCURACY
19.92,16.10,1.01,0.95, 0.20, 0.80
(2688, 1)
RMSE/MAE/wMAPE/MASE/NRMSE/ACCURACY
17.92,14.78,0.93,0.87, 0.18, 0.80


KeyboardInterrupt: ignored

In [None]:
import sklearn
# Compare predictions against multi step SVR:
class VectorRegression(sklearn.base.BaseEstimator):
    """Multi-output wrapper: one clone of ``estimator`` per column of ``y``."""

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y):
        """Fit an independent clone of the base estimator on every target column."""
        _, n_targets = y.shape
        fitted = []
        for target_col in range(n_targets):
            model = sklearn.base.clone(self.estimator)
            fitted.append(model.fit(X, y[:, target_col]))
        self.estimators_ = fitted
        return self

    def predict(self, X):
        """Predict every target with its own estimator and stack the results column-wise."""
        columns = [np.expand_dims(est.predict(X), axis=1) for est in self.estimators_]
        return np.hstack(columns)


In [None]:
from sklearn.svm import SVR

# Baseline: one independent SVR per forecast step.
simple_svr = VectorRegression(SVR(epsilon=0.1, C=0.1))
simple_svr.fit(X_train, ytrain_arrayfe)
yhat = simple_svr.predict(X_test)

# Flatten to one column and bring the predictions back to the original scale.
y_pred_fridge = preproc_target.inverse_transform(yhat.reshape((-1,1)))
svr_tuning_results = evaluate_predictions_flat(df_test_true, y_pred_fridge, test_y = Xtest_fridge, testing=False)
print(svr_tuning_results[0])

#### Neural Networks Tuning

In [61]:
if tune_model == 'ffnn':
  def get_keras_model(neurons_per_layer_return, 
                      dropout_rate, 
                      lky_lr, nr_layers):
      """Build a feed-forward network on the flattened windows.

      Args:
          neurons_per_layer_return: units of every hidden Dense layer.
          dropout_rate: dropout applied after every hidden layer.
          lky_lr: slope of the LeakyReLU activations.
          nr_layers: number of stacked hidden blocks (Dense -> LeakyReLU -> Dropout).

      Returns:
          Uncompiled ``tf.keras.Model`` with ``ylag`` linear outputs.
      """
      inputs = Input(X_train[0].shape)

      # Chain the hidden blocks: each block consumes the output of the previous
      # one (previously every block was attached to the input layer).
      x = inputs
      for _ in range(nr_layers):
        x = Dense(neurons_per_layer_return)(x)
        x = LeakyReLU(alpha=lky_lr)(x)
        x = Dropout(dropout_rate)(x)

      outputs = Dense(ylag)(x)

      model = tf.keras.Model(inputs=inputs, outputs=outputs)
      return model

else:
  def get_keras_model(neurons_per_layer_return, 
                      dropout_rate, 
                      lky_lr, nr_layers):
      """Build a two-layer stacked LSTM on the (time steps, features) windows.

      Args:
          neurons_per_layer_return: units of both LSTM layers.
          dropout_rate: dropout applied to the inputs and after every LSTM layer.
          lky_lr: slope of the LeakyReLU applied to the output layer.
          nr_layers: accepted for a uniform signature; the depth is fixed at two here.

      Returns:
          Uncompiled ``tf.keras.Model`` with 24 outputs.
      """
      # define the layers.
      inputs = Input(Xtrain_arrayfe[0].shape)  # input layer.
      x = Dropout(dropout_rate)(inputs) # dropout on the inputs.
      
      # Add the hidden layers (the input shape is already fixed by the Input layer).
      x = LSTM(neurons_per_layer_return, return_sequences=True)(x)
      #x = LeakyReLU(alpha=lky_lr)(x)
      x = Dropout(dropout_rate)(x)
  
      x = LSTM(neurons_per_layer_return)(x)
      #x = LeakyReLU(alpha=lky_lr)(x)
      x = Dropout(dropout_rate)(x)        
      # output layer.
      outputs = Dense(24, activation='linear')(x)
      outputs = LeakyReLU(alpha=lky_lr)(outputs)
  
      model = tf.keras.Model(inputs=inputs, outputs=outputs)
      return model

In [62]:

  

# This function takes in the hyperparameters and returns a score (hold-out validation loss).
def keras_mlp_cv_score(parameterization, weight=None):
    """Train one model for a hyper-parameter set and score it on a hold-out split.

    Args:
        parameterization: dict with the keys 'neurons_per_layer_return',
            'dropout_rate', 'lky_lr', 'nr_layers', 'optimizer',
            'learning_rate' and 'batch_size'.
        weight: unused; kept for interface compatibility.

    Returns:
        ``(mean, spread)`` of the validation loss over the last 10 epochs, or
        ``(9999.0, 0.0)`` if the mean is NaN.
        NOTE(review): the spread is a standard deviation although it is named
        and passed on as a standard error -- confirm what the tuner should get.

    Raises:
        ValueError: if the requested optimizer is not supported.
    """
    model = get_keras_model(parameterization.get('neurons_per_layer_return'),
                            parameterization.get('dropout_rate'),
                            parameterization.get('lky_lr'),
                            parameterization.get('nr_layers'))

    opt = parameterization.get('optimizer').lower()
    learning_rate = parameterization.get('learning_rate')

    if opt == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    else:
        # fail with a clear message instead of an UnboundLocalError at compile time
        raise ValueError(f'Unsupported optimizer: {opt!r}')

    NUM_EPOCHS = 20

    # Specify the training configuration.
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=['mse'])

    # The FFNN is built on the flattened windows, the LSTM on the 3-D windows.
    data = X_train if tune_model == 'ffnn' else Xtrain_arrayfe
    labels = ytrain_arrayfe

    # fit the model using a 20% validation set.
    res = model.fit(data, labels, epochs=NUM_EPOCHS, batch_size=parameterization.get('batch_size'),
                    validation_split=0.2)

    # look at the last 10 epochs. Get the mean and standard deviation of the validation score.
    last10_scores = np.array(res.history['val_loss'][-10:])
    mean = last10_scores.mean()
    sem = last10_scores.std()

    # If the model didn't converge then set a high loss.
    if np.isnan(mean):
        return 9999.0, 0.0

    return mean, sem

In [63]:
# Define the search space: one dict per tunable hyper-parameter.
parameters = [
    # continuous ranges, sampled on a log scale
    {"name": "learning_rate", "type": "range", "bounds": [0.0001, 0.5], "log_scale": True},
    {"name": "dropout_rate", "type": "range", "bounds": [0.01, 0.5], "log_scale": True},
    # integer range
    {"name": "neurons_per_layer_return", "type": "range", "bounds": [1, 400], "value_type": "int"},
    # categorical choices
    {"name": "batch_size", "type": "choice", "values": [8, 16, 32, 64, 128]},
    {"name": "lky_lr", "type": "choice", "values": [0.1, 0.2, 0.5]},
    {"name": "optimizer", "type": "choice", "values": ['adam']},
    # integer range
    {"name": "nr_layers", "type": "range", "bounds": [1, 6], "value_type": "int"},
]

In [64]:
# Set up Ax notebook plotting and a fresh client that manages the tuning trials.
init_notebook_plotting()

ax_client = AxClient()

# create the experiment.
# The search space is the `parameters` list; the single objective 'keras_cv'
# (the key returned by `evaluate`) is minimised.
ax_client.create_experiment(
    name="keras_experiment",
    parameters=parameters,
    objective_name='keras_cv',
    minimize=True)

def evaluate(parameters):
    """Score one hyper-parameter set and wrap the result under the objective name."""
    score = keras_mlp_cv_score(parameters)
    return {"keras_cv": score}

Output hidden; open in https://colab.research.google.com to view.

In [65]:
# Number of hyper-parameter sets to try.
N_TRIALS = 25

for _ in range(N_TRIALS):
    # Use a separate name for the suggested values so the global search-space
    # list `parameters` is not overwritten (the experiment cell needs it on re-run).
    trial_params, trial_index = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=trial_index, raw_data=evaluate(trial_params))

[INFO 04-22 22:59:19] ax.service.ax_client: Generated new trial 0 with parameters {'learning_rate': 0.000368, 'dropout_rate': 0.018984, 'neurons_per_layer_return': 15, 'batch_size': 32, 'lky_lr': 0.2, 'nr_layers': 5, 'optimizer': 'adam'}.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

KeyboardInterrupt: ignored

In [None]:
best_parameters, values = ax_client.get_best_parameters()

# the best set of parameters, one (name, value) pair per line.
for param_name, param_value in best_parameters.items():
  print((param_name, param_value))


# the best score achieved.
means, covariances = values
print(means)

# Recorded result of an earlier run -- with leaky relu
#('learning_rate', 0.00013804406758572161)
#('dropout_rate', 0.05679751211790285)
#('neurons_per_layer_return', 172)
#('batch_size', 32)
#('lky_lr', 0.2)
#('optimizer', 'adam')
#{'keras_cv': 0.8750310514613202}

# Recorded result of an earlier run -- without leaky relu
#('learning_rate', 0.0001)
#('dropout_rate', 0.0755114162939656)
#('neurons_per_layer_return', 377)
#('batch_size', 64)
#('lky_lr', 0.1)
#('optimizer', 'adam')
#
#{'keras_cv': 0.872819151614518}

In [None]:
# Fixed hyper-parameters for the final LSTM runs.
loss = 'mse'
drpt1_rate = 0.06
hidd_dim_lstm1 = 172
hidd_dim_lstm2 = 172
lr1_alpha = 0.2

start = time.time()
# Train the same architecture once per random seed and collect the test predictions:
lstm_result_lst = []
for seed_value in [42, 10, 567, 239, 400, 1390, 380, 9, 27, 769]:
  # Seed every random number generator BEFORE the layers are created, otherwise
  # the weight initialisation is not controlled by the seed.
  os.environ['PYTHONHASHSEED']=str(seed_value)
  random.seed(seed_value)
  np.random.seed(seed_value)
  tf.random.set_seed(seed_value)

  # Define input layers
  sequential_input = Input(Xtrain_arrayfe[0].shape)
  
  # Model Architecture: LSTM (the input shape is already fixed by the Input layer)
  lstm1 = LSTM(hidd_dim_lstm1, return_sequences=True)(sequential_input)
  lr1 = LeakyReLU(alpha=lr1_alpha)(lstm1)
  drpt1 = Dropout(drpt1_rate)(lr1)
  lstm2 = LSTM(hidd_dim_lstm2)(drpt1) #, return_sequences=True
  lr2 = LeakyReLU(alpha=lr1_alpha)(lstm2)
  drpt2 = Dropout(drpt1_rate)(lr2)

  dense = Dense(24)(drpt2)
  outputs = LeakyReLU(alpha=lr1_alpha)(dense)

  # A fresh optimizer per model: an optimizer instance keeps internal state
  # and must not be shared between independently trained models.
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

  # Define the model
  lstm = Model(inputs=sequential_input, outputs=outputs)
  lstm.compile(loss=loss, optimizer=optimizer)
  
  # Training loop
  lstm.fit(Xtrain_arrayfe, ytrain_arrayfe, validation_data = (Xvalid_arrayfe, yvalid_arrayfe), epochs = 50, batch_size=32) #
  y_pred = lstm.predict(Xtest)
  lstm_result_lst.append(y_pred)

In [None]:
# print_originalvalues_results returns seven lists, in this order:
# RMSE, MAE, wMAPE, MASE, NRMSE, accuracy, accuracy_90 (one entry per trained model).
rmse_lst, mae_lst, mape_lst, mase_lst, nrmse_lst, acc_lst, acc_90_lst = print_originalvalues_results(lstm_result_lst, df_test_true, Xtest_fridge)

metric_lists = (rmse_lst, mae_lst, mape_lst, mase_lst, nrmse_lst, acc_lst, acc_90_lst)

for metric_values in metric_lists:
  print(metric_values)

# Average every metric over all trained models.
print('Mean RMSE, MAE, wMAPE, MASE, NRMSE, ACC, ACC_90:')
print('/'.join(f'{sum(metric_values)/len(metric_values):.2f}' for metric_values in metric_lists))