In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import time
import json

import CustomHyperModel
import CustomMetrics
import EnergyPricesLibrary as Ep

from kerastuner.tuners import BayesianOptimization

%load_ext autoreload
%autoreload 2

In [2]:
def make_predictions(model,scaler_y,trainX,trainY,testX,testY,n_steps_out,len_output_features):
    """Predict on two data splits and undo the target scaling on predictions and targets.

    Every array (model output and ground truth) is reshaped to
    ``(array.shape[0] * n_steps_out, len_output_features)`` and then passed
    through ``scaler_y.inverse_transform``.

    Parameters
    ----------
    model : object exposing ``predict(X)``.
    scaler_y : object exposing ``inverse_transform(array)``.
    trainX, testX : inputs forwarded to ``model.predict``.
    trainY, testY : target arrays matching the corresponding inputs.
    n_steps_out : int, factor applied to the leading dimension when flattening.
    len_output_features : int, number of columns after flattening.

    Returns
    -------
    tuple
        ``(train_predictions, train_targets, test_predictions, test_targets)``,
        all as returned by ``scaler_y.inverse_transform``.
    """

    def _flatten_steps(block):
        # Explicit row count (not -1) so an unexpected shape raises here.
        return block.reshape(block.shape[0] * n_steps_out, len_output_features)

    # Model outputs, still in the scaler's space.
    scaled_train_pred = _flatten_steps(model.predict(trainX))
    scaled_test_pred = _flatten_steps(model.predict(testX))

    # Undo the scaling for predictions and ground truth alike.
    train_pred = scaler_y.inverse_transform(scaled_train_pred)
    train_true = scaler_y.inverse_transform(_flatten_steps(trainY))

    test_pred = scaler_y.inverse_transform(scaled_test_pred)
    test_true = scaler_y.inverse_transform(_flatten_steps(testY))

    return train_pred, train_true, test_pred, test_true

def get_metrics(trainY,trainPredict,testY,testPredict):
    """Compute MAPE and sMAPE for the train split and the test split.

    Parameters
    ----------
    trainY, testY : ground-truth values for each split.
    trainPredict, testPredict : predicted values for each split.

    Returns
    -------
    tuple
        ``(train MAPE, test MAPE, train sMAPE, test sMAPE)`` as returned by
        ``Ep.MAPE`` and ``Ep.sMAPE``.
    """
    # NOTE(review): MAPE is called as (prediction, target) while sMAPE is called
    # as (target, prediction). MAPE is not symmetric in its arguments, so confirm
    # the expected parameter order against EnergyPricesLibrary.
    mape_scores = (Ep.MAPE(trainPredict, trainY), Ep.MAPE(testPredict, testY))
    smape_scores = (Ep.sMAPE(trainY, trainPredict), Ep.sMAPE(testY, testPredict))

    return mape_scores + smape_scores

In [3]:
def get_dataset_hourly(n_steps_in,n_steps_out,overlap,nombre_series_horaria,output_columns,data,day,
                       start_date_train,start_date_val,start_date_test,end_date_test):
    """Build the windowed dataset for one day by delegating to the project splitter.

    Thin keyword-argument wrapper around
    ``Ep.SplitTimeseriesMultipleTimesBackAhead``.

    Parameters
    ----------
    n_steps_in, n_steps_out, overlap : forwarded unchanged to the splitter.
    nombre_series_horaria : column labels forwarded as ``input_features``.
    output_columns : column labels forwarded as ``output_features``.
    data : forwarded as ``df``.
    day : forwarded as ``day``.
    start_date_train, start_date_val, start_date_test, end_date_test :
        split boundaries, forwarded unchanged.

    Returns
    -------
    Whatever ``Ep.SplitTimeseriesMultipleTimesBackAhead`` returns; the caller in
    this notebook unpacks it into ten values.
    """
    # The feature counts computed here previously were never used and have been dropped.
    return Ep.SplitTimeseriesMultipleTimesBackAhead(df=data,
                                                    day=day,
                                                    start_date_train=start_date_train,
                                                    start_date_val=start_date_val,
                                                    start_date_test=start_date_test,
                                                    end_date_test=end_date_test,
                                                    n_steps_out=n_steps_out,
                                                    n_steps_in=n_steps_in,
                                                    overlap=overlap,
                                                    input_features=nombre_series_horaria,
                                                    output_features=output_columns)

In [4]:
def crear_callbacks():
    """Create the Keras callbacks used during the hyperparameter search.

    Returns
    -------
    list
        Two callbacks, in this order:

        1. ``ReduceLROnPlateau`` monitoring ``val_loss`` (factor 0.1,
           patience 5, ``min_lr`` 1e-5, verbose 1).
        2. ``EarlyStopping`` monitoring ``val_loss`` (patience 10, mode ``min``).
    """
    keras_callbacks = tf.keras.callbacks

    lr_on_plateau = keras_callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        min_lr=1e-5,
        patience=5,
        verbose=1,
    )

    stop_on_stall = keras_callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        mode='min',
    )

    return [lr_on_plateau, stop_on_stall]

In [5]:
def generar_arquitecturas(input_shape,n_steps_out):
    """Instantiate every hypermodel architecture evaluated by the search.

    Parameters
    ----------
    input_shape : forwarded to each hypermodel constructor as ``input_shape``.
    n_steps_out : forwarded to each hypermodel constructor as ``n_steps_out``.

    Returns
    -------
    list
        One instance each of ``Arquitectura31``, ``32``, ``36``, ``37``, ``43``,
        ``45`` and ``46`` from ``CustomHyperModel``, in that order.
    """
    hypermodel_classes = (
        CustomHyperModel.Arquitectura31,
        CustomHyperModel.Arquitectura32,
        CustomHyperModel.Arquitectura36,
        CustomHyperModel.Arquitectura37,
        CustomHyperModel.Arquitectura43,
        CustomHyperModel.Arquitectura45,
        CustomHyperModel.Arquitectura46,
    )

    return [
        hypermodel_cls(input_shape=input_shape, n_steps_out=n_steps_out)
        for hypermodel_cls in hypermodel_classes
    ]

# DATASET

## Load Dataset

In [6]:
# Hourly series workbook: read it and index the rows by the 'Fecha' column.
data_horaria_path = os.path.join('..','..','dataset','Series','Sabanas','Original','Sabana_Datos_Horaria.xlsx')
data_horaria = pd.read_excel(data_horaria_path).set_index('Fecha')

In [7]:
# Column labels of the hourly frame, captured before any other columns are joined to it.
nombre_series_horaria = data_horaria.columns

In [8]:
# Price workbook: read it and index the rows by the 'Fecha' column.
precio_bolsa_path = os.path.join('..','..','dataset','Series','Sabanas','Original','Sabana_Datos_Precio_Bolsa.xlsx')
precio_bolsa = pd.read_excel(precio_bolsa_path).set_index('Fecha')

In [9]:
# Side-by-side join of the hourly series and the price frame, aligned on their shared index.
data_horaria_full = pd.concat([data_horaria, precio_bolsa], axis='columns')

In [10]:
# Tag every row of both frames with the name of its weekday, taken from the index.
# Assumes the 'Fecha' index is datetime-like (day_name() is called on it) — TODO confirm.
data_horaria_full['day_of_week'] = data_horaria_full.index.day_name()
precio_bolsa['day_of_week'] = precio_bolsa.index.day_name()

## Build Window

In [11]:
# Weekday labels; the tuning cell iterates over this array.
Days = np.array([
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
])

# Split boundaries handed to the dataset builder.
start_date_train = '2000-02-01'
start_date_val = '2020-01-01'
start_date_test = '2020-04-01'
end_date_test = '2020-05-01'

# Window geometry: input steps, output steps and overlap between windows.
# Presumably expressed in hours, given the hourly dataset — TODO confirm.
n_steps_in = 48
n_steps_out = 24
overlap = 24

# Target column(s) to forecast and how many there are.
output_columns = ['$kWh']
len_output_features = len(output_columns)

In [12]:
# Not populated anywhere in this cell; kept because it is a notebook-level name.
dict_days_test = dict()

# Architecture numbers, in the same order as the list built by generar_arquitecturas.
arq_names = [31,32,36,37,43,45,46]

# One independent search per weekday; each weekday gets its own JSON summary file.
for d in Days:

    results_from_hourly = get_dataset_hourly(n_steps_in=n_steps_in,n_steps_out=n_steps_out,overlap=overlap,
                                             nombre_series_horaria=nombre_series_horaria,
                                             output_columns=output_columns,data=data_horaria_full,
                                             day=d,start_date_train=start_date_train,
                                             start_date_val=start_date_val,start_date_test=start_date_test,
                                             end_date_test=end_date_test)

    trainX_H,trainY_H,valX_H,valY_H,testX_H,testY_H,scaler_H_x,scaler_H_y,df2_H,dataset_H = results_from_hourly

    # Model input shape is taken from axes 1 and 2 of the training inputs.
    hourly_input_shape = (trainX_H.shape[1],trainX_H.shape[2])
    callbacks = crear_callbacks()
    arq_list = generar_arquitecturas(hourly_input_shape,n_steps_out)

    # Labels and hypermodels are paired by position; fail fast rather than
    # silently attaching results to the wrong architecture number.
    if len(arq_names) != len(arq_list):
        raise ValueError('arq_names has {} entries but generar_arquitecturas returned {} models'
                         .format(len(arq_names), len(arq_list)))

    arq_best_models = dict()

    for arq_name, arq in zip(arq_names, arq_list):

        project_name = '{}-Arquitectura{}'.format(d,arq_name)

        # NOTE(review): the tuner directory is a hard-coded absolute path;
        # consider moving it to a configurable location.
        bayesian_tuner = BayesianOptimization(
            arq,
            objective='val_loss',
            num_initial_points=1,
            max_trials=10,
            directory=os.path.normpath('C:/my_dir'),
            project_name=project_name
        )

        # Overview of the task
        bayesian_tuner.search_space_summary()

        # Performs the hyperparameter tuning and records how long the search took
        search_start = time.time()
        bayesian_tuner.search(x=trainX_H,y=trainY_H,
                              epochs=200,
                              validation_data=(valX_H,valY_H),
                              callbacks=callbacks)
        search_end = time.time()
        elapsed_time = search_end - search_start

        dict_key = project_name

        arq_best_models[dict_key] = dict()
        bs_model = bayesian_tuner.oracle.get_best_trials(1)[0]

        model = bayesian_tuner.get_best_models(num_models=1)[0]

        # NOTE(review): the validation split (valX_H, valY_H) is passed in the
        # "test" slots, so every "test"/"Test" value below is a validation
        # metric; testX_H/testY_H are never evaluated in this cell.
        trainPredict,trainY_true,testPredict,testY_true = make_predictions(model,scaler_H_y,trainX_H,trainY_H,valX_H,valY_H,
                                                                           n_steps_out,len_output_features)

        trainMAPE,testMAPE,train_sMAPE,test_sMAPE = get_metrics(trainY_true,trainPredict,testY_true,testPredict)

        arq_best_models[dict_key]['Score'] = bs_model.score
        arq_best_models[dict_key]['Tiempo Scaneo'] = elapsed_time
        arq_best_models[dict_key]['Mape Train'] = trainMAPE
        arq_best_models[dict_key]['Mape Test'] = testMAPE
        arq_best_models[dict_key]['sMape Train'] = train_sMAPE
        arq_best_models[dict_key]['sMape Test'] = test_sMAPE

        # Store the winning hyperparameter values next to the metrics.
        if bs_model.hyperparameters.values:
            for hp, value in bs_model.hyperparameters.values.items():
                arq_best_models[dict_key][hp] = value

    # Persist this weekday's summary (one entry per architecture).
    with open('{}-BestModels.json'.format(d), 'w') as outfile:
        json.dump(arq_best_models, outfile)

Trial 10 Complete [00h 00m 05s]
val_loss: 3694.46240234375

Best val_loss So Far: 731.3621826171875
Total elapsed time: 00h 02m 14s
INFO:tensorflow:Oracle triggered exit
