In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import time
import math
import sys
import matplotlib.pyplot as plt

sys.path.append('..')
import EnergyPricesLibrary as Ep
import CustomMetrics
import CustomHyperModelCompletos

from kerastuner.tuners import BayesianOptimization
from sklearn.metrics import mean_squared_error

%load_ext autoreload
%autoreload 2

In [2]:
def make_predictions(model,scaler_D_x,scaler_D_y,scaler_H_x,scaler_H_y,
                     trainX_D, trainY_D, testX_D, testY_D,
                     trainX_H, trainY_H, testX_H, testY_H,
                     trainX_I, trainY_I, testX_I, testY_I,
                     n_steps_out,len_output_features):
    """Run the model on two splits and undo the target scaling.

    The model is fed ``[hourly, daily, images]`` inputs, in that order. Both the
    predictions and the daily-branch targets (``trainY_D`` / ``testY_D``) are
    reshaped to ``(samples * n_steps_out, len_output_features)`` and passed
    through ``scaler_D_y.inverse_transform``.

    Only ``model``, ``scaler_D_y``, the six ``*X_*`` inputs, ``trainY_D``,
    ``testY_D``, ``n_steps_out`` and ``len_output_features`` are used. The other
    parameters (``scaler_D_x``, ``scaler_H_x``, ``scaler_H_y``, ``*Y_H``,
    ``*Y_I``) are accepted but never read.

    Returns:
        Tuple ``(trainPredict, trainY, testPredict, testY)`` holding the
        outputs of ``scaler_D_y.inverse_transform``.
    """

    def _one_row_per_step(windows):
        # Collapse the (sample, step) axes so each forecast step becomes a row.
        return windows.reshape(windows.shape[0]*n_steps_out, len_output_features)

    # Forecast both splits first, still in scaled units.
    scaled_train_forecast = _one_row_per_step(model.predict([trainX_H, trainX_D, trainX_I]))
    scaled_test_forecast = _one_row_per_step(model.predict([testX_H, testX_D, testX_I]))

    # The daily-branch target scaler maps everything back.
    unscale = scaler_D_y.inverse_transform

    trainPredict = unscale(scaled_train_forecast)
    trainY = unscale(_one_row_per_step(trainY_D))

    testPredict = unscale(scaled_test_forecast)
    testY = unscale(_one_row_per_step(testY_D))

    return trainPredict, trainY, testPredict, testY

def get_metrics(trainY,trainPredict,testY,testPredict):
    """Compute, print and return RMSE and MAPE for two splits.

    RMSE is the square root of ``mean_squared_error(actual, predicted)``.
    MAPE is delegated to ``Ep.MAPE``, which is called with the prediction
    first and the actual values second.

    Returns:
        Tuple ``(trainScore, trainMAPE, testScore, testMAPE)`` where the
        ``*Score`` entries are the RMSE values.
    """

    def _rmse(actual, predicted):
        # Root of the mean squared error between targets and forecasts.
        return math.sqrt(mean_squared_error(actual, predicted))

    # First split.
    trainScore, trainMAPE = _rmse(trainY, trainPredict), Ep.MAPE(trainPredict, trainY)
    print('Train Score: %.2f RMSE' % trainScore)
    print('Train Score: %.2f MAPE' % trainMAPE)

    # Second split.
    testScore, testMAPE = _rmse(testY, testPredict), Ep.MAPE(testPredict, testY)
    print('Test Score: %.2f RMSE' % testScore)
    print('Test Score: %.2f MAPE' % testMAPE)

    return trainScore, trainMAPE, testScore, testMAPE

In [3]:
# Daily series workbook, indexed by its 'Fecha' column.
data_diaria_path = os.path.join(
    '..', '..', '..', 'dataset', 'Series', 'Sabanas', 'Original',
    'Sabana_Datos_Diaria.xlsx',
)
data_diaria = pd.read_excel(data_diaria_path).set_index('Fecha')

In [4]:
# Hourly series workbook, indexed by its 'Fecha' column.
data_horaria_path = os.path.join(
    '..', '..', '..', 'dataset', 'Series', 'Sabanas', 'Original',
    'Sabana_Datos_Horaria.xlsx',
)
data_horaria = pd.read_excel(data_horaria_path).set_index('Fecha')

In [5]:
# Folders holding the climatic images; one sub-folder per variable.
climatic_images_prcp_dir, climatic_images_tavg_dir = (
    os.path.join('..', '..', '..', 'dataset', 'Climatic Images', variable_folder)
    for variable_folder in ('PRCP', 'TAVG')
)

In [6]:
# Price workbook (source of the forecast target), indexed by its 'Fecha' column.
precio_bolsa_path = os.path.join(
    '..', '..', '..', 'dataset', 'Series', 'Sabanas', 'Original',
    'Sabana_Datos_Precio_Bolsa.xlsx',
)
precio_bolsa = pd.read_excel(precio_bolsa_path).set_index('Fecha')

In [7]:
# Column names of the daily and hourly frames; used later as input feature lists.
nombre_series_diaria, nombre_series_horaria = (
    data_diaria.columns.values,
    data_horaria.columns.values,
)

In [8]:
# Hourly features and the price frame side by side, aligned on their indexes.
data_horaria_full = pd.concat((data_horaria, precio_bolsa), axis='columns')

In [9]:
# (rows, columns) of the hourly, daily and price frames, in that order.
tuple(frame.shape for frame in (data_horaria, data_diaria, precio_bolsa))

((177480, 85), (7395, 119), (177480, 1))

## Build Window

In [10]:
# Pair every precipitation image with the temperature image at the same position.
#
# os.listdir() returns entries in arbitrary order, so both listings are sorted
# before being zipped. Without sorting, a PRCP file could be paired with a TAVG
# file at a different position and the resulting date list could come out
# unordered.
# NOTE(review): zip() stops at the shorter listing, so a file missing from one
# folder is dropped silently -- confirm both folders hold the same files.
lista_fechas = list()
lista_rutas = list()
for prcp_file,tavg_file in zip(sorted(os.listdir(climatic_images_prcp_dir)),
                               sorted(os.listdir(climatic_images_tavg_dir))):
    # The label is the PRCP file name up to its first dot.
    fecha = prcp_file.split('.')[0]
    ruta_prcp = os.path.join(climatic_images_prcp_dir,prcp_file)
    ruta_tavg = os.path.join(climatic_images_tavg_dir,tavg_file)
    lista_fechas.append(fecha)
    lista_rutas.append([ruta_prcp,ruta_tavg])

In [11]:
# Settings shared by the windowing cells below.
output_columns = ['$kWh']   # target column(s)
n_steps_out = 24            # number of output steps per sample
d = 'All'                   # forwarded as `day=` to the Ep split helpers

# Boundaries of the train / validation / test splits.
start_date_train, start_date_val = '2000-02-01', '2020-01-01'
start_date_test, end_date_test = '2020-04-01', '2020-05-01'

In [12]:
# Window settings for the image branch.
n_steps_in, overlap = 2, 1
len_output_features = len(output_columns)
IMG_HEIGHT, IMG_WIDTH = 128, 128

# One row per entry of lista_fechas, holding the two image paths for that entry.
dataset_df = pd.DataFrame(
    lista_rutas,
    index=lista_fechas,
    columns=['Precipitacion', 'Temperatura'],
)

results = Ep.SplitTimeseriesMultipleTimesBackAhead_DifferentTimes_Images(
    df_x=dataset_df,
    df_y=precio_bolsa,
    start_date_train=start_date_train,
    start_date_val=start_date_val,
    start_date_test=start_date_test,
    end_date_test=end_date_test,
    n_steps_out=n_steps_out,
    n_steps_in=n_steps_in,
    overlap=overlap,
    output_features=output_columns,
    IMG_HEIGHT=IMG_HEIGHT,
    IMG_WIDTH=IMG_WIDTH,
)

(trainX_I, trainY_I,
 valX_I, valY_I,
 testX_I, testY_I,
 scaler_y_I, dataset_x_I, dataset_y_I) = results

In [13]:
# Window settings for the daily branch; every column of data_diaria is an input.
inputs_columns = nombre_series_diaria
n_steps_in, overlap = 2, 1

len_input_features, len_output_features = len(inputs_columns), len(output_columns)

results = Ep.SplitTimeseriesMultipleTimesBackAhead_differentTimes(
    df_x=data_diaria,
    df_y=precio_bolsa,
    day=d,
    start_date_train=start_date_train,
    start_date_val=start_date_val,
    start_date_test=start_date_test,
    end_date_test=end_date_test,
    n_steps_out=n_steps_out,
    n_steps_in=n_steps_in,
    overlap=overlap,
    input_features=inputs_columns,
    output_features=output_columns,
)

(trainX_D, trainY_D,
 valX_D, valY_D,
 testX_D, testY_D,
 scaler_D_x, scaler_D_y,
 dataset_x_D, dataset_y_D) = results

In [14]:
# Window settings for the hourly branch; every column of data_horaria is an input.
inputs_columns = nombre_series_horaria
n_steps_in, overlap = 48, 24

len_input_features, len_output_features = len(inputs_columns), len(output_columns)

# data_horaria_full carries both the hourly inputs and the price frame's columns,
# so a single frame is passed here.
results = Ep.SplitTimeseriesMultipleTimesBackAhead(
    df=data_horaria_full,
    day=d,
    start_date_train=start_date_train,
    start_date_val=start_date_val,
    start_date_test=start_date_test,
    end_date_test=end_date_test,
    n_steps_out=n_steps_out,
    n_steps_in=n_steps_in,
    overlap=overlap,
    input_features=inputs_columns,
    output_features=output_columns,
)

(trainX_H, trainY_H,
 valX_H, valY_H,
 testX_H, testY_H,
 scaler_H_x, scaler_H_y,
 df2_H, dataset_H) = results

### Train

In [15]:
# Training split: input and target shapes for each branch.
(
    'Diaria:', trainX_D.shape, trainY_D.shape,
    'Horaria:', trainX_H.shape, trainY_H.shape,
    'Imagenes:', trainX_I.shape, trainY_I.shape,
)

('Diaria:',
 (7272, 2, 119),
 (7272, 24, 1),
 'Horaria:',
 (7272, 48, 85),
 (7272, 24, 1),
 'Imagenes:',
 (7272, 2, 128, 128, 6),
 (7272, 24, 1))

### Val

In [16]:
# Validation split: input and target shapes for each branch.
(
    'Diaria:', valX_D.shape, valY_D.shape,
    'Horaria:', valX_H.shape, valY_H.shape,
    'Imagenes:', valX_I.shape, valY_I.shape,
)

('Diaria:',
 (91, 2, 119),
 (91, 24, 1),
 'Horaria:',
 (91, 48, 85),
 (91, 24, 1),
 'Imagenes:',
 (91, 2, 128, 128, 6),
 (91, 24, 1))

### Test

In [17]:
# Test split: input and target shapes for each branch.
(
    'Diaria:', testX_D.shape, testY_D.shape,
    'Horaria:', testX_H.shape, testY_H.shape,
    'Imagenes:', testX_I.shape, testY_I.shape,
)

('Diaria:',
 (30, 2, 119),
 (30, 24, 1),
 'Horaria:',
 (30, 48, 85),
 (30, 24, 1),
 'Imagenes:',
 (30, 2, 128, 128, 6),
 (30, 24, 1))

## Model

In [18]:
# Training callbacks. Both monitor 'val_loss'.
callback_reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,     # multiplier applied to the learning rate on each reduction
    patience=0,     # no tolerance for non-improving epochs before reducing
    min_lr=1e-5,    # lower bound for the learning rate
    verbose=1,
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

callbacks = [callback_reduce_lr, early_stopping]

In [19]:
# Per-sample input shapes, taken from the training arrays.
hourly_input_shape = (trainX_H.shape[1], trainX_H.shape[2])
daily_input_shape = (trainX_D.shape[1], trainX_D.shape[2])
images_input_shape = trainX_I[0].shape

# Every candidate architecture takes the same constructor arguments, so they
# are defined once and unpacked into each call.
hypermodel_kwargs = dict(
    hourly_input_shape=hourly_input_shape,
    daily_input_shape=daily_input_shape,
    image_input_shape=images_input_shape,
    n_steps_out=n_steps_out,
)

# "Completo I" variants.
ModeloCompletoI_Concat_Version5 = CustomHyperModelCompletos.ModeloCompletoI_Concat_Version5(**hypermodel_kwargs)
ModeloCompletoI_Suma_Version5 = CustomHyperModelCompletos.ModeloCompletoI_Suma_Version5(**hypermodel_kwargs)
ModeloCompletoI_Concat_Version6 = CustomHyperModelCompletos.ModeloCompletoI_Concat_Version6(**hypermodel_kwargs)
ModeloCompletoI_Suma_Version6 = CustomHyperModelCompletos.ModeloCompletoI_Suma_Version6(**hypermodel_kwargs)

# "Completo II" variants.
ModeloCompletoII_Concat_Version6 = CustomHyperModelCompletos.ModeloCompletoII_Concat_Version6(**hypermodel_kwargs)
ModeloCompletoII_Suma_Version6 = CustomHyperModelCompletos.ModeloCompletoII_Suma_Version6(**hypermodel_kwargs)

In [20]:
# Bayesian hyperparameter search for one architecture, followed by an
# evaluation of the best model on the training and validation splits.
arq_best_models = dict()

# NOTE(review): `directory` and `project_name` are fixed, while the hypermodel
# passed below is one of several defined in this notebook. Without
# `overwrite=True` the tuner may reload a previous search stored in this
# location -- confirm the folder is cleared when switching architecture.
bayesian_tuner = BayesianOptimization(
    ModeloCompletoII_Suma_Version6,
    objective='val_mean_absolute_percentage_error',
    num_initial_points=1,
    max_trials=10,
    directory=os.path.normpath('C:/my_dir'),
    project_name='tuning'
)

# Overview of the task
bayesian_tuner.search_space_summary()

# Performs the hyperparameter tuning. perf_counter() is monotonic, so the
# measured duration is not affected by system clock adjustments.
search_start = time.perf_counter()
bayesian_tuner.search(x=[trainX_H,trainX_D,trainX_I], y=trainY_D,
                  epochs=200,
                  validation_data=([valX_H,valX_D,valX_I],valY_D),
                  callbacks=callbacks)
search_end = time.perf_counter()
elapsed_time = search_end - search_start

print('Tiempo Total Transcurrido {}'.format(elapsed_time))

dict_key = 'Arquitectura'

arq_best_models[dict_key] = dict()
bs_model = bayesian_tuner.oracle.get_best_trials(1)[0]

model = bayesian_tuner.get_best_models(num_models=1)[0]

# The validation arrays are passed in make_predictions' "test" slots, so the
# second pair of results describes the validation split.
trainPredict,trainY,valPredict,valY = make_predictions(model,scaler_D_x,scaler_D_y,scaler_H_x,scaler_H_y,
                                                     trainX_D, trainY_D, valX_D, valY_D,
                                                     trainX_H, trainY_H, valX_H, valY_H,
                                                     trainX_I, trainY_I, valX_I, valY_I,
                                                     n_steps_out,len_output_features)

# get_metrics prints its second pair with a "Test Score" label; here those
# lines report the validation split.
trainScore,trainMAPE,valScore,valMAPE = get_metrics(trainY,trainPredict,valY,valPredict)

arq_best_models[dict_key]['Score'] = bs_model.score
arq_best_models[dict_key]['Tiempo Scaneo'] = elapsed_time
arq_best_models[dict_key]['Mape Train'] = trainMAPE
# Stored as 'Mape Val': the value comes from the validation split, and the test
# split is never evaluated in this cell (it was previously labelled 'Mape Test').
arq_best_models[dict_key]['Mape Val'] = valMAPE

# Record the best trial's hyperparameter values next to the metrics.
arq_best_models[dict_key].update(bs_model.hyperparameters.values or {})

Trial 3 Complete [00h 05m 00s]
val_mean_absolute_percentage_error: 22.792190551757812

Best val_mean_absolute_percentage_error So Far: 22.792190551757812
Total elapsed time: 00h 16m 07s

Search: Running Trial #4

Hyperparameter    |Value             |Best Value So Far 
learning_rate     |0.0002649         |0.00022032        

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 00007: ReduceLROnPlateau reducing learning rate to 2.6489890296943488e-05.
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 00010: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200


Epoch 18/200
Epoch 19/200
 29/228 [==>...........................] - ETA: 11s - loss: 0.0227 - mean_absolute_error: 0.0161 - mean_absolute_percentage_error: 12633.7432 - symmetric_mean_absolute_percentage_error: 1237.7286

InternalError:    Failed to call ThenRnnBackward with model config: [rnn_mode, rnn_input_mode, rnn_direction_mode]: 3, 0, 0 , [num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]: [1, 85, 320, 1, 48, 32, 0] 
	 [[{{node gradients/CudnnRNN_grad/CudnnRNNBackprop}}]]
	 [[PartitionedCall_3]] [Op:__inference_train_function_122670]

Function call stack:
train_function -> train_function -> train_function


In [None]:
# Show the summary dictionary filled in by the tuning cell.
arq_best_models