In [1]:
import boto3
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import time
import sys
from sklearn.preprocessing import MinMaxScaler
from PIL import Image
import json

sys.path.append('..')
import CustomHyperModelImages
import EnergyPricesLibrary as Ep

from kerastuner.tuners import BayesianOptimization

%load_ext autoreload
%autoreload 2

In [2]:
def make_predictions(model,scaler_y,trainX,trainY,testX,testY,n_steps_out,len_output_features):
    """Predict on the train and test inputs and undo the target scaling.

    Predictions and targets are each reshaped to
    ``(samples * n_steps_out, len_output_features)`` and then passed through
    ``scaler_y.inverse_transform``.

    Parameters
    ----------
    model : object exposing ``predict(X)``.
    scaler_y : object exposing ``inverse_transform(array)``.
    trainX, testX : inputs forwarded unchanged to ``model.predict``.
    trainY, testY : target arrays supporting ``.shape`` and ``.reshape``.
    n_steps_out : int, number of output steps per sample.
    len_output_features : int, number of output columns.

    Returns
    -------
    tuple
        ``(train predictions, train targets, test predictions, test targets)``,
        all after ``inverse_transform``.
    """

    def _flatten(batch):
        # Collapse (samples, steps, ...) into one row per sample-step pair.
        return batch.reshape(batch.shape[0]*n_steps_out, len_output_features)

    # Run the model first, exactly in train -> test order.
    scaled_train_pred = _flatten(model.predict(trainX))
    scaled_test_pred = _flatten(model.predict(testX))

    # Undo the scaling for predictions and ground truth alike.
    train_pred = scaler_y.inverse_transform(scaled_train_pred)
    train_true = scaler_y.inverse_transform(_flatten(trainY))

    test_pred = scaler_y.inverse_transform(scaled_test_pred)
    test_true = scaler_y.inverse_transform(_flatten(testY))

    return train_pred, train_true, test_pred, test_true

def get_metrics(trainY,trainPredict,testY,testPredict):
    """Return ``(trainMAPE, testMAPE)`` as computed by ``Ep.MAPE``.

    ``Ep.MAPE`` is called with the predictions as first argument and the
    reference values as second argument, first for the train pair and then
    for the test pair.
    """
    return Ep.MAPE(trainPredict,trainY), Ep.MAPE(testPredict,testY)

# DATASET

## Download

In [3]:
"""
s3_resource = boto3.resource('s3',
                             aws_access_key_id='AKIA4NVVYWBFHY2KRSMC',
                             aws_secret_access_key='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')
bucket = s3_resource.Bucket('colombia-energy-forecast')

for obj in bucket.objects.filter():
    if not os.path.exists(os.path.dirname(obj.key)):
        os.makedirs(os.path.dirname(obj.key))
    bucket.download_file(obj.key, obj.key) # save to same path
"""

"\ns3_resource = boto3.resource('s3',\n                             aws_access_key_id='AKIA4NVVYWBFHY2KRSMC',\n                             aws_secret_access_key='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')\nbucket = s3_resource.Bucket('colombia-energy-forecast')\n\nfor obj in bucket.objects.filter():\n    if not os.path.exists(os.path.dirname(obj.key)):\n        os.makedirs(os.path.dirname(obj.key))\n    bucket.download_file(obj.key, obj.key) # save to same path\n"

In [4]:
# Folder holding the PRCP climatic images (relative to the working directory).
climatic_images_prcp_dir = os.path.join(
    'dataset',
    'Climatic Images',
    'PRCP',
)

In [5]:
# Folder holding the TAVG climatic images (relative to the working directory).
climatic_images_tavg_dir = os.path.join(
    'dataset',
    'Climatic Images',
    'TAVG',
)

In [6]:
# Read the price spreadsheet and index it by its 'Fecha' column.
precio_bolsa_path = os.path.join(
    'dataset', 'Series', 'Sabanas', 'Original',
    'Sabana_Datos_Precio_Bolsa.xlsx',
)
precio_bolsa = pd.read_excel(precio_bolsa_path).set_index('Fecha')

## Build Window

In [7]:
# Pair every PRCP image with a TAVG image and remember the date label of each pair.
#
# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the positional pairing deterministic.
# NOTE(review): pairing is still positional; this assumes both folders hold one
# file per date with names that sort identically — confirm against the dataset.
prcp_files = sorted(os.listdir(climatic_images_prcp_dir))
tavg_files = sorted(os.listdir(climatic_images_tavg_dir))

# zip() would silently drop the surplus files and hide a misaligned dataset.
if len(prcp_files) != len(tavg_files):
    raise ValueError(
        'PRCP and TAVG folders hold a different number of files ({} vs {})'.format(
            len(prcp_files), len(tavg_files)))

lista_fechas = list()
lista_rutas = list()
for prcp_file,tavg_file in zip(prcp_files,tavg_files):
    # The label is the part of the PRCP file name before the first dot.
    fecha = prcp_file.split('.')[0]
    ruta_prcp = os.path.join(climatic_images_prcp_dir,prcp_file)
    ruta_tavg = os.path.join(climatic_images_tavg_dir,tavg_file)
    lista_fechas.append(fecha)
    lista_rutas.append([ruta_prcp,ruta_tavg])

In [8]:
# One row per label in lista_fechas; the two columns hold the image paths of each pair.
dataset_df = pd.DataFrame(
    data=lista_rutas,
    index=lista_fechas,
    columns=['Precipitacion', 'Temperatura'],
)

In [9]:
# Date boundaries forwarded to the train/test split helper.
TimeSplit_down, TimeSplit_middle, TimeSplit_top = '2000-02-01', '2020-01-01', '2020-03-31'

# Window configuration forwarded to the split helper.
n_steps_in, n_steps_out = 2, 24
overlap = 1

# Target column(s) and how many of them there are.
output_features = ['$kWh']
len_output_features = len(output_features)

In [10]:
# Larger alternative; switch back to it only if memory allocation is not a problem:
#IMG_HEIGHT,IMG_WIDTH = 256,256

# Smaller size currently in use, chosen to avoid memory-allocation problems.
IMG_HEIGHT = IMG_WIDTH = 128

In [11]:
# Build the train/test arrays, the target scaler and the windowed datasets
# from the image-path frame (inputs) and the price frame (targets).
(trainX, trainY,
 testX, testY,
 scaler_y,
 dataset_x, dataset_y) = Ep.SplitTimeseriesMultipleTimesBackAhead_DifferentTimes_Images(
    # data sources
    df_x=dataset_df, df_y=precio_bolsa,
    # date boundaries
    TimeSplit_down=TimeSplit_down, TimeSplit_middle=TimeSplit_middle, TimeSplit_top=TimeSplit_top,
    # window configuration
    n_steps_out=n_steps_out, n_steps_in=n_steps_in, overlap=overlap,
    output_features=output_features,
    # image size
    IMG_HEIGHT=IMG_HEIGHT, IMG_WIDTH=IMG_WIDTH,
)

In [12]:
# Sanity check: shapes of the four arrays produced by the split.
tuple(split_array.shape for split_array in (trainX, trainY, testX, testY))

((7271, 3, 128, 128, 6), (7271, 24, 1), (91, 3, 128, 128, 6), (91, 24, 1))

## Model

In [13]:
# Multiply the learning rate by 0.1 (never below 1e-4) as soon as val_loss
# stops improving; patience=0 means no grace epochs.
callback_reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=0,
    min_lr=1e-4,
    verbose=1,
)

# Stop training after 5 epochs without a lower val_loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

callbacks = [callback_reduce_lr, early_stopping]

In [14]:
# Shape of a single input sample.
INPUT_SHAPE = trainX[0].shape

# Instantiate ArquitecturaI1 ... ArquitecturaI12, all with the same arguments.
_arquitecturas = [
    getattr(CustomHyperModelImages, 'ArquitecturaI{}'.format(numero))(
        input_shape=INPUT_SHAPE, n_steps_out=n_steps_out)
    for numero in range(1, 13)
]

# Keep the individual names available for the cells that reference them.
(arquitectura1, arquitectura2, arquitectura3, arquitectura4,
 arquitectura5, arquitectura6, arquitectura7, arquitectura8,
 arquitectura9, arquitectura10, arquitectura11, arquitectura12) = _arquitecturas

In [15]:
# Hypermodels in the order in which they will be tuned.
arq_list = [
    arquitectura1, arquitectura2, arquitectura3,
    arquitectura4, arquitectura5, arquitectura6,
    arquitectura7, arquitectura8, arquitectura9,
    arquitectura10, arquitectura11, arquitectura12,
]

In [16]:
# Tune every architecture with Bayesian optimisation and collect, per
# architecture, the best trial's score, the search time, the train/test MAPE
# and the winning hyperparameters.

# Where Keras Tuner stores its trials. Override with the KERAS_TUNER_DIR
# environment variable; the fallback is the location used so far, so results
# of earlier searches are still found.
tuner_directory = os.environ.get('KERAS_TUNER_DIR', os.path.normpath('C:/my_dir'))

arq_best_models = dict()

# NOTE(review): validation_data is the test split, so the tuner objective and
# the reported 'Mape Test' are computed on the same data. Consider a separate
# validation split if an unbiased test figure is needed.
for arq_idx, arq in enumerate(arq_list, start=1):

    bayesian_tuner = BayesianOptimization(
        arq,
        objective='val_mean_absolute_percentage_error',
        num_initial_points=1,
        max_trials=10,
        directory=tuner_directory,
        project_name=str(arq_idx)  # one tuner sub-folder per architecture
    )

    # Overview of the task
    bayesian_tuner.search_space_summary()

    # Performs the hyperparameter tuning
    search_start = time.time()
    bayesian_tuner.search(x=trainX,y=trainY,
                      epochs=200,
                      validation_data=(testX,testY),
                      callbacks=callbacks)
    search_end = time.time()
    elapsed_time = search_end - search_start

    print('Tiempo Total Transcurrido {}'.format(elapsed_time))

    dict_key = 'Arquitectura {}'.format(arq_idx)

    # Best trial (score + hyperparameters) and the corresponding model.
    bs_model = bayesian_tuner.oracle.get_best_trials(1)[0]
    model = bayesian_tuner.get_best_models(num_models=1)[0]

    trainPredict,trainY_true,testPredict,testY_true = make_predictions(model,scaler_y,trainX,trainY,testX,testY,
                                                             n_steps_out,len_output_features)

    trainMAPE,testMAPE = get_metrics(trainY_true,trainPredict,testY_true,testPredict)

    resultados = {
        'Score': bs_model.score,
        'Tiempo Scaneo': elapsed_time,
        'Mape Train': trainMAPE,
        'Mape Test': testMAPE,
    }
    # Append the winning hyperparameters (name -> value), if any.
    resultados.update(bs_model.hyperparameters.values or {})

    arq_best_models[dict_key] = resultados

Trial 10 Complete [00h 06m 26s]
val_mean_absolute_percentage_error: 63.43312072753906

Best val_mean_absolute_percentage_error So Far: 57.278263092041016
Total elapsed time: 00h 37m 49s
INFO:tensorflow:Oracle triggered exit
Tiempo Total Transcurrido 2269.7403433322906


In [17]:
# Persist the collected per-architecture results as JSON in the working directory.
with open('BestModels.json', mode='w') as results_file:
    json.dump(arq_best_models, fp=results_file)

In [18]:
# Display the results collected for every architecture.
arq_best_models

{'Arquitectura 1': {'Score': 35.97036361694336,
  'Tiempo Scaneo': 12496.204443454742,
  'Mape Train': 2.090536572559958,
  'Mape Test': 0.33286573082376814,
  'convLSTM2d_filters_layer_1': 8,
  'convLSTM2d_kernel_layer_1': 5,
  'conv2d_padding_layer_1': 'same',
  'convLSTM2d_filters_layer_3': 8,
  'convLSTM2d_kernel_layer_3': 3,
  'conv2d_padding_layer_3': 'valid',
  'convLSTM2d_filters_layer_5': 8,
  'convLSTM2d_kernel_layer_5': 5,
  'conv2d_padding_layer_5': 'valid',
  'pool2d_size_layer_6': 3,
  'dense_units_layer_8': 48,
  'dense_layer_activation': 'sigmoid',
  'learning_rate': 0.0057358015750987625},
 'Arquitectura 2': {'Score': 36.516178131103516,
  'Tiempo Scaneo': 9744.568314790726,
  'Mape Train': 0.6452693665052478,
  'Mape Test': 0.3381757872734552,
  'convLSTM2d_filters_layer_1': 8,
  'convLSTM2d_kernel_layer_1': 7,
  'conv2d_padding_layer_1': 'valid',
  'pool2d_size_layer_2': 3,
  'convLSTM2d_filters_layer_3': 8,
  'convLSTM2d_kernel_layer_3': 7,
  'conv2d_padding_layer_3