In [1]:
import boto3
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import time
from sklearn.preprocessing import MinMaxScaler
from PIL import Image
import json

import CustomHyperModel
import EnergyPricesLibrary as Ep

from kerastuner.tuners import BayesianOptimization

%load_ext autoreload
%autoreload 2

# DATASET

## Download

In [None]:
# Mirror every object of the project bucket into the working directory,
# reusing each S3 key as the local relative path.
#
# Credentials are NOT embedded here: boto3 resolves them through its default
# chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables,
# the shared ~/.aws/credentials file, or an attached IAM role).
# Never hardcode AWS keys in this notebook; any key that was ever committed
# must be treated as compromised and rotated.
s3_resource = boto3.resource('s3')
bucket = s3_resource.Bucket('colombia-energy-forecast')

for obj in bucket.objects.filter():
    # Keys ending in '/' are "folder" placeholders: create the directory
    # locally instead of trying to download them as a file.
    if obj.key.endswith('/'):
        os.makedirs(obj.key, exist_ok=True)
        continue

    # Top-level keys have an empty dirname, which os.makedirs rejects.
    target_dir = os.path.dirname(obj.key)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    bucket.download_file(obj.key, obj.key) # save to same path

In [2]:
# Relative folder that holds the PRCP climatic images.
climatic_images_prcp_dir = os.path.join(
    'dataset',
    'Climatic Images',
    'PRCP',
)

In [3]:
# Relative folder that holds the TAVG climatic images.
climatic_images_tavg_dir = os.path.join(
    'dataset',
    'Climatic Images',
    'TAVG',
)

In [4]:
# Read the "Precio Bolsa" workbook and index the resulting frame by its
# 'Fecha' column.
precio_bolsa_path = os.path.join(
    'dataset', 'Series', 'Sabanas', 'Original',
    'Sabana_Datos_Precio_Bolsa.xlsx',
)
precio_bolsa = pd.read_excel(precio_bolsa_path).set_index('Fecha')

## Build Window

In [5]:
# Collect, for every image file, its date label (file name up to the first
# '.') and the pair of paths [PRCP image, TAVG image].
#
# os.listdir() returns entries in arbitrary order, so both listings are
# sorted before being zipped; otherwise a PRCP file could be paired with
# the TAVG file of a different date.
# NOTE(review): pairing is positional, so this assumes both folders contain
# files for exactly the same dates with names that sort alike - confirm.
lista_fechas = list()
lista_rutas = list()
prcp_files = sorted(os.listdir(climatic_images_prcp_dir))
tavg_files = sorted(os.listdir(climatic_images_tavg_dir))
for prcp_file,tavg_file in zip(prcp_files,tavg_files):
    fecha = prcp_file.split('.')[0]
    ruta_prcp = os.path.join(climatic_images_prcp_dir,prcp_file)
    ruta_tavg = os.path.join(climatic_images_tavg_dir,tavg_file)
    lista_fechas.append(fecha)
    lista_rutas.append([ruta_prcp,ruta_tavg])

In [6]:
# One row per date label; the two columns hold the PRCP and TAVG image paths.
dataset_df = pd.DataFrame(
    data=lista_rutas,
    index=lista_fechas,
    columns=['Precipitacion', 'Temperatura'],
)

In [7]:
# Date boundaries forwarded to the train/test splitting helper.
TimeSplit_down, TimeSplit_middle, TimeSplit_top = (
    '2000-02-01',
    '2020-01-01',
    '2020-03-31',
)

# Window parameters forwarded to the same helper.
n_steps_in, n_steps_out = 3, 24
overlap = 2

# Target column(s) and how many of them there are.
output_features = ['$kWh']
len_output_features = len(output_features)

In [8]:
# Full-resolution alternative (currently disabled):
# IMG_HEIGHT, IMG_WIDTH = 256, 256

# Reduced resolution, intended for when memory allocation problems occur:
# keep this setting active and leave the 256x256 line above commented out.
IMG_HEIGHT = 128
IMG_WIDTH = 128

In [9]:
# Build the train/test windows from the image-path frame (inputs) and the
# price frame (targets) using the configuration defined in earlier cells.
(
    trainX, trainY,
    testX, testY,
    scaler_y,
    dataset_x, dataset_y,
) = Ep.SplitTimeseriesMultipleTimesBackAhead_DifferentTimes_Images(
    df_x=dataset_df, df_y=precio_bolsa,
    TimeSplit_down=TimeSplit_down,
    TimeSplit_middle=TimeSplit_middle,
    TimeSplit_top=TimeSplit_top,
    n_steps_out=n_steps_out, n_steps_in=n_steps_in,
    overlap=overlap,
    output_features=output_features,
    IMG_HEIGHT=IMG_HEIGHT, IMG_WIDTH=IMG_WIDTH,
)

In [10]:
# Display the shapes of the four arrays as a single tuple.
tuple(arr.shape for arr in (trainX, trainY, testX, testY))

((3636, 3, 128, 128, 6), (3636, 24, 1), (46, 3, 128, 128, 6), (46, 24, 1))

## Model

In [11]:
# Learning-rate schedule: scale the rate by 0.1 whenever 'val_loss' fails to
# improve (patience=0), never going below 1e-4.
callback_reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=0,
    min_lr=1e-4,
    verbose=1,
)

# Stop training once 'val_loss' has not decreased for 5 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

callbacks = [callback_reduce_lr, early_stopping]

In [12]:
# Instantiate hypermodels 25-28 with identical arguments: the shape of one
# training sample, the number of output features and the forecast horizon.
arquitectura25, arquitectura26, arquitectura27, arquitectura28 = (
    hyper_cls(
        input_shape=trainX[0].shape,
        output_units=len_output_features,
        n_steps_out=n_steps_out,
    )
    for hyper_cls in (
        CustomHyperModel.Arquitectura25,
        CustomHyperModel.Arquitectura26,
        CustomHyperModel.Arquitectura27,
        CustomHyperModel.Arquitectura28,
    )
)

In [13]:
# Hypermodels to tune, in the order in which they are searched.
arq_list = [
    arquitectura25,
    arquitectura26,
    arquitectura27,
    arquitectura28,
]

In [14]:
# Run one Bayesian hyperparameter search per architecture in `arq_list` and
# collect, for each, the best trial's score, the search duration and the
# winning hyperparameter values in `arq_best_models`.

# Root folder for the tuner's trial artifacts. It can be overridden with the
# KERAS_TUNER_DIR environment variable so the notebook is not tied to one
# machine; the default keeps the original location so trials already stored
# there are still found.
tuner_dir = os.path.normpath(os.environ.get('KERAS_TUNER_DIR', 'C:/my_dir'))

arq_idx = 25  # numeric label of the first architecture in arq_list
arq_best_models = dict()

for arq in arq_list:

    bayesian_tuner = BayesianOptimization(
        arq,
        objective='val_mean_absolute_percentage_error',
        num_initial_points=1,
        max_trials=15,
        directory=tuner_dir,
        project_name=str(arq_idx)
    )

    # Overview of the task
    bayesian_tuner.search_space_summary()

    # Performs the hyperparameter tuning.
    # NOTE(review): the test split is used as validation data here, so the
    # reported scores are optimistic with respect to truly unseen data -
    # consider carving a validation split out of the training set.
    # perf_counter is monotonic, so the duration is immune to clock changes.
    search_start = time.perf_counter()
    bayesian_tuner.search(x=trainX,y=trainY,
                      epochs=200,
                      validation_data=(testX,testY),
                      callbacks=callbacks)
    search_end = time.perf_counter()
    elapsed_time = search_end - search_start

    print('Tiempo Total Transcurrido {}'.format(elapsed_time))

    # Show a summary of the search
    #bayesian_tuner.results_summary()

    dict_key = 'Arquitectura {}'.format(arq_idx)
    bs_model = bayesian_tuner.oracle.get_best_trials(1)[0]

    arq_best_models[dict_key] = {
        'Score': bs_model.score,
        'Tiempo Scaneo': elapsed_time,
    }
    # Append the winning hyperparameters (nothing is added when there are none).
    arq_best_models[dict_key].update(bs_model.hyperparameters.values or {})

    arq_idx += 1

Trial 15 Complete [00h 03m 05s]
val_mean_absolute_percentage_error: 49.1245002746582

Best val_mean_absolute_percentage_error So Far: 43.9294548034668
Total elapsed time: 00h 54m 46s
INFO:tensorflow:Oracle triggered exit
Tiempo Total Transcurrido 3286.242907524109


In [15]:
# Persist the per-architecture summary as JSON in the working directory.
with open('BestModels.json', 'w') as outfile:
    outfile.write(json.dumps(arq_best_models))

In [16]:
# Display the collected per-architecture results (score, search time and
# best hyperparameter values).
arq_best_models

{'Arquitectura 25': {'Score': 29.02768325805664,
  'Tiempo Scaneo': 2016.7844245433807,
  'conv2d_filters_layer_1': 64,
  'conv2d_kernel_layer_1': 3,
  'conv2d_padding_layer_1': 'same',
  'conv2d_filters_layer_3': 64,
  'conv2d_kernel_layer_3': 5,
  'conv2d_padding_layer_3': 'valid',
  'conv2d_filters_layer_5': 64,
  'conv2d_kernel_layer_5': 3,
  'conv2d_padding_layer_5': 'same',
  'lstm_units_layer_7': 64,
  'kernel_regularizer_layer_7': 0.0,
  'dropout_regularizer_layer_7': 0.0,
  'learning_rate': 0.0001},
 'Arquitectura 26': {'Score': 48.08729553222656,
  'Tiempo Scaneo': 3316.5392682552338,
  'conv2d_filters_layer_1': 48,
  'conv2d_kernel_layer_1': 3,
  'conv2d_padding_layer_1': 'valid',
  'conv2d_filters_layer_3': 24,
  'conv2d_kernel_layer_3': 3,
  'conv2d_padding_layer_3': 'valid',
  'conv2d_filters_layer_5': 60,
  'conv2d_kernel_layer_5': 3,
  'conv2d_padding_layer_5': 'same',
  'lstm_units_layer_7': 256,
  'kernel_regularizer_layer_7': 0.09,
  'dropout_regularizer_layer_7': 0.