In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import time
import json

import matplotlib.pyplot as plt
import CustomMetrics
import EnergyPricesLibrary as Ep

from kerastuner.tuners import BayesianOptimization

%load_ext autoreload
%autoreload 2

In [None]:
def make_predictions(model,scaler_y,trainX,trainY,testX,testY,n_steps_out,len_output_features):
    """Predict on the train and test inputs and map everything back through ``scaler_y``.

    Every array (the two prediction arrays and the two target arrays) is
    reshaped to ``(shape[0] * n_steps_out, len_output_features)`` and then
    passed to ``scaler_y.inverse_transform``.

    Parameters
    ----------
    model : object exposing ``predict(X)``.
    scaler_y : object exposing ``inverse_transform(array)``.
    trainX, testX : inputs handed to ``model.predict`` unchanged.
    trainY, testY : target arrays; they must support ``.shape`` and ``.reshape``.
    n_steps_out, len_output_features : int
        Factors of the 2-D shape used for the reshape described above.

    Returns
    -------
    tuple
        ``(trainPredict, trainY_, testPredict, testY_)`` after the inverse
        transform, each with shape ``(samples * n_steps_out, len_output_features)``.
    """

    def _flatten_and_invert(values):
        # Collapse (samples, ...) into one row per forecast step, then undo the scaling.
        n_rows = values.shape[0]*n_steps_out
        return scaler_y.inverse_transform(values.reshape(n_rows,len_output_features))

    # Run both forward passes first, then bring the four arrays back to the original scale.
    raw_train = model.predict(trainX)
    raw_test = model.predict(testX)

    trainPredict = _flatten_and_invert(raw_train)
    trainY_ = _flatten_and_invert(trainY)
    testPredict = _flatten_and_invert(raw_test)
    testY_ = _flatten_and_invert(testY)

    return trainPredict,trainY_,testPredict,testY_

def get_metrics(trainY,trainPredict,testY,testPredict):
    """Compute MAPE and sMAPE for the train and test predictions via ``Ep``.

    Returns
    -------
    tuple
        ``(trainMAPE, testMAPE, train_sMAPE, test_sMAPE)``.

    NOTE(review): ``Ep.MAPE`` receives (prediction, truth) while ``Ep.sMAPE``
    receives (truth, prediction). MAPE is not symmetric in its arguments, so
    confirm the parameter order ``Ep.MAPE`` expects.
    """
    # Argument order is kept exactly as before (see the review note in the docstring).
    mape_scores = (Ep.MAPE(trainPredict,trainY), Ep.MAPE(testPredict,testY))
    smape_scores = (Ep.sMAPE(trainY,trainPredict), Ep.sMAPE(testY,testPredict))

    return mape_scores + smape_scores

In [None]:
def get_dataset_hourly(n_steps_in,n_steps_out,overlap,nombre_series_horaria,output_columns,data,day,
                       start_date_train,start_date_val,start_date_test,end_date_test):
    """Build the windowed train/validation/test arrays for one weekday.

    Thin wrapper around ``Ep.SplitTimeseriesMultipleTimesBackAhead``: every
    argument is forwarded by keyword, with ``nombre_series_horaria`` passed as
    ``input_features`` and ``output_columns`` as ``output_features``.

    Parameters
    ----------
    n_steps_in, n_steps_out, overlap :
        Window parameters forwarded unchanged to the helper.
    nombre_series_horaria :
        Names of the input feature columns.
    output_columns :
        Names of the target columns.
    data :
        Frame forwarded as ``df``.
    day :
        Day selector forwarded as ``day`` (the notebook passes weekday names).
    start_date_train, start_date_val, start_date_test, end_date_test :
        Split boundaries forwarded unchanged.

    Returns
    -------
    Whatever ``Ep.SplitTimeseriesMultipleTimesBackAhead`` returns; the caller in
    this notebook unpacks it into ten values (train/val/test X and Y, two
    scalers and two dataset arrays).
    """
    return Ep.SplitTimeseriesMultipleTimesBackAhead(df=data,
                                                    day=day,
                                                    start_date_train=start_date_train,
                                                    start_date_val=start_date_val,
                                                    start_date_test=start_date_test,
                                                    end_date_test=end_date_test,
                                                    n_steps_out=n_steps_out,
                                                    n_steps_in=n_steps_in,
                                                    overlap=overlap,
                                                    input_features=nombre_series_horaria,
                                                    output_features=output_columns)

In [None]:
def crear_callbacks():
    """Return the Keras callbacks used while fitting: ``[ReduceLROnPlateau, EarlyStopping]``.

    Both callbacks monitor ``val_loss``, so ``fit`` must be given validation data.
    """
    # Multiply the learning rate by 0.1 after 5 epochs without improvement, never going below 1e-5.
    lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        min_lr=1e-5,
        patience=5,
        verbose=1,
    )

    # Stop training after 10 epochs without a lower validation loss.
    stop_on_stall = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        mode='min',
    )

    return [lr_on_plateau, stop_on_stall]

In [None]:
def build_model_I2(hourly_input_shape,n_steps_out=24):
    """Build and compile the "I2" network: two stacked LSTM layers plus a Dense head.

    Parameters
    ----------
    hourly_input_shape : tuple
        ``input_shape`` given to the first LSTM layer.
    n_steps_out : int, default 24
        Number of units of the final Dense layer (one per forecast step).

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam (learning rate 0.0002) and the custom sMAPE loss;
        MAE, MAPE and sMAPE are tracked as metrics.
    """
    # NOTE(review): the hyperparameters are hard-coded; presumably the outcome of a
    # tuner run - confirm their origin.
    smape = CustomMetrics.symmetric_mean_absolute_percentage_error

    # First recurrent layer returns the full sequence so a second LSTM can be stacked on it.
    first_lstm = tf.keras.layers.LSTM(
        input_shape=hourly_input_shape,
        units=320,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.0975),
        dropout=0.27,
        return_sequences=True,
    )
    # Second recurrent layer keeps only its last output.
    second_lstm = tf.keras.layers.LSTM(
        units=448,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.03),
        dropout=0.36,
        return_sequences=False,
    )
    # Output layer without activation: one value per forecast step.
    head = tf.keras.layers.Dense(units=n_steps_out,activation=None)

    model = tf.keras.Sequential()
    for layer in (first_lstm, second_lstm, head):
        model.add(layer)

    model.compile(
        optimizer=tf.optimizers.Adam(0.0002),
        loss=smape,
        metrics=[
            tf.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
            smape,
        ],
    )

    return model

In [None]:
def build_model_I3(hourly_input_shape,n_steps_out=24):
    """Build and compile the "I3" network: two stacked LSTM layers plus a Dense head.

    Parameters
    ----------
    hourly_input_shape : tuple
        ``input_shape`` given to the first LSTM layer.
    n_steps_out : int, default 24
        Number of units of the final Dense layer (one per forecast step).

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam (learning rate 0.000103025) and the custom sMAPE
        loss; MAE, MAPE and sMAPE are tracked as metrics.
    """
    # NOTE(review): the hyperparameters are hard-coded; presumably the outcome of a
    # tuner run - confirm their origin.
    smape = CustomMetrics.symmetric_mean_absolute_percentage_error

    # First recurrent layer returns the full sequence so a second LSTM can be stacked on it.
    first_lstm = tf.keras.layers.LSTM(
        input_shape=hourly_input_shape,
        units=128,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.06),
        dropout=0.09,
        return_sequences=True,
    )
    # Second recurrent layer keeps only its last output; dropout is disabled here.
    second_lstm = tf.keras.layers.LSTM(
        units=384,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.0225),
        dropout=0,
        return_sequences=False,
    )
    # Output layer without activation: one value per forecast step.
    head = tf.keras.layers.Dense(units=n_steps_out,activation=None)

    model = tf.keras.Sequential()
    for layer in (first_lstm, second_lstm, head):
        model.add(layer)

    model.compile(
        optimizer=tf.optimizers.Adam(0.000103025),
        loss=smape,
        metrics=[
            tf.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
            smape,
        ],
    )

    return model

In [None]:
def build_model_I5(hourly_input_shape,n_steps_out=24):
    """Build and compile the "I5" network: two stacked GRU layers plus a Dense head.

    Parameters
    ----------
    hourly_input_shape : tuple
        ``input_shape`` given to the first GRU layer.
    n_steps_out : int, default 24
        Number of units of the final Dense layer (one per forecast step).

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Adam (learning rate 0.00027) and the custom sMAPE loss;
        MAE, MAPE and sMAPE are tracked as metrics.
    """
    # NOTE(review): the hyperparameters are hard-coded; presumably the outcome of a
    # tuner run - confirm their origin.
    smape = CustomMetrics.symmetric_mean_absolute_percentage_error

    # First recurrent layer returns the full sequence so a second GRU can be stacked on it.
    first_gru = tf.keras.layers.GRU(
        input_shape=hourly_input_shape,
        units=448,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.06),
        dropout=0.36,
        return_sequences=True,
    )
    # Second recurrent layer keeps only its last output.
    second_gru = tf.keras.layers.GRU(
        units=128,
        activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L1(l1=0.09),
        dropout=0.36,
        return_sequences=False,
    )
    # Output layer without activation: one value per forecast step.
    head = tf.keras.layers.Dense(units=n_steps_out,activation=None)

    model = tf.keras.Sequential()
    for layer in (first_gru, second_gru, head):
        model.add(layer)

    model.compile(
        optimizer=tf.optimizers.Adam(0.00027),
        loss=smape,
        metrics=[
            tf.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
            smape,
        ],
    )

    return model

# DATASET

## Load Dataset

In [None]:
# Hourly series workbook: read it and use the 'Fecha' column as the row index.
data_horaria_path = os.path.join(
    'dataset','Series','Sabanas','Original','Sabana_Datos_Horaria.xlsx'
)
data_horaria = pd.read_excel(data_horaria_path).set_index('Fecha')

In [None]:
# Column names of the hourly frame, captured before the price columns and the
# 'day_of_week' column are added to the combined frame; used as the model input features.
nombre_series_horaria = data_horaria.columns

In [None]:
# Price workbook: read it and use the 'Fecha' column as the row index.
precio_bolsa_path = os.path.join(
    'dataset','Series','Sabanas','Original','Sabana_Datos_Precio_Bolsa.xlsx'
)
precio_bolsa = pd.read_excel(precio_bolsa_path).set_index('Fecha')

In [None]:
# Put the hourly series and the price columns side by side; axis=1 aligns rows on
# the shared 'Fecha' index (pandas' default outer join keeps index values present in either frame).
data_horaria_full = pd.concat([data_horaria,precio_bolsa],axis=1)

In [None]:
# Tag every row with its weekday name; the per-day split below selects rows by these names.
# day_name() is a DatetimeIndex method, so the 'Fecha' index must be datetime-like.
data_horaria_full['day_of_week'] = data_horaria_full.index.day_name()
# Same tag on the standalone price frame (mutated in place; not read again in the visible cells).
precio_bolsa['day_of_week'] = precio_bolsa.index.day_name()

## Build Window

In [None]:
# Experiment configuration shared by the training and plotting cells.

# One model is trained per weekday name in this array.
Days = np.array([
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
])

# Window parameters forwarded to the windowing helper.
# NOTE(review): presumably expressed in hours, given the hourly dataset - confirm.
n_steps_in = 120
n_steps_out = 24
overlap = 24

# Boundaries of the train / validation / test periods.
start_date_train = '2000-02-01'
start_date_val = '2020-01-01'
start_date_test = '2020-04-01'
end_date_test = '2020-05-01'

# Target column(s) to forecast.
output_columns = ['$kWh']
len_output_features = len(output_columns)

In [None]:
# Train one model per weekday, keep its predictions and error metrics in
# `dict_days_test`, and draw the fitted / forecast curves on a 4x2 grid.
dict_days_test = dict()

# Seven weekday panels on a 4x2 grid (the eighth cell stays empty).
fig, axs = plt.subplots(4, 2, sharex='all', figsize=(20,20))

for j,d in enumerate(Days):

    results_from_hourly = get_dataset_hourly(n_steps_in=n_steps_in,n_steps_out=n_steps_out,overlap=overlap,
                                             nombre_series_horaria=nombre_series_horaria,
                                             output_columns=output_columns,data=data_horaria_full,
                                             day=d,start_date_train=start_date_train,
                                             start_date_val=start_date_val,start_date_test=start_date_test,
                                             end_date_test=end_date_test)

    trainX_H,trainY_H,valX_H,valY_H,testX_H,testY_H,scaler_H_x,scaler_H_y,dataset_x,dataset_y = results_from_hourly

    hourly_input_shape = (trainX_H.shape[1],trainX_H.shape[2])
    callbacks = crear_callbacks()
    model = build_model_I5(hourly_input_shape,n_steps_out)

    # The callbacks monitor 'val_loss', so whatever is passed as validation_data
    # decides when training stops and when the learning rate is reduced.
    # Use the validation split for that and keep the test split untouched until
    # evaluation; validating on the test split would leak it into model
    # selection and bias the reported test metrics.
    model.fit(trainX_H, trainY_H, validation_data=(valX_H,valY_H),epochs=200,callbacks=callbacks,verbose=1)

    trainPredict,trainY_true,testPredict,testY_true = make_predictions(model,scaler_H_y,trainX_H,trainY_H,testX_H,testY_H,
                                                                       n_steps_out,len_output_features)

    trainMAPE,testMAPE,train_sMAPE,test_sMAPE = get_metrics(trainY_true,trainPredict,testY_true,testPredict)

    # Everything the later reporting cell needs for this weekday.
    dict_days_test[d] = {
        'trainPredict': trainPredict,
        'testPredict': testPredict,
        'testY': testY_true,
        'trainMAPE': trainMAPE,
        'testMAPE': testMAPE,
        'train_sMAPE': train_sMAPE,
        'test_sMAPE': test_sMAPE,
    }

    Nt = trainPredict.shape[0] + testPredict.shape[0]

    # NaN-padded buffers so each curve is only drawn over its own index range.
    # The single column matches the one output feature configured above.
    trainPredictPlot = np.full((Nt,1), np.nan)
    # NOTE(review): the training curve is shifted one step to the right (first
    # dataset_y value prepended, last prediction dropped) while the test curve is
    # not; confirm this lag is intended and that dataset_y is on the same scale
    # as the inverse-transformed predictions.
    trainPredictPlot[:len(trainPredict), :] = np.concatenate((dataset_y[0].reshape(1,1),trainPredict[:-1]))

    # Test predictions are placed right after the training range.
    testPredictPlot = np.full((Nt,1), np.nan)
    testPredictPlot[len(trainPredict):Nt, :] = testPredict

    # Fill the grid column by column: days 0-3 go to column 0, days 4-6 to column 1.
    k2, k1 = divmod(j, 4)
    ax = axs[k1,k2]

    ax.plot(np.concatenate((trainY_true,testY_true)),label='Original data')
    ax.plot(trainPredictPlot,label='Training predictions')
    ax.plot(testPredictPlot,label='Test prediction')

    ax.set_title(d)
    ax.legend()
    ax.set_ylabel('COL/kWh')

In [None]:
# Report the stored metrics for every weekday and plot only the test window
# (true values against predictions) on a 4x2 grid.
Days = np.array(['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'])
fig, axs = plt.subplots(4, 2, sharex='all',figsize=(15,8))
fig.set_size_inches(20,20)
k1 = k2 = 0

for j,d in enumerate(Days):

    day_results = dict_days_test[d]

    trainPredict = day_results['trainPredict']
    testPredict = day_results['testPredict']
    testY = day_results['testY']

    trainMAPE = day_results['trainMAPE']
    testMAPE = day_results['testMAPE']
    train_sMAPE = day_results['train_sMAPE']
    test_sMAPE = day_results['test_sMAPE']

    Nt = trainPredict.shape[0] + testPredict.shape[0]

    print('{} -> Train Mape:{},Test Mape:{},Train sMAPE:{}, Test sMAPE:{}'.format(d,trainMAPE,testMAPE,train_sMAPE,test_sMAPE))

    # NaN-padded buffers: the training range stays blank, so both curves start at
    # the offset where the test samples begin.
    testOriginalPlot = np.full((Nt,1), np.nan)
    testOriginalPlot[len(trainPredict):Nt, :] = testY

    testPredictPlot = np.full((Nt,1), np.nan)
    testPredictPlot[len(trainPredict):Nt, :] = testPredict

    # Fill the grid column by column: days 0-3 go to column 0, days 4-6 to column 1.
    k2, k1 = divmod(j, 4)
    ax = axs[k1,k2]

    ax.plot(testOriginalPlot,label='Original data')
    ax.plot(testPredictPlot,label='Test prediction')

    ax.set_title(d)
    ax.legend()
    ax.set_ylabel('COL/kWh')