In [2]:
import pandas as pd
import os, time
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop
from epftoolbox.evaluation import MAE, MAPE, RMSE, rMAE
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import keras_tuner

In [3]:
# Move the working directory one level up; the relative paths used later in the
# notebook (CSV input, model/tuner output folders) are resolved from there.
# NOTE(review): this is not idempotent — every re-run of this cell climbs one
# more directory level, so run it exactly once per kernel session.
os.chdir('..')
# Seed TensorFlow's global random generator.
# NOTE(review): only TensorFlow is seeded here; no other random source is seeded in this cell.
tf.random.set_seed(123)

In [28]:
# Funciones auxiliares
# Split the dataset
def train_test_vali(serie, train_size, valida_size, ts_size):
    """Split a sequence into contiguous train, validation and test parts.

    The split keeps the original order (no shuffling): the first
    ``train_size`` fraction goes to train, the next ``valida_size`` fraction
    to validation, and everything that remains goes to test.

    Args:
        serie: Any sliceable sequence supporting ``len()`` (pandas Series or
            DataFrame, NumPy array, list, ...).
        train_size: Fraction of the rows assigned to the training part.
        valida_size: Fraction of the rows assigned to the validation part.
        ts_size: Accepted for interface compatibility only. It is not used:
            the test part is always the remainder after train and validation.

    Returns:
        A ``(train, vali, test)`` tuple of slices of ``serie``.
    """
    total_data = len(serie)

    # Fractions are truncated towards zero, so rounding leftovers end up in test.
    train_end = int(total_data * train_size)
    valid_end = train_end + int(total_data * valida_size)

    train = serie[:train_end]
    vali = serie[train_end:valid_end]
    test = serie[valid_end:]
    return train, vali, test

# Build the supervised dataset
def to_sequences(SEQUENCE_SIZE, obs):
    """Turn a series into (window, next value) pairs for supervised learning.

    Window ``k`` holds ``obs[k : k + SEQUENCE_SIZE]`` and its target is the
    observation right after it, ``obs[k + SEQUENCE_SIZE]``.

    Args:
        SEQUENCE_SIZE: Number of consecutive observations per input window.
        obs: Array-like of observations, ordered in time.

    Returns:
        ``(x, y)`` NumPy arrays. For one-dimensional ``obs`` of length ``n``,
        ``x`` has shape ``(n - SEQUENCE_SIZE, SEQUENCE_SIZE, 1)`` and ``y`` has
        shape ``(n - SEQUENCE_SIZE,)``. When there are not enough observations
        to build a single window, the arrays are empty but keep that layout
        (``(0, SEQUENCE_SIZE, 1)`` and ``(0,)``).
    """
    obs = np.asarray(obs)
    n_windows = max(len(obs) - SEQUENCE_SIZE, 0)

    if n_windows == 0:
        # Keep the rank of the outputs stable so callers can still inspect shapes.
        empty_x = np.empty((0, SEQUENCE_SIZE, 1) + obs.shape[1:], dtype=obs.dtype)
        empty_y = np.empty((0,) + obs.shape[1:], dtype=obs.dtype)
        return empty_x, empty_y

    # positions[k, j] == k + j: row k lists the indices that form window k.
    positions = np.arange(n_windows)[:, None] + np.arange(SEQUENCE_SIZE)[None, :]
    # The extra axis wraps every time step in its own length-1 list.
    x = obs[positions][:, :, None]
    # Copy so the targets do not alias the caller's array.
    y = obs[SEQUENCE_SIZE:].copy()
    return x, y

# Model helpers
def root_mean_squared_error(y_true, y_pred):
    """Root mean squared error between predictions and targets.

    The squared differences are averaged over every element of the tensors
    and the square root of that mean is returned as a scalar tensor.
    """
    residual = y_pred - y_true
    mean_squared = tf.math.reduce_mean(tf.square(residual))
    return tf.math.sqrt(mean_squared)

# Helper to measure execution time
def elapsed_time(start_time):
    """Return the seconds elapsed since ``start_time`` (a ``time.time()`` value)."""
    now = time.time()
    return now - start_time

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention followed by a feed-forward part.

    Both sub-blocks normalise their input first and add a residual
    connection afterwards, so the output keeps the shape of ``inputs``.

    Args:
        inputs: Tensor fed to the block.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first (ReLU) 1x1 convolution.
        dropout: Dropout rate used inside the attention layer and after each
            sub-block's main layer.

    Returns:
        The tensor produced by the block.
    """
    # Self-attention sub-block: normalise, attend to itself, drop out, add residual.
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    res = attended + inputs

    # Feed-forward sub-block: two 1x1 convolutions, the second one projecting
    # back to the feature size of the input so the residual sum is valid.
    n_features = inputs.shape[-1]
    hidden = layers.LayerNormalization(epsilon=1e-6)(res)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=n_features, kernel_size=1)(hidden)
    return projected + res

def call_existing_code(head_size, num_heads, ff_dim, dropout, mlp_dropout,
                       input_shape=(24, 1), learning_rate=5e-5):
    """Build and compile the single-block transformer regressor.

    The network is: input -> one ``transformer_encoder`` block -> global
    average pooling -> Dense(32, relu) -> Dropout -> Dense(1).

    Args:
        head_size: ``key_dim`` of the attention layer in the encoder block.
        num_heads: Number of attention heads in the encoder block.
        ff_dim: Filters of the encoder's hidden 1x1 convolution.
        dropout: Dropout rate used inside the encoder block.
        mlp_dropout: Dropout rate applied after the Dense(32) layer.
        input_shape: Shape of one sample, without the batch axis. Defaults to
            ``(24, 1)``, the value that used to be hard-coded.
        learning_rate: RMSprop learning rate. Defaults to ``5e-5``, the value
            that used to be hard-coded.

    Returns:
        A compiled ``keras.Model`` using ``root_mean_squared_error`` as loss.
    """
    inputs = keras.Input(shape=input_shape)
    x = transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout)

    # With data_format="channels_first" Keras documents the input layout as
    # (batch, features, steps) and averages over the last axis, so the default
    # (batch, 24, 1) tensor is pooled to (batch, 24).
    # NOTE(review): kept exactly as in the original; confirm this layout is intended.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    x = layers.Dense(32, activation="relu")(x)
    x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1)(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        loss=root_mean_squared_error,
        optimizer=RMSprop(learning_rate=learning_rate)
    )
    return model

def build_model(hp):
    """Hypermodel builder: sample the hyperparameters and build the model.

    Registers the five hyperparameters of the search on ``hp`` (in the order
    head_size, num_heads, ff_dim, dropout, mlp_dropout) and forwards the
    sampled values to ``call_existing_code``.

    Args:
        hp: Hyperparameter container providing ``Int``, ``Choice`` and ``Float``.

    Returns:
        The compiled model returned by ``call_existing_code``.
    """
    # Dict literals are evaluated top to bottom, which keeps the registration order.
    sampled = {
        "head_size": hp.Int("head_size", min_value=32, max_value=128, step=32),
        "num_heads": hp.Choice("num_heads", values=[1, 2]),
        "ff_dim": hp.Int("ff_dim", min_value=1, max_value=3),
        "dropout": hp.Float("dropout", min_value=0.01, max_value=0.5),
        "mlp_dropout": hp.Float("mlp_dropout", min_value=0.01, max_value=0.5),
    }
    return call_existing_code(
        sampled["head_size"],
        sampled["num_heads"],
        sampled["ff_dim"],
        sampled["dropout"],
        sampled["mlp_dropout"],
    )

In [29]:
# Build one tuned baseline model for each train/validation/test partition.
types = [(0.9,0.05,0.05)]
lista_results = []
INPUT_LENGTH = 24    # 24 consecutive hourly records form one input window
OUTPUT_LENGTH = 24   # Intended 24-hour horizon; not used below (each window is paired with a single next value)
epochs = 50
SAVE_DIR = 'enconder_model/tunning_save'

# The raw data does not depend on the partition, so it is read only once.
df = pd.read_csv('df_data_colum.csv', parse_dates=['date_hour'], index_col='date_hour')

for i in types:
    # Split first and fit the scaler on the training prices only, so that no
    # statistic of the validation/test periods leaks into training.
    tr_raw, vl_raw, ts_raw = train_test_vali(df['price'], i[0], i[1], i[2])

    # Dedicated single-column scaler: the very same object can later map both
    # predictions and targets back to price units.
    scaler = MinMaxScaler()
    scaler.fit(tr_raw.to_numpy().reshape(-1, 1))
    tr, vl, ts = (
        pd.Series(scaler.transform(part.to_numpy().reshape(-1, 1)).ravel(), index=part.index)
        for part in (tr_raw, vl_raw, ts_raw)
    )

    x_tr, y_tr = to_sequences(INPUT_LENGTH, tr.values)
    x_vl, y_vl = to_sequences(INPUT_LENGTH, vl.values)
    x_ts, y_ts = to_sequences(INPUT_LENGTH, ts.values)

    tuner = keras_tuner.BayesianOptimization(
        hypermodel=build_model,
        objective="val_loss",
        max_trials=30,
        executions_per_trial=1,
        overwrite=True,
        directory="enconder_model",
        project_name="tunning_model"
    )

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0003)
    # NOTE(review): with epochs=1 every trial trains for a single epoch, so the
    # early-stopping patience of 3 can never trigger during the search.
    tuner.search(x_tr, y_tr, epochs=1, validation_data=(x_vl, y_vl), callbacks=[early_stopping])

    architecture = tuner.get_best_hyperparameters(num_trials=1)[0]

    # (value, name) pairs describing the selected architecture, stored with the results.
    archi = [
        (architecture.get(j), j)
        for j in ["head_size", "num_heads", "ff_dim", "dropout", "mlp_dropout"]
    ]

    model = build_model(architecture)
    print(" ")
    print("Training the best model")
    start_time = time.time()
    model.fit(x_tr, y_tr, epochs=epochs, validation_data=(x_vl, y_vl), callbacks=[early_stopping])
    trainig_time = elapsed_time(start_time)

    # Loss (RMSE on the scaled prices) for each partition.
    rmse_tr = model.evaluate(x_tr, y_tr, verbose=1)
    rmse_vl = model.evaluate(x_vl, y_vl, verbose=1)
    rmse_ts = model.evaluate(x_ts, y_ts, verbose=1)

    start_time = time.time()
    y_ts_pred = model.predict(x_ts)
    prediction_time = elapsed_time(start_time)

    # The first INPUT_LENGTH test rows only serve as history for the first window.
    test = pd.DataFrame(
        data={'predictions': y_ts_pred.reshape(-1), 'actual': y_ts},
        index=ts.index[INPUT_LENGTH:],
    )
    # Undo the scaling column by column with the price scaler; both columns are prices.
    df_result = pd.DataFrame(
        {col: scaler.inverse_transform(test[[col]].to_numpy()).ravel() for col in test.columns},
        index=test.index,
    )

    # Predictions plot
    x = df_result.index
    plt.figure(figsize=(15, 10))
    plt.plot(x, df_result['actual'], label='actual')
    plt.plot(x, df_result['predictions'], label='predictions')
    plt.legend()
    plt.show()

    # Error plot
    df_result['error'] = df_result['actual'] - df_result['predictions']
    plt.figure(figsize=(15, 10))
    plt.plot(df_result['error'])
    plt.show()

    # RMSE per hour of the day. Grouping (instead of filtering hour by hour)
    # leaves NaN for an hour without samples rather than raising.
    df_result['hora'] = df_result.index.hour
    squared_error = df_result['error'] ** 2
    rmse_por_hora = np.sqrt(squared_error.groupby(df_result['hora']).mean()).reindex(range(24))
    rmse_df = rmse_por_hora.to_frame('rmse_por_hora')

    fig, ax = plt.subplots(figsize=(15, 10))
    ax.plot(rmse_df['rmse_por_hora'], marker='o')
    ax.set_xlabel('Hora predicha')
    ax.set_ylabel('Error RMSE ($/kWh)')
    plt.grid()
    plt.show()

    # Save the model (the target folder is created on demand).
    os.makedirs(SAVE_DIR, exist_ok=True)
    model.save('enconder_model/tunning_save/best_model_{train_size}.h5'.format(train_size=i[0]))

    mean_MAE = MAE(p_pred= df_result['predictions'], p_real=df_result['actual'])
    mean_RMSE = RMSE(p_pred= df_result['predictions'], p_real=df_result['actual'])
    mean_MAPE = MAPE(p_pred= df_result['predictions'], p_real=df_result['actual']) * 100
    r2 = metrics.r2_score(df_result['actual'], df_result['predictions'])

    results = {'trainig_size': i[0], 'model': 'transformer', 'type': 'tunning', 'training_time': trainig_time,
               'prediction_time': prediction_time, 'rmse_trainig': rmse_tr, 'rmse_validation': rmse_vl, 'rmse_test': rmse_ts,
               'mean_MAE': mean_MAE, 'r2': r2, 'mean_RMSE': mean_RMSE, 'mean_MAPE': mean_MAPE, "architecture": [archi]}

    lista_results.append(results)

# One summary file for the whole run; it is named after the last partition in `types`.
os.makedirs(SAVE_DIR, exist_ok=True)
df_results_bases = pd.DataFrame(lista_results)
df_results_bases.to_csv('enconder_model/tunning_save/df_results_best_{train}.csv'.format(train=types[-1][0]))

Trial 1 Complete [00h 01m 09s]
val_loss: 0.005203812848776579

Best val_loss So Far: 0.005203812848776579
Total elapsed time: 00h 01m 09s
INFO:tensorflow:Oracle triggered exit
 
Training the best model
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
