In [19]:
import pandas as pd
import numpy as np
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

In [2]:
df = pd.read_excel('../Input/DadosCompeticao.xlsx')

#### NN com LSTM (bayesian search) -> 3 camadas (WRMSE = 0.08913640587115884)

In [4]:
forecast_window = 12
past_window = df.shape[1] - 1

In [5]:
def data_preparation(series_scaled, forecast_window, past_window):
    X, y = [], []
    for i in range(len(series_scaled) - past_window - forecast_window + 1):
        X.append(series_scaled[i:i + past_window])
        y.append(series_scaled[i + past_window:i + past_window + forecast_window])
    return np.array(X), np.array(y)

In [21]:
def build_model(past_window, forecast_window, neurons=100, dropout_rate=0.3, learning_rate=0.001, activation='relu'):
    model = Sequential([
        LSTM(neurons, activation=activation, return_sequences=True, input_shape=(past_window, 1)),
        Dropout(dropout_rate),
        LSTM(neurons, activation=activation, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(neurons, activation=activation, return_sequences=False),
        Dropout(dropout_rate),
        Dense(neurons, activation=activation),
        Dense(forecast_window)
    ])

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

In [23]:
def objective(trial, series, forecast_window, past_window, n_splits=3):
    ## Set de possibilidades para busca bayesiana
    neurons = trial.suggest_categorical("neurons", [50, 100, 150, 300])
    dropout_rate = trial.suggest_categorical("dropout_rate", [0.1, 0.2, 0.3, 0.5])
    learning_rate = trial.suggest_categorical("learning_rate", [0.01, 0.001, 0.0005])
    activation = trial.suggest_categorical("activation", ['relu', 'tanh', 'sigmoid', 'softmax'])

    scaler = MinMaxScaler()
    series_scaled = scaler.fit_transform(series.reshape(-1, 1))

    X, y = data_preparation(series_scaled, forecast_window, past_window)
    X = X.reshape((X.shape[0], past_window, 1))

    tscv = TimeSeriesSplit(n_splits=n_splits) ## -> seguir com split
    rmse_scores = []

    for train_idx, val_idx in tscv.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = build_model(past_window, forecast_window, neurons, dropout_rate, learning_rate, activation)
        model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

        y_pred = model.predict(X_val)
        y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
        y_val_rescaled = scaler.inverse_transform(y_val.reshape(-1, 1)).reshape(y_val.shape)

        rmse = np.mean([
            root_mean_squared_error(y_val_rescaled[:, i], y_pred_rescaled[:, i])
            for i in range(forecast_window)
        ])
        rmse_scores.append(rmse)

    return np.mean(rmse_scores) ## Função objetivo para minimizar RMSE

In [26]:
def NN_bayesian_search(forecast_window, past_window, n_trials=5):
    best_params = {}

    for col in df.columns:
        print(f"\n parâmetros para: {col}")
        series = df[col].values

        study = optuna.create_study(direction="minimize") ## min objective s.a set_params
        study.optimize(lambda trial: objective(trial, series, forecast_window, past_window), n_trials=n_trials)

        best_params[col] = study.best_params

    return best_params

In [27]:
params = NN_bayesian_search(forecast_window=forecast_window, past_window=past_window) # muito custoso (out of GPU)

[I 2025-04-20 19:40:39,988] A new study created in memory with name: no-name-a23bcef1-6ef5-4298-80e5-b4916605a797



🔎 Buscando melhores parâmetros para: #1


[I 2025-04-20 19:40:57,921] Trial 0 finished with value: 0.1676267202504347 and parameters: {'neurons': 50, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 0 with value: 0.1676267202504347.




[I 2025-04-20 19:41:16,361] Trial 1 finished with value: 0.184324961622279 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 0 with value: 0.1676267202504347.




[I 2025-04-20 19:41:52,091] Trial 2 finished with value: 0.19701460390758163 and parameters: {'neurons': 300, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 0 with value: 0.1676267202504347.




[I 2025-04-20 19:42:17,657] Trial 3 finished with value: 0.2098687609989274 and parameters: {'neurons': 150, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'activation': 'softmax'}. Best is trial 0 with value: 0.1676267202504347.




[I 2025-04-20 19:42:36,435] Trial 4 finished with value: 0.21915935668632605 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 0 with value: 0.1676267202504347.
[I 2025-04-20 19:42:36,437] A new study created in memory with name: no-name-859a15f3-5e3b-4059-93cf-2477c9bafeec



🔎 Buscando melhores parâmetros para: #2


[I 2025-04-20 19:43:00,949] Trial 0 finished with value: 0.15584718981069134 and parameters: {'neurons': 50, 'dropout_rate': 0.1, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 0 with value: 0.15584718981069134.




[I 2025-04-20 19:43:18,858] Trial 1 finished with value: 0.2537051648450998 and parameters: {'neurons': 50, 'dropout_rate': 0.2, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 0 with value: 0.15584718981069134.




[I 2025-04-20 19:43:40,274] Trial 2 finished with value: 0.3592043970263052 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.15584718981069134.




[I 2025-04-20 19:44:06,752] Trial 3 finished with value: 0.1635470573231291 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'activation': 'tanh'}. Best is trial 0 with value: 0.15584718981069134.




[I 2025-04-20 19:44:42,960] Trial 4 finished with value: 0.2573438981918283 and parameters: {'neurons': 300, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'sigmoid'}. Best is trial 0 with value: 0.15584718981069134.
[I 2025-04-20 19:44:42,961] A new study created in memory with name: no-name-914b91f3-059b-461a-ab33-f5be46ed8dcb



🔎 Buscando melhores parâmetros para: #3


[I 2025-04-20 19:45:19,108] Trial 0 finished with value: 0.14288120317696515 and parameters: {'neurons': 300, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'relu'}. Best is trial 0 with value: 0.14288120317696515.




[I 2025-04-20 19:45:53,472] Trial 1 finished with value: 0.12334397277774196 and parameters: {'neurons': 150, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'tanh'}. Best is trial 1 with value: 0.12334397277774196.




[I 2025-04-20 19:46:18,065] Trial 2 finished with value: 0.14234925037691315 and parameters: {'neurons': 150, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 1 with value: 0.12334397277774196.




[I 2025-04-20 19:46:45,298] Trial 3 finished with value: 0.15682291224806988 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 1 with value: 0.12334397277774196.




[I 2025-04-20 19:47:03,062] Trial 4 finished with value: 0.15411909135088064 and parameters: {'neurons': 50, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 1 with value: 0.12334397277774196.
[I 2025-04-20 19:47:03,063] A new study created in memory with name: no-name-99356c4c-0939-4f83-921b-22c14c0abd8d



🔎 Buscando melhores parâmetros para: #4


[I 2025-04-20 19:47:26,706] Trial 0 finished with value: 0.28714784914932423 and parameters: {'neurons': 150, 'dropout_rate': 0.3, 'learning_rate': 0.001, 'activation': 'softmax'}. Best is trial 0 with value: 0.28714784914932423.




[I 2025-04-20 19:47:47,423] Trial 1 finished with value: 0.28560586694667084 and parameters: {'neurons': 150, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'sigmoid'}. Best is trial 1 with value: 0.28560586694667084.




[I 2025-04-20 19:48:13,900] Trial 2 finished with value: 0.048660066618744745 and parameters: {'neurons': 50, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 2 with value: 0.048660066618744745.




[I 2025-04-20 19:48:36,384] Trial 3 finished with value: 0.31886509253077794 and parameters: {'neurons': 150, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 2 with value: 0.048660066618744745.




[I 2025-04-20 19:48:54,431] Trial 4 finished with value: 0.041706980262304356 and parameters: {'neurons': 100, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 4 with value: 0.041706980262304356.
[I 2025-04-20 19:48:54,432] A new study created in memory with name: no-name-56bb6390-5cea-4825-a3b7-931df4feefbe



🔎 Buscando melhores parâmetros para: #5


[I 2025-04-20 19:49:14,670] Trial 0 finished with value: 0.2255121444229331 and parameters: {'neurons': 150, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'sigmoid'}. Best is trial 0 with value: 0.2255121444229331.




[I 2025-04-20 19:49:38,411] Trial 1 finished with value: 0.13639552386664658 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 1 with value: 0.13639552386664658.




[I 2025-04-20 19:50:00,938] Trial 2 finished with value: 0.176752721133028 and parameters: {'neurons': 150, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'activation': 'softmax'}. Best is trial 1 with value: 0.13639552386664658.




[I 2025-04-20 19:50:36,510] Trial 3 finished with value: 0.15319113275692334 and parameters: {'neurons': 300, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 1 with value: 0.13639552386664658.




[I 2025-04-20 19:51:01,135] Trial 4 finished with value: 0.22334617144867072 and parameters: {'neurons': 100, 'dropout_rate': 0.3, 'learning_rate': 0.01, 'activation': 'softmax'}. Best is trial 1 with value: 0.13639552386664658.
[I 2025-04-20 19:51:01,137] A new study created in memory with name: no-name-0c7e59bd-8b7b-4208-9e4b-e1544a5e5056



🔎 Buscando melhores parâmetros para: #6


[I 2025-04-20 19:51:18,923] Trial 0 finished with value: 0.2796577505858718 and parameters: {'neurons': 50, 'dropout_rate': 0.3, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 0 with value: 0.2796577505858718.




[I 2025-04-20 19:51:38,531] Trial 1 finished with value: 0.19212090323989608 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 1 with value: 0.19212090323989608.




[I 2025-04-20 19:52:00,092] Trial 2 finished with value: 0.2804487918520762 and parameters: {'neurons': 150, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 1 with value: 0.19212090323989608.




[I 2025-04-20 19:52:22,780] Trial 3 finished with value: 0.2613885775301879 and parameters: {'neurons': 150, 'dropout_rate': 0.2, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 1 with value: 0.19212090323989608.




[I 2025-04-20 19:52:43,258] Trial 4 finished with value: 0.1880067259806605 and parameters: {'neurons': 100, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 4 with value: 0.1880067259806605.
[I 2025-04-20 19:52:43,259] A new study created in memory with name: no-name-2638c8c5-8291-4aec-acc6-bb66834bc5bf



🔎 Buscando melhores parâmetros para: #7


[I 2025-04-20 19:53:01,953] Trial 0 finished with value: 0.09435603721585785 and parameters: {'neurons': 50, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'relu'}. Best is trial 0 with value: 0.09435603721585785.




[I 2025-04-20 19:53:49,420] Trial 1 finished with value: 0.09275916743019962 and parameters: {'neurons': 300, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 1 with value: 0.09275916743019962.




[I 2025-04-20 19:54:16,203] Trial 2 finished with value: 0.1810974034511724 and parameters: {'neurons': 100, 'dropout_rate': 0.2, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 1 with value: 0.09275916743019962.




[I 2025-04-20 19:54:40,689] Trial 3 finished with value: 0.09608743702487806 and parameters: {'neurons': 150, 'dropout_rate': 0.2, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 1 with value: 0.09275916743019962.




[I 2025-04-20 19:55:31,218] Trial 4 finished with value: 0.09266440566048602 and parameters: {'neurons': 300, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'activation': 'tanh'}. Best is trial 4 with value: 0.09266440566048602.
[I 2025-04-20 19:55:31,219] A new study created in memory with name: no-name-e3840351-c222-4409-9bba-f4ea19d954c5



🔎 Buscando melhores parâmetros para: #8


[I 2025-04-20 19:55:59,676] Trial 0 finished with value: 0.11224524303862814 and parameters: {'neurons': 100, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'activation': 'tanh'}. Best is trial 0 with value: 0.11224524303862814.




[I 2025-04-20 19:56:18,969] Trial 1 finished with value: 0.14148662859561092 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 0 with value: 0.11224524303862814.




[I 2025-04-20 19:57:05,971] Trial 2 finished with value: 0.3749334473825155 and parameters: {'neurons': 300, 'dropout_rate': 0.2, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.11224524303862814.




[I 2025-04-20 19:57:53,429] Trial 3 finished with value: 0.37631596044945814 and parameters: {'neurons': 300, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.11224524303862814.




[I 2025-04-20 19:58:41,781] Trial 4 finished with value: 0.2713697696567095 and parameters: {'neurons': 300, 'dropout_rate': 0.3, 'learning_rate': 0.001, 'activation': 'softmax'}. Best is trial 0 with value: 0.11224524303862814.
[I 2025-04-20 19:58:41,782] A new study created in memory with name: no-name-0972691f-55f1-4f07-98e1-3ba45cb7e884



🔎 Buscando melhores parâmetros para: #9


[I 2025-04-20 19:59:05,742] Trial 0 finished with value: 0.399845119370555 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.399845119370555.




[I 2025-04-20 19:59:27,269] Trial 1 finished with value: 0.2238585524242951 and parameters: {'neurons': 100, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'relu'}. Best is trial 1 with value: 0.2238585524242951.




[I 2025-04-20 19:59:54,158] Trial 2 finished with value: 0.1596318957890321 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 2 with value: 0.1596318957890321.




[I 2025-04-20 20:00:27,696] Trial 3 finished with value: 0.1334027593141546 and parameters: {'neurons': 150, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'tanh'}. Best is trial 3 with value: 0.1334027593141546.




[I 2025-04-20 20:00:53,385] Trial 4 finished with value: 0.22160868350756394 and parameters: {'neurons': 150, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'activation': 'sigmoid'}. Best is trial 3 with value: 0.1334027593141546.
[I 2025-04-20 20:00:53,387] A new study created in memory with name: no-name-6e26fdaa-011a-47a6-aafd-85cda51df13c



🔎 Buscando melhores parâmetros para: #10


[I 2025-04-20 20:01:16,030] Trial 0 finished with value: 0.20493869978734178 and parameters: {'neurons': 100, 'dropout_rate': 0.2, 'learning_rate': 0.0005, 'activation': 'relu'}. Best is trial 0 with value: 0.20493869978734178.




[I 2025-04-20 20:01:40,949] Trial 1 finished with value: 0.39968462250735476 and parameters: {'neurons': 100, 'dropout_rate': 0.5, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.20493869978734178.




[I 2025-04-20 20:02:09,484] Trial 2 finished with value: 0.4021842685829353 and parameters: {'neurons': 150, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 0 with value: 0.20493869978734178.




[I 2025-04-20 20:02:29,980] Trial 3 finished with value: 0.21069334850731447 and parameters: {'neurons': 50, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'activation': 'relu'}. Best is trial 0 with value: 0.20493869978734178.




[I 2025-04-20 20:03:02,300] Trial 4 finished with value: 0.20436449838705942 and parameters: {'neurons': 150, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'tanh'}. Best is trial 4 with value: 0.20436449838705942.
[I 2025-04-20 20:03:02,301] A new study created in memory with name: no-name-1be317e5-728e-42f0-be74-a678d1b25e0e



🔎 Buscando melhores parâmetros para: #11


[I 2025-04-20 20:03:24,397] Trial 0 finished with value: 0.14105968389925203 and parameters: {'neurons': 100, 'dropout_rate': 0.3, 'learning_rate': 0.01, 'activation': 'relu'}. Best is trial 0 with value: 0.14105968389925203.




[I 2025-04-20 20:03:54,073] Trial 1 finished with value: 0.13732333445872338 and parameters: {'neurons': 100, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'activation': 'tanh'}. Best is trial 1 with value: 0.13732333445872338.




[I 2025-04-20 20:04:43,099] Trial 2 finished with value: 0.4783894976552309 and parameters: {'neurons': 300, 'dropout_rate': 0.2, 'learning_rate': 0.01, 'activation': 'softmax'}. Best is trial 1 with value: 0.13732333445872338.




[I 2025-04-20 20:05:46,202] Trial 3 finished with value: 0.11258067383793235 and parameters: {'neurons': 300, 'dropout_rate': 0.5, 'learning_rate': 0.01, 'activation': 'tanh'}. Best is trial 3 with value: 0.11258067383793235.




[I 2025-04-20 20:06:09,721] Trial 4 finished with value: 0.6980553759722788 and parameters: {'neurons': 50, 'dropout_rate': 0.2, 'learning_rate': 0.0005, 'activation': 'softmax'}. Best is trial 3 with value: 0.11258067383793235.


In [30]:
def NN(best_params, forecast_window, past_window):
    scores = {}
    forecasts = {}

    for col in df.columns:
        print(f"\n ==== Treinando {col} ====")

        series = df[col].values.reshape(-1, 1)
        scaler = MinMaxScaler()
        series_scaled = scaler.fit_transform(series)

        X, y = data_preparation(series_scaled, forecast_window, past_window)
        X = X.reshape((X.shape[0], past_window, 1))

        params = best_params[col]
        model = build_model(
            past_window=past_window,
            forecast_window=forecast_window,
            neurons=params['neurons'],
            dropout_rate=params['dropout_rate'],
            learning_rate=params['learning_rate'],
            activation=params['activation']
        )

        model.fit(X, y, epochs=100, batch_size=32, verbose=0) # Preservar params de fit

        # Previsão
        last_input = series_scaled[-past_window:].reshape((1, past_window, 1))
        forecast = model.predict(last_input)
        forecast_rescaled = scaler.inverse_transform(forecast.reshape(-1, 1)).flatten()

        forecasts[col] = forecast_rescaled

        y_pred = model.predict(X)
        y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
        y_rescaled = scaler.inverse_transform(y.reshape(-1, 1)).reshape(y.shape)
        rmse = np.mean([
            root_mean_squared_error(y_rescaled[:, i], y_pred_rescaled[:, i])
            for i in range(forecast_window)
        ])
        scores[col] = rmse

    return forecasts, scores

In [31]:
forecast_set, score_set = NN(best_params=params, forecast_window=forecast_window, past_window=past_window)


📈 Gerando previsão para: #1

📈 Gerando previsão para: #2

📈 Gerando previsão para: #3

📈 Gerando previsão para: #4

📈 Gerando previsão para: #5

📈 Gerando previsão para: #6

📈 Gerando previsão para: #7

📈 Gerando previsão para: #8

📈 Gerando previsão para: #9

📈 Gerando previsão para: #10

📈 Gerando previsão para: #11


In [32]:
forecast = pd.DataFrame(forecast_set)
df_scores = pd.DataFrame.from_dict(score_set, orient='index', columns=['RMSE'])

In [33]:
forecast.to_excel('../Output/v5/previsoes_lstm.xlsx', index=False)
df_scores.to_excel('../Output/v5/scores_lstm.xlsx')

In [34]:
wrmse = lambda rmse: (rmse[:11] * (1 / 11)).sum()

In [35]:
print(f'WRMSE = {wrmse(df_scores["RMSE"])}')

WRMSE = 0.08913640587115884
