In [None]:
import os

import numpy as np
import optuna
import pandas as pd
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the competition workbook. The cells below iterate over `df.columns`
# and treat every column as one independent series to be forecast.
df = pd.read_excel('../Input/DadosCompeticao.xlsx')

#### NN com LSTM (bayesian search) -> 3 camadas (WRMSE = 0.08913640587115884)

In [None]:
# Number of future steps each model predicts (size of the output layer).
forecast_window = 12
# Number of past steps fed to the LSTM as one input sequence.
# NOTE(review): this is derived from the number of COLUMNS of `df`
# (df.shape[1]), not from the number of rows/time steps -- confirm that
# tying the look-back length to the column count is intended.
past_window = df.shape[1] - 1

In [None]:
def data_preparation(series_scaled, forecast_window, past_window):
    """Slice a series into sliding (input, target) windows.

    Window ``k`` uses ``series_scaled[k:k + past_window]`` as input and the
    ``forecast_window`` elements that immediately follow it as target.

    Parameters
    ----------
    series_scaled : sequence
        Sliceable sequence with a length (the callers pass a scaled array).
    forecast_window : int
        Number of elements in each target window.
    past_window : int
        Number of elements in each input window.

    Returns
    -------
    tuple of np.ndarray
        ``(inputs, targets)`` stacked along the first axis. Both are empty
        arrays when the series is too short to hold a single window pair.
    """
    n_windows = len(series_scaled) - past_window - forecast_window + 1
    starts = range(n_windows)  # an empty range when n_windows <= 0

    inputs = [series_scaled[start:start + past_window] for start in starts]
    targets = [
        series_scaled[start + past_window:start + past_window + forecast_window]
        for start in starts
    ]
    return np.array(inputs), np.array(targets)

In [None]:
def build_model(past_window, forecast_window, neurons=100, dropout_rate=0.3, learning_rate=0.001, activation='relu'):
    """Assemble and compile the three-layer LSTM forecaster.

    Layout: three LSTM layers, each followed by a Dropout layer, then a hidden
    Dense layer and an output Dense layer with ``forecast_window`` units and no
    activation argument.

    Parameters
    ----------
    past_window : int
        Sequence length of the input; the declared input shape is
        ``(past_window, 1)``.
    forecast_window : int
        Number of units of the output layer.
    neurons : int
        Units used by every LSTM layer and by the hidden Dense layer.
    dropout_rate : float
        Rate passed to each Dropout layer.
    learning_rate : float
        Learning rate of the Adam optimizer.
    activation : str
        Activation passed to the LSTM layers and the hidden Dense layer.

    Returns
    -------
    Sequential
        Model compiled with Adam and the ``'mean_squared_error'`` loss.
    """
    # First recurrent layer declares the input shape and keeps the sequence.
    stack = [
        LSTM(neurons, activation=activation, return_sequences=True, input_shape=(past_window, 1)),
        Dropout(dropout_rate),
    ]
    # Middle recurrent layer also returns sequences for the next LSTM.
    stack += [
        LSTM(neurons, activation=activation, return_sequences=True),
        Dropout(dropout_rate),
    ]
    # Last recurrent layer does not return sequences before the dense head.
    stack += [
        LSTM(neurons, activation=activation, return_sequences=False),
        Dropout(dropout_rate),
    ]
    stack += [
        Dense(neurons, activation=activation),
        Dense(forecast_window),
    ]

    network = Sequential(stack)
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return network

In [None]:
def objective(trial, series, forecast_window, past_window, n_splits=3):
    """Optuna objective: cross-validated RMSE of the LSTM for one series.

    Samples one hyper-parameter combination from ``trial``, trains a fresh
    model on every ``TimeSeriesSplit`` fold and scores it on that fold's
    validation windows after mapping predictions and targets back through the
    scaler's inverse transform.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw ``neurons``, ``dropout_rate``, ``learning_rate``
        and ``activation`` from fixed candidate lists.
    series : np.ndarray
        Values of one series; it is reshaped to a single column before scaling.
    forecast_window : int
        Number of steps predicted per sample.
    past_window : int
        Number of past steps in each input sequence.
    n_splits : int, default 3
        Number of folds passed to ``TimeSeriesSplit``.

    Returns
    -------
    float
        Mean over folds of the per-horizon RMSE average (to be minimized).
    """
    # Candidate values explored by the search.
    neurons = trial.suggest_categorical("neurons", [50, 100, 150, 300])
    dropout_rate = trial.suggest_categorical("dropout_rate", [0.1, 0.2, 0.3, 0.5])
    learning_rate = trial.suggest_categorical("learning_rate", [0.01, 0.001, 0.0005])
    activation = trial.suggest_categorical("activation", ['relu', 'tanh', 'sigmoid', 'softmax'])

    # NOTE(review): the scaler is fit on the whole series before the folds are
    # created, so the validation windows influence the scaling of the training
    # windows -- confirm this is acceptable for model selection.
    scaler = MinMaxScaler()
    series_scaled = scaler.fit_transform(series.reshape(-1, 1))

    X, y = data_preparation(series_scaled, forecast_window, past_window)
    X = X.reshape((X.shape[0], past_window, 1))

    tscv = TimeSeriesSplit(n_splits=n_splits)
    rmse_scores = []

    for train_idx, val_idx in tscv.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = build_model(past_window, forecast_window, neurons, dropout_rate, learning_rate, activation)
        model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

        # verbose=0 keeps predict as quiet as fit; otherwise every fold of
        # every trial prints a progress bar.
        y_pred = model.predict(X_val, verbose=0)
        y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
        y_val_rescaled = scaler.inverse_transform(y_val.reshape(-1, 1)).reshape(y_val.shape)

        # One RMSE per forecast step, averaged into a single fold score.
        rmse = np.mean([
            root_mean_squared_error(y_val_rescaled[:, i], y_pred_rescaled[:, i])
            for i in range(forecast_window)
        ])
        rmse_scores.append(rmse)

        # A new model is built for every fold of every trial. Without
        # releasing the previous one, Keras' global state keeps growing and
        # the search eventually exhausts memory.
        del model
        clear_session()

    return np.mean(rmse_scores)  # objective value to minimize (RMSE)

In [None]:
def NN_bayesian_search(forecast_window, past_window, n_trials=5):
    """Tune the LSTM hyper-parameters separately for every column of ``df``.

    Reads the module-level ``df``. For each column an Optuna study minimizing
    ``objective`` is run for ``n_trials`` trials.

    Parameters
    ----------
    forecast_window : int
        Forwarded to ``objective``.
    past_window : int
        Forwarded to ``objective``.
    n_trials : int, default 5
        Number of trials per column.

    Returns
    -------
    dict
        Maps each column label to the ``best_params`` dict of its study.
    """
    tuned = {}

    for column_name in df.columns:
        print(f"\n parâmetros para: {column_name}")
        values = df[column_name].values

        def run_trial(trial, values=values):
            # Bind the current column's values to the shared objective.
            return objective(trial, values, forecast_window, past_window)

        # Minimize the objective over the search space it defines.
        search = optuna.create_study(direction="minimize")
        search.optimize(run_trial, n_trials=n_trials)

        tuned[column_name] = search.best_params

    return tuned

In [None]:
# Run the hyper-parameter search for every column of `df`; the result maps
# each column to its best parameter set and is consumed by NN() below.
params = NN_bayesian_search(forecast_window=forecast_window, past_window=past_window) # very expensive (author's note: "out of GPU" -- presumably GPU memory was exhausted)

In [None]:
def NN(best_params, forecast_window, past_window):
    """Fit the tuned LSTM on every column of ``df`` and forecast ahead.

    Reads the module-level ``df``. For each column the series is min-max
    scaled, windowed, used to train a model built from that column's tuned
    parameters, and the last ``past_window`` scaled values are fed to the model
    to produce the forecast.

    Parameters
    ----------
    best_params : dict
        Maps each column label of ``df`` to a dict with the keys ``'neurons'``,
        ``'dropout_rate'``, ``'learning_rate'`` and ``'activation'``.
    forecast_window : int
        Number of steps to forecast.
    past_window : int
        Number of past steps in each input sequence.

    Returns
    -------
    tuple of dict
        ``(forecasts, scores)``. ``forecasts`` maps each column to its forecast
        in the original scale. ``scores`` maps each column to the mean
        per-horizon RMSE computed on the SAME windows the model was trained on
        (an in-sample score, not a hold-out estimate).

    Raises
    ------
    KeyError
        If ``best_params`` has no entry for a column or lacks one of the keys.
    """
    scores = {}
    forecasts = {}

    for col in df.columns:
        print(f"\n ==== Treinando {col} ====")

        series = df[col].values.reshape(-1, 1)
        scaler = MinMaxScaler()
        series_scaled = scaler.fit_transform(series)

        X, y = data_preparation(series_scaled, forecast_window, past_window)
        X = X.reshape((X.shape[0], past_window, 1))

        params = best_params[col]
        model = build_model(
            past_window=past_window,
            forecast_window=forecast_window,
            neurons=params['neurons'],
            dropout_rate=params['dropout_rate'],
            learning_rate=params['learning_rate'],
            activation=params['activation']
        )

        model.fit(X, y, epochs=100, batch_size=32, verbose=0)  # keep these fit settings

        # Forecast: the most recent `past_window` scaled values are the input.
        last_input = series_scaled[-past_window:].reshape((1, past_window, 1))
        forecast = model.predict(last_input, verbose=0)
        forecast_rescaled = scaler.inverse_transform(forecast.reshape(-1, 1)).flatten()

        forecasts[col] = forecast_rescaled

        # In-sample fit quality, measured in the original scale.
        y_pred = model.predict(X, verbose=0)
        y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)
        y_rescaled = scaler.inverse_transform(y.reshape(-1, 1)).reshape(y.shape)
        rmse = np.mean([
            root_mean_squared_error(y_rescaled[:, i], y_pred_rescaled[:, i])
            for i in range(forecast_window)
        ])
        scores[col] = rmse

        # One model is built per column. Release it and Keras' global state so
        # memory does not accumulate across the loop.
        del model
        clear_session()

    return forecasts, scores

In [None]:
# Train the final model of every column with its tuned parameters and collect
# the per-column forecasts and RMSE scores.
forecast_set, score_set = NN(best_params=params, forecast_window=forecast_window, past_window=past_window)

In [None]:
# One column per series, holding that series' forecast values.
forecast = pd.DataFrame(forecast_set)
# One row per series (index = column label) with its score in the 'RMSE' column.
df_scores = pd.DataFrame.from_dict(score_set, orient='index', columns=['RMSE'])

In [None]:
# Persist forecasts and scores. The output folder is created first because
# to_excel does not create missing parent directories, and failing here would
# come only after the expensive training cells have run.
os.makedirs('../Output/v2_1', exist_ok=True)
forecast.to_excel('../Output/v2_1/previsoes_lstm.xlsx', index=False)
df_scores.to_excel('../Output/v2_1/scores_lstm.xlsx')

In [None]:
def wrmse(rmse, n_series=11):
    """Equally weighted RMSE over the first ``n_series`` scores.

    Parameters
    ----------
    rmse : pandas.Series or np.ndarray
        Per-series RMSE values; only the first ``n_series`` entries are used.
    n_series : int, default 11
        Number of leading entries to include; each one is weighted by
        ``1 / n_series``.

    Returns
    -------
    float
        Sum of the first ``n_series`` scores, each multiplied by
        ``1 / n_series``.

    Raises
    ------
    ValueError
        If ``n_series`` is smaller than 1.
    """
    if n_series < 1:
        raise ValueError("n_series must be a positive integer")
    return (rmse[:n_series] * (1 / n_series)).sum()

In [None]:
# Report the weighted RMSE computed from the per-series scores in df_scores.
print(f'WRMSE = {wrmse(df_scores["RMSE"])}')