In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Load the competition data set from the Excel workbook; the cells below
# treat every column of `df` as one series to be modelled.
df = pd.read_excel('../Input/DadosCompeticao.xlsx')

#### NN com LSTM -> 3 camadas (WRMSE = 0.15065080631426203)

In [None]:
# Number of future steps predicted in one shot (also the width of the
# network's output layer).
forecast_window = 12
# Number of past observations fed to the network for each sample.
# NOTE(review): this is derived from the number of *columns* in `df`, whereas
# the sliding windows run along the rows of each series — confirm that tying
# the look-back length to the column count is intended.
past_window = df.shape[1] - 1

In [None]:
# Quick inspection of the row index of the loaded frame; nothing below
# depends on this output.
print(df.index)

In [None]:
def data_preparation(series_scaled, forecast_window, past_window):
    """Cut a series into supervised (input window, target window) pairs.

    Sample ``k`` uses ``series_scaled[k:k + past_window]`` as input and the
    ``forecast_window`` values that immediately follow it as target. Windows
    advance one step at a time, and only windows that fit entirely inside the
    series are produced.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` with one entry per sample. Both arrays are empty when the
        series is shorter than ``past_window + forecast_window``.
    """
    n_samples = len(series_scaled) - past_window - forecast_window + 1
    starts = range(n_samples)  # an empty range when n_samples <= 0

    inputs = [series_scaled[start:start + past_window] for start in starts]
    targets = [
        series_scaled[start + past_window:start + past_window + forecast_window]
        for start in starts
    ]
    return np.array(inputs), np.array(targets)

In [None]:
def LSTM_model(past_window, forecast_window):
    """Build and compile the stacked-LSTM forecasting network.

    Architecture, in order: three LSTM layers of 100 units (ReLU activation),
    each followed by dropout (0.5, 0.3, 0.1); a 100-unit ReLU dense layer; and
    a linear dense output with one unit per forecast step. Input samples have
    shape ``(past_window, 1)``.

    Returns
    -------
    The compiled ``Sequential`` model (Adam optimizer, mean-squared-error loss).
    """
    net = Sequential()

    # First recurrent block: declares the input shape and passes the full
    # sequence on to the next LSTM layer.
    net.add(LSTM(100, activation='relu', return_sequences=True, input_shape=(past_window, 1)))
    net.add(Dropout(0.5))

    # Second recurrent block, still sequence-to-sequence.
    net.add(LSTM(100, activation='relu', return_sequences=True))
    net.add(Dropout(0.3))

    # Last recurrent block collapses the sequence to its final state.
    net.add(LSTM(100, activation='relu', return_sequences=False))
    net.add(Dropout(0.1))

    # Dense head: hidden layer, then one output per forecast step.
    net.add(Dense(100, activation='relu'))
    net.add(Dense(forecast_window))

    net.compile(loss='mean_squared_error', optimizer='adam')
    return net


In [None]:
def NN(forecast_window, past_window, n_splits=3, data=None, epochs=100, batch_size=32):
    """Cross-validate and forecast every column of a DataFrame with the LSTM net.

    For each column the series is min-max scaled and cut into sliding
    windows. The windows are scored with a time-ordered cross-validation
    (a fresh model per fold), then a final model is trained on all windows to
    forecast the ``forecast_window`` steps that follow the last observation.

    Parameters
    ----------
    forecast_window : int
        Number of future steps predicted per sample.
    past_window : int
        Number of past observations used as input per sample.
    n_splits : int, default 3
        Number of folds passed to ``TimeSeriesSplit``.
    data : pandas.DataFrame, optional
        Frame whose columns are the series to model. Defaults to the
        notebook-level ``df`` so existing calls keep working.
    epochs : int, default 100
        Training epochs for every fold model and for the final model.
    batch_size : int, default 32
        Mini-batch size for every fold model and for the final model.

    Returns
    -------
    forecasts : dict
        Column name -> 1-D array with the forecast in the original scale.
    scores : dict
        Column name -> RMSE (original scale) averaged first over the forecast
        steps and then over the folds.
    """
    if data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        data = df

    scores = {}
    forecasts = {}

    for col in data.columns:
        print(f"\n==== série Treinada: {col} ====")

        series = data[col].values.reshape(-1, 1)
        scaler = MinMaxScaler()
        # NOTE(review): the scaler is fitted on the whole series before the
        # folds are split, so validation periods contribute to the scaling
        # range — confirm this is acceptable for the reported scores.
        series_scaled = scaler.fit_transform(series)

        X, y = data_preparation(series_scaled, forecast_window, past_window)
        X = X.reshape((X.shape[0], past_window, 1))
        # Make the targets explicitly (samples, forecast_window) so they have
        # the same rank as the network output instead of carrying a trailing
        # singleton axis into the loss and the metric.
        y = y.reshape((y.shape[0], forecast_window))

        # Time-ordered split (training folds always precede validation), not KFold.
        tscv = TimeSeriesSplit(n_splits=n_splits)
        rmse_folds = []

        # Training / validation: a fresh model per fold.
        for train_index, val_index in tscv.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            model = LSTM_model(past_window, forecast_window)
            model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

            y_pred = model.predict(X_val)
            # Bring targets and predictions back to the original scale; the
            # scaler works on a single column, hence the flatten/restore.
            y_val_rescaled = scaler.inverse_transform(y_val.reshape(-1, 1)).reshape(y_val.shape)
            y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)

            # RMSE per forecast step, averaged over the steps.
            rmse = np.mean([
                root_mean_squared_error(y_val_rescaled[:, i], y_pred_rescaled[:, i])
                for i in range(forecast_window)
            ])
            rmse_folds.append(rmse)

        scores[col] = np.mean(rmse_folds)

        # Forecast with a model retrained on every window.
        # TODO (from the original note): vary batch_size — grid search or
        # Bayesian search? The learning rate is left static at 0.001.
        last_input = series_scaled[-past_window:].reshape((1, past_window, 1))
        model_final = LSTM_model(past_window, forecast_window)
        model_final.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=0)
        y_forecast = model_final.predict(last_input)
        y_forecast_rescaled = scaler.inverse_transform(y_forecast.reshape(-1, 1)).flatten()

        forecasts[col] = y_forecast_rescaled

    return forecasts, scores


In [None]:
# Run the per-series pipeline (cross-validation followed by the final forecast).
# forecast_set maps column name -> forecast array; score_set maps column
# name -> RMSE averaged over the folds.
forecast_set, score_set = NN(forecast_window, past_window)

In [None]:
# One column per series, one row per forecast step.
forecast = pd.DataFrame(forecast_set)
# One row per series (series name as the index) holding its cross-validated RMSE.
df_scores = pd.DataFrame.from_dict(score_set, orient='index', columns=['RMSE'])

In [None]:
# Persist the forecasts and the scores as Excel workbooks.
# NOTE(review): nothing here creates '../Output/v2/', so presumably it must
# already exist — confirm.
forecast.to_excel('../Output/v2/previsoes_lstm.xlsx', index=False)
# NOTE(review): index=False drops the index of df_scores, which is where the
# series names live, so the saved sheet holds only the RMSE column — confirm
# this is intended.
df_scores.to_excel('../Output/v2/scores_lstm.xlsx', index=False)

In [None]:
def wrmse(rmse, n_series=11):
    """Return the equally weighted sum of the first ``n_series`` RMSE values.

    Each of the first ``n_series`` entries is weighted by ``1 / n_series`` and
    the weighted values are summed. When ``rmse`` holds at least ``n_series``
    entries this is the mean of the first ``n_series`` scores. When it holds
    fewer, the weight stays ``1 / n_series``, so the result is the sum of the
    available scores divided by ``n_series``.

    Parameters
    ----------
    rmse : pandas.Series or numpy.ndarray
        Per-series RMSE values, sliced positionally.
    n_series : int, default 11
        Number of leading entries to include; also sets the weight.
        Must be non-zero.

    Returns
    -------
    float
        The weighted RMSE.
    """
    return (rmse[:n_series] * (1 / n_series)).sum()

In [61]:
# Report the aggregate score computed from the per-series RMSE column.
print(f'WRMSE = {wrmse(df_scores["RMSE"])}')

WRMSE = 0.15065080631426203
