# GridSearch Manual

In [79]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import itertools

# === 1. Carregar dados ===
df = pd.read_csv('/content/drive/MyDrive/Projeto Ciência de Dados/dados-manaus-preprocessado.csv')
df = df[df['ano'] >= 2000].drop(columns=['municipio', 'uf'])
X_df = df.drop(columns='vazao')
y_df = df['vazao']
target_column = 'vazao'
target_index = df.columns.get_loc(target_column)

# === 2. Normalizar ===
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)

# === 3. Função para criar janelas ===
def create_multistep_sequences(data, target_index, window_size, forecast_horizon):
    X, y = [], []
    for i in range(len(data) - window_size - forecast_horizon):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size:i+window_size+forecast_horizon, target_index])
    return np.array(X), np.array(y)

# === 4. Função para treinar modelo ===
def run_experiment(window_size, epochs, batch_size, val_split, verbose):
    forecast_horizon = window_size // 2
    X, y = create_multistep_sequences(scaled_data, target_index, window_size, forecast_horizon)
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    model = Sequential()
    model.add(Input(shape=(X.shape[1], X.shape[2])))
    model.add(LSTM(64))
    model.add(Dropout(0.3))
    model.add(Dense(forecast_horizon))
    model.compile(optimizer='adam', loss='mse')

    es = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=val_split,
                        callbacks=[es],
                        verbose=verbose)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Reverter normalização
    scale = scaler.scale_[target_index]
    min_ = scaler.min_[target_index]
    def denorm(y): return y * (1 / scale) + min_

    y_train_real = denorm(y_train)
    y_test_real = denorm(y_test)
    y_train_pred_real = denorm(y_train_pred)
    y_test_pred_real = denorm(y_test_pred)

    # Métricas (média dos horizontes)
    rmse_train = np.mean([np.sqrt(mean_squared_error(y_train_real[:, i], y_train_pred_real[:, i])) for i in range(forecast_horizon)])
    rmse_test = np.mean([np.sqrt(mean_squared_error(y_test_real[:, i], y_test_pred_real[:, i])) for i in range(forecast_horizon)])
    r2_train = np.mean([r2_score(y_train_real[:, i], y_train_pred_real[:, i]) for i in range(forecast_horizon)])
    r2_test = np.mean([r2_score(y_test_real[:, i], y_test_pred_real[:, i]) for i in range(forecast_horizon)])

    return {
        'window_size': window_size,
        'forecast_horizon': forecast_horizon,
        'epochs': epochs,
        'batch_size': batch_size,
        'val_split': val_split,
        'verbose': verbose,
        'rmse_train': rmse_train,
        'rmse_test': rmse_test,
        'r2_train': r2_train,
        'r2_test': r2_test
    }

# === 5. Hiperparâmetros a testar ===
param_grid = {
    'window_size': [14, 30, 50],
    'epochs': [50, 100],
    'batch_size': [16, 32],
    'val_split': [0.1, 0.2],
    'verbose': [0]  # Mantenha 0 para não lotar o output
}

combinations = list(itertools.product(*param_grid.values()))

# === 6. Rodar experimentos ===
results = []
for params in combinations:
    config = dict(zip(param_grid.keys(), params))
    print(f"Executando: {config}")
    result = run_experiment(**config)
    print(result)
    results.append(result)

# === 7. Salvar resultados ===
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='r2_test', ascending=False)
results_df.to_csv('/content/resultados_LSTM_param_grid.csv', index=False)

# Mostrar top 5 resultados
print("\nTop 5 combinações:")
print(results_df.head())


Executando: {'window_size': 14, 'epochs': 50, 'batch_size': 16, 'val_split': 0.1, 'verbose': 0}
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
{'window_size': 14, 'forecast_horizon': 7, 'epochs': 50, 'batch_size': 16, 'val_split': 0.1, 'verbose': 0, 'rmse_train': np.float64(2912.26359843662), 'rmse_test': np.float64(6395.745292979421), 'r2_train': np.float64(0.9944749540117209), 'r2_test': np.float64(0.9767320042946827)}
Executando: {'window_size': 14, 'epochs': 50, 'batch_size': 16, 'val_split': 0.2, 'verbose': 0}
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
{'window_size': 14, 'forecast_horizon': 7, 'epochs': 50, 'batch_size': 16, 'val_split': 0.2, 'verbose': 0, 'rmse_train': np.float64(2925.3965457753657), 'rmse_test': np.float64(6247.339908526874), 'r2_train': np.float64(0.99452531