In [None]:
import pandas as pd
# Load the training CSV into a DataFrame.
df = pd.read_csv('./datasets/data_treino_dv_df_2000_2010.csv')
# Last expression of the cell: displays the (rows, columns) shape of the loaded frame.
df.shape

In [None]:
# Row count of `df` divided by 24, minus 21.
# NOTE(review): presumably a count of daily steps for hourly data; the meaning of
# the constant 21 is not stated anywhere in this notebook -- confirm.
len(df) / 24 - 21

In [None]:
# Prints every window start offset: 0, 24, 48, ... stopping before 87502.
# NOTE(review): 87502 is hard-coded; presumably derived from the row count of `df` -- confirm.
for start in range(0,87502,24): # so that the window moves every 24 hours
  print(start)


In [None]:
# Builds a Multilayer Perceptron (MLP) model for regression

def build_mlp_model(input_dim, hidden_layers, activation='relu', learning_rate=0.001, kernel_regularizer=None):
    """Assemble and compile a fully connected regression network.

    Args:
        input_dim: Number of input features, passed to the first Dense layer.
        hidden_layers: Sequence of layer widths; one Dense layer is added per
            entry. Must contain at least one entry (the first is indexed directly).
        activation: Activation used by every hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        kernel_regularizer: Regularizer applied to every hidden layer, or None.

    Returns:
        The compiled Sequential model: the hidden layers followed by a single
        linear output unit, trained with mean squared error.
    """
    # Settings shared by all hidden layers.
    hidden_kwargs = {'activation': activation, 'kernel_regularizer': kernel_regularizer}

    network = Sequential()
    # Only the first layer declares the input size.
    network.add(Dense(hidden_layers[0], input_dim=input_dim, **hidden_kwargs))
    for width in hidden_layers[1:]:
        network.add(Dense(width, **hidden_kwargs))
    # Single-unit output with the default (linear) activation.
    network.add(Dense(1))

    optimizer = Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer, loss='mse')
    return network


# Builds a simple RNN model with a single recurrent layer

def build_rnn_model(timesteps, input_dim, hidden_units, activation='tanh', learning_rate=0.001):
    """Assemble and compile a one-layer SimpleRNN regression network.

    Args:
        timesteps: Length of each input sequence.
        input_dim: Number of features per timestep.
        hidden_units: Number of units in the SimpleRNN layer.
        activation: Activation of the recurrent layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model: SimpleRNN followed by a single-unit
        Dense output, trained with mean squared error.
    """
    network = Sequential()
    layer_stack = (
        SimpleRNN(hidden_units, activation=activation, input_shape=(timesteps, input_dim)),
        Dense(1),
    )
    for layer in layer_stack:
        network.add(layer)

    optimizer = Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer, loss='mse')
    return network

# Builds an LSTM model to capture long temporal relationships

def build_lstm_model(timesteps, input_dim, hidden_units, activation='tanh', learning_rate=0.001):
    """Assemble and compile a one-layer LSTM regression network.

    Args:
        timesteps: Length of each input sequence.
        input_dim: Number of features per timestep.
        hidden_units: Number of units in the LSTM layer.
        activation: Activation of the LSTM layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model: LSTM followed by a single-unit Dense
        output, trained with mean squared error.
    """
    network = Sequential()
    layer_stack = (
        LSTM(hidden_units, activation=activation, input_shape=(timesteps, input_dim)),
        Dense(1),
    )
    for layer in layer_stack:
        network.add(layer)

    optimizer = Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer, loss='mse')
    return network

# Generates data sequences for RNN or LSTM models

def create_rnn_sequences(df, target_col, window_size):
    """Slice a frame into overlapping feature windows and next-step targets.

    Sequence ``i`` holds the non-target columns of rows ``i .. i + window_size - 1``
    and its target is the value of ``target_col`` at row ``i + window_size``.

    Args:
        df: DataFrame containing the feature columns and ``target_col``.
        target_col: Name of the column to predict; it is excluded from the features.
        window_size: Number of consecutive rows in each sequence.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. When at least one sequence exists,
        ``X`` has shape ``(len(df) - window_size, window_size, n_features)`` and
        ``y`` has one value per sequence. When ``len(df) <= window_size`` both
        arrays are empty.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
    """
    # Extract the feature matrix and target vector once. Dropping the column and
    # materialising a full row inside the loop repeats loop-invariant pandas work
    # for every sequence.
    features = df.drop(columns=[target_col]).values
    targets = df[target_col].values

    n_sequences = len(df) - window_size
    X = [features[i:i + window_size] for i in range(n_sequences)]
    y = [targets[i + window_size] for i in range(n_sequences)]
    return np.array(X), np.array(y)

# Main entry point: trains and evaluates MLP, RNN and LSTM models over sliding windows

def _params_to_literal(value):
    """Return a form of ``value`` that survives ``str()`` followed by ``ast.literal_eval``.

    Containers are converted recursively. Objects exposing ``get_config`` (for
    example Keras regularizers) become ``{'class_name': ..., 'config': ...}``.
    Any other non-literal object falls back to its ``repr`` string.
    """
    if isinstance(value, dict):
        return {key: _params_to_literal(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        converted = [_params_to_literal(item) for item in value]
        return converted if isinstance(value, list) else tuple(converted)
    if isinstance(value, np.generic):
        # NumPy scalars print as plain numbers but are normalised to Python types.
        return value.item()
    if value is None or isinstance(value, (bool, int, float, str)):
        return value
    if hasattr(value, 'get_config'):
        return {
            'class_name': type(value).__name__,
            'config': _params_to_literal(value.get_config()),
        }
    return repr(value)


def _parse_saved_params(raw):
    """Parse a ``params`` cell read from CSV; unparsable strings are returned unchanged."""
    if not isinstance(raw, str):
        return raw
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        # Cells that are not valid literals (e.g. containing an object repr such
        # as "<... object at 0x...>") cannot be rebuilt. Keeping the raw string
        # means the row never matches a grid entry, so that combination is re-run.
        return raw


def _fit_predict_mlp_window(df, target_col, start, T_hours, test_hours, param_dict):
    """Fit an MLP on one training window and predict the rows right after it.

    Returns ``(y_test, y_pred)`` where ``y_test`` is the target Series of the
    test rows and ``y_pred`` the flattened model predictions.
    """
    train = df.iloc[start: start + T_hours]
    test = df.iloc[start + T_hours: start + T_hours + test_hours]

    X_train = train.drop(columns=[target_col])
    y_train = train[target_col]
    X_test = test.drop(columns=[target_col])
    y_test = test[target_col]

    # The scaler is fitted on the training rows only and reused for the test rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = build_mlp_model(
        input_dim=X_train.shape[1],
        hidden_layers=param_dict['hidden_layers'],
        activation=param_dict['activation'],
        learning_rate=param_dict['learning_rate'],
        kernel_regularizer=param_dict['kernel_regularizer']
    )

    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=param_dict['epochs'], batch_size=param_dict['batch_size'], verbose=0, callbacks=[early_stop])
    return y_test, model.predict(X_test).flatten()


def _fit_predict_recurrent_window(df, target_col, start, T_hours, test_hours, model_type, param_dict):
    """Fit an RNN or LSTM on one window and predict its last sequence.

    Sequences of length ``T_hours`` are built over the concatenated train and
    test rows, which yields ``test_hours`` sequences. All but the last are used
    for fitting and only the last one is scored.

    NOTE(review): the fitting targets therefore come from rows of the test
    span, and a single value is evaluated per window -- confirm this is the
    intended protocol, since it differs from the MLP path.

    Returns ``(y_test, y_pred)`` as NumPy arrays.
    """
    data_window = df.iloc[start: start + T_hours + test_hours].copy()
    train_data = data_window.iloc[:T_hours]
    test_data = data_window.iloc[T_hours:]

    # The scaler is fitted on the training rows only and reused for the test rows.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(train_data.drop(columns=[target_col]))
    X_test_scaled = scaler.transform(test_data.drop(columns=[target_col]))

    # Rebuild frames with scaled features and the unscaled target column.
    train_scaled = pd.DataFrame(X_train_scaled, columns=train_data.columns.drop(target_col))
    train_scaled[target_col] = train_data[target_col].values

    test_scaled = pd.DataFrame(X_test_scaled, columns=test_data.columns.drop(target_col))
    test_scaled[target_col] = test_data[target_col].values

    scaled = pd.concat([train_scaled, test_scaled])
    X, y = create_rnn_sequences(scaled, target_col, T_hours)
    X_train, y_train = X[:-1], y[:-1]
    X_test, y_test = X[-1:], y[-1:]

    # model_type is validated by the caller, so anything that is not 'RNN' is 'LSTM'.
    build_model = build_rnn_model if model_type == 'RNN' else build_lstm_model
    model = build_model(
        timesteps=X_train.shape[1],
        input_dim=X_train.shape[2],
        hidden_units=param_dict['hidden_units'],
        activation=param_dict['activation'],
        learning_rate=param_dict['learning_rate']
    )

    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=param_dict['epochs'], batch_size=param_dict['batch_size'], verbose=0, callbacks=[early_stop])
    return y_test, model.predict(X_test).flatten()


def sliding_window_regression_models_scaling_keras_rnn(
    df,
    target_col='WIND_VEL_HOR',
    T_values=[7, 14],
    test_window=1,
    # Pass ['MLP', 'RNN', 'LSTM'] to evaluate all three model families.
    model_types=['MLP'],
    mlp_params={
        'hidden_layers': [[64], [32], [64, 32]],
        'activation': ['tanh'],
        'learning_rate': [0.001, 0.01],
        'epochs': [50],
        'batch_size': [32],
        'kernel_regularizer': [l2(0.001), l2(0.01)]
    },
    rnn_params={
        'hidden_units': [16, 32],
        'activation': ['tanh'],
        'learning_rate': [0.001],
        'epochs': [50],
        'batch_size': [32]
    },
    lstm_params={
        'hidden_units': [32, 64],
        'activation': ['tanh'],
        'learning_rate': [0.001],
        'epochs': [50],
        'batch_size': [32]
    },
    save_path='./progreso_keras_rnn'
):
    """Grid-search MLP/RNN/LSTM regressors with a sliding train/test window.

    For every training length ``T`` (in units of 24 rows), every model type and
    every hyper-parameter combination, a fresh model is trained on each window
    (the window start advances 24 rows at a time) and scored on the rows that
    follow it. Per-window metrics are averaged into one result row, which is
    appended to ``<save_path>/ANN_resultados_T<T>.csv`` right away so that an
    interrupted run can be resumed.

    Args:
        df: DataFrame with the feature columns and ``target_col``.
        target_col: Name of the column to predict.
        T_values: Training window lengths; each is multiplied by 24 to get rows.
        test_window: Test span, multiplied by 24 to get rows.
        model_types: Any of ``'MLP'``, ``'RNN'``, ``'LSTM'``.
        mlp_params, rnn_params, lstm_params: Dicts mapping a hyper-parameter
            name to the list of values to try; the full Cartesian product is
            evaluated. The defaults are not modified by this function.
        save_path: Directory for the per-``T`` result files (created if missing).

    Returns:
        Dict mapping each ``T`` to a DataFrame holding the previously saved
        rows plus the rows computed in this call.

    Raises:
        ValueError: If ``model_types`` contains an unsupported name.
    """
    HOURS_PER_DAY = 24
    grids = {'MLP': mlp_params, 'RNN': rnn_params, 'LSTM': lstm_params}

    # Reject unknown model names before any training starts. Without this check
    # an unknown name would either fail with NameError or silently reuse the
    # previous grid and train an LSTM.
    unknown = [name for name in model_types if name not in grids]
    if unknown:
        raise ValueError(
            f"Tipos de modelo no soportados: {unknown}. Opciones válidas: {sorted(grids)}"
        )

    # Create the results folder if it does not exist.
    os.makedirs(save_path, exist_ok=True)
    resultados_por_T = {}
    metric_names = ['MAPE', 'MAE', 'RMSE', 'MSE', 'R2', 'LjungBox_p']

    # Iterate over the different training window lengths T.
    for T in tqdm(T_values, desc="Procesando ventanas T", unit="ventana"):
        T_hours = T * HOURS_PER_DAY
        test_hours = test_window * HOURS_PER_DAY
        total_windows = len(df) - T_hours - test_hours + 1
        output_path = os.path.join(save_path, f'ANN_resultados_T{T}.csv')

        # Load earlier results, if any, so finished combinations are not recomputed.
        if os.path.exists(output_path):
            df_prev = pd.read_csv(output_path)
            if 'params' in df_prev:
                df_prev['params'] = df_prev['params'].apply(_parse_saved_params)
        else:
            df_prev = pd.DataFrame(columns=[
                'modelo', 'params', 'T_dias', 'T_horas', 'MAPE', 'MAE', 'RMSE', 'MSE', 'R2', 'LjungBox_p'
            ])

        # With no complete window every metric would be NaN, and those rows
        # would be saved and later treated as finished. Skip this T instead.
        if total_windows <= 0:
            print(
                f"⚠️ T={T}: no hay suficientes filas ({len(df)}) para una ventana de "
                f"{T_hours} h más {test_hours} h de prueba; se omite."
            )
            resultados_por_T[T] = df_prev
            continue

        # Iterate over the requested model families.
        for model_type in model_types:
            grid = grids[model_type]
            param_keys = list(grid.keys())

            # Iterate over hyper-parameter combinations.
            for combo in product(*grid.values()):
                param_dict = dict(zip(param_keys, combo))
                # Literal-safe form used both to match saved rows and to write the CSV.
                param_key = _params_to_literal(param_dict)

                already_done = (
                    not df_prev.empty
                    and (
                        (df_prev['modelo'] == model_type)
                        & df_prev['params'].apply(lambda saved: _params_to_literal(saved) == param_key)
                    ).any()
                )
                if already_done:
                    continue

                print(f"🔧 Evaluando modelo {model_type} con hiperparámetros: {param_dict}")
                resultados = {name: [] for name in metric_names}

                # Slide the window over the dataset, one day at a time.
                for start in range(0, total_windows, HOURS_PER_DAY):
                    if model_type == 'MLP':
                        y_test, y_pred = _fit_predict_mlp_window(
                            df, target_col, start, T_hours, test_hours, param_dict
                        )
                    else:
                        y_test, y_pred = _fit_predict_recurrent_window(
                            df, target_col, start, T_hours, test_hours, model_type, param_dict
                        )

                    # Metric computation.
                    residuals = y_test.values - y_pred if hasattr(y_test, 'values') else y_test - y_pred
                    enough_samples = len(residuals) >= 2
                    resultados['MAPE'].append(mean_absolute_percentage_error(y_test, y_pred))
                    resultados['MAE'].append(mean_absolute_error(y_test, y_pred))
                    resultados['RMSE'].append(np.sqrt(mean_squared_error(y_test, y_pred)))
                    resultados['MSE'].append(mean_squared_error(y_test, y_pred))
                    # r2_score warns and returns NaN below two samples; record
                    # the NaN directly so single-sample windows do not flood
                    # the output with warnings.
                    resultados['R2'].append(r2_score(y_test, y_pred) if enough_samples else np.nan)
                    if model_type == 'MLP' and enough_samples:
                        ljung_p = acorr_ljungbox(residuals, lags=[1], return_df=True)['lb_pvalue'].iloc[0]
                    else:
                        ljung_p = np.nan
                    resultados['LjungBox_p'].append(ljung_p)

                # Record and persist the averaged result for this combination.
                nuevo_row = pd.DataFrame([{
                    'modelo': model_type,
                    'params': param_dict,
                    'T_dias': T,
                    'T_horas': T_hours,
                    'MAPE': np.mean(resultados['MAPE']),
                    'MAE': np.mean(resultados['MAE']),
                    'RMSE': np.mean(resultados['RMSE']),
                    'MSE': np.mean(resultados['MSE']),
                    'R2': np.mean(resultados['R2']),
                    'LjungBox_p': np.nanmean(resultados['LjungBox_p'])
                }])
                df_prev = pd.concat([df_prev, nuevo_row], ignore_index=True)
                print(f"✅ Finalizado {model_type} con RMSE promedio: {np.mean(resultados['RMSE']):.4f}")
                # Write the literal-safe form of `params`. A raw dict holding
                # regularizer objects is stringified as "<... object at 0x...>",
                # which cannot be parsed back on the next run.
                df_prev.assign(params=df_prev['params'].apply(_params_to_literal)).to_csv(output_path, index=False)

        resultados_por_T[T] = df_prev

    return resultados_por_T