In [None]:
# Standard library
import sys
from pathlib import Path

# Third-party
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Make the project's `src` directory importable; this must run before the
# project-local imports below.
sys.path.append('../src')

# Project-local
from utils.metrics import calculate_metrics, print_metrics
from modeling.advanced_ml import SARIMAXWrapper, ProphetWrapper

# Forecast horizon: predict consumption `fh` hours ahead.
fh = 24

# Collects one metrics record per evaluated model.
results = []

# Load the pre-computed train / validation / test splits for this horizon.
data_dir = Path('../data/processed')


def _load_split(split_name):
    """Read the `<split_name>_<fh>hr.csv` split with a parsed `Datetime` index."""
    return pd.read_csv(
        data_dir / f'{split_name}_{fh}hr.csv',
        index_col='Datetime',
        parse_dates=True,
    )


train_df = _load_split('train')
val_df = _load_split('val')
test_df = _load_split('test')

# Separate features (X) from the target (y).
TARGET_COL = 'PJME_MW'
feature_cols = [col for col in train_df.columns if col != TARGET_COL]

X_train, y_train = train_df[feature_cols], train_df[TARGET_COL]
X_val, y_val = val_df[feature_cols], val_df[TARGET_COL]
X_test, y_test = test_df[feature_cols], test_df[TARGET_COL]

print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"X_test shape: {X_test.shape}\n")

train_df.info()

# Fit the scaler on the training features only, then apply the same
# transformation to every split.
scaler = StandardScaler()
scaler.fit(X_train)


def _scale_features(features):
    """Return `features` transformed by `scaler`, keeping its index and columns."""
    return pd.DataFrame(
        scaler.transform(features),
        index=features.index,
        columns=features.columns,
    )


X_train_scaled = _scale_features(X_train)
X_val_scaled = _scale_features(X_val)
X_test_scaled = _scale_features(X_test)

# Persist the fitted scaler. Create the target directory first so the dump
# does not fail on a fresh checkout where `../models` does not exist yet.
scaler_path = Path(f'../models/feature_scaler_{fh}hr.pkl')
scaler_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, scaler_path)
print(f"Scaler guardado en {scaler_path}")

X_train shape: (97548, 49)
X_val shape: (20879, 49)
X_test shape: (20880, 49)

<class 'pandas.DataFrame'>
DatetimeIndex: 97548 entries, 2002-01-09 02:00:00 to 2013-09-06 13:00:00
Data columns (total 50 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   PJME_MW                 97548 non-null  float64
 1   hour                    97548 non-null  int64  
 2   dayofweek               97548 non-null  int64  
 3   quarter                 97548 non-null  int64  
 4   month                   97548 non-null  int64  
 5   year                    97548 non-null  int64  
 6   dayofyear               97548 non-null  int64  
 7   weekofyear              97548 non-null  int64  
 8   is_weekend              97548 non-null  int64  
 9   is_month_start          97548 non-null  int64  
 10  is_month_end            97548 non-null  int64  
 11  hour_sin                97548 non-null  float64
 12  hour_cos                97548 non-null  f

In [None]:
# NOTE(review): `exog_cols` is not defined in the cells visible here; it must
# be defined earlier in the notebook, otherwise this cell only works through
# leftover kernel state and breaks on Restart & Run All.
# Validate the column list up front so a bad entry fails immediately instead
# of after the expensive SARIMAX fit (the Prophet section indexes these
# columns and would raise KeyError there).
missing_exog = [col for col in exog_cols if col not in X_train_scaled.columns]
if missing_exog:
    raise KeyError(f"exog_cols not found in the scaled features: {missing_exog}")


def _record_result(model_name, metrics):
    """Store the metrics record for `model_name` in `results`.

    Any earlier record with the same 'model' value is replaced, so re-running
    this cell does not leave duplicate rows in `results`.
    """
    record = {'model': model_name, **metrics}
    results[:] = [row for row in results if row.get('model') != record['model']]
    results.append(record)


def _build_prophet_frame(y, X_scaled, regressor_cols):
    """Build a frame with `ds` (index of `y`), `y` (values) and one column per regressor."""
    frame = pd.DataFrame({'ds': y.index, 'y': y.values})
    for col in regressor_cols:
        frame[col] = X_scaled[col].values
    return frame


# SARIMAX
print('\n Model: SARIMAX')

sarimax = SARIMAXWrapper(
    order=(1,1,1),
    seasonal_order=(1,1,1,24),
    exog_cols=exog_cols
)

sarimax.fit(y_train, X_train_scaled)

sarimax_train_pred = sarimax.predict_in_sample()
# Forecast the whole validation window in one call, passing the scaled
# validation features as the future exogenous inputs.
sarimax_val_pred = sarimax.forecast(
    steps=len(y_val),
    X_future=X_val_scaled
)

metrics_sarimax = calculate_metrics(y_val, sarimax_val_pred, 'SARIMAX - Validation')
print_metrics(metrics_sarimax)
_record_result('SARIMAX', metrics_sarimax)


# Prophet
print('\n Model: Prophet')

df_train_prophet = _build_prophet_frame(y_train, X_train_scaled, exog_cols)
df_val_prophet = _build_prophet_frame(y_val, X_val_scaled, exog_cols)

prophet_model = ProphetWrapper(seasonality_mode='multiplicative')
prophet_model.fit(df_train_prophet)

prophet_train_pred = prophet_model.predict(df_train_prophet)
prophet_val_pred = prophet_model.predict(df_val_prophet)

metrics_prophet = calculate_metrics(y_val, prophet_val_pred, 'Prophet - Validation')
print_metrics(metrics_prophet)
_record_result('Prophet', metrics_prophet)


  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.



 Model: SARIMAX
Entrenando SARIMAX(1, 1, 1)x(1, 1, 1, 24)...
Advertencia: índice sin frecuencia explícita.


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
