In [None]:
# install packages
# NOTE(review): the first command installs pandas without a version pin and the
# third command then re-installs it pinned (pandas==2.2.2, with --upgrade), so
# the last command decides which pandas version ends up in the environment.
# NOTE(review): `%pip install ...` targets the running kernel's environment;
# `!pip` runs whichever pip is first on the shell PATH — confirm they match.

# Unpinned: data source client, stats, dataframes and plotting.
!pip install gridstatus statsmodels pandas matplotlib
# Unpinned, quiet: gdown.
!pip install -q gdown
# Pinned numeric / ML stack (upgrades or downgrades to exactly these versions).
!pip install --upgrade --quiet \
    numpy==1.26.4 \
    pandas==2.2.2 \
    scikit-learn==1.3.2 \
    xgboost==2.0.3 \
    lightgbm==4.1.0 \
    catboost==1.2.7

In [None]:
# import statements
# NOTE(review): wildcard import — none of the cells visible here reference a
# datetime name, and `*` hides where names come from; consider importing only
# what is needed.
from datetime import *
import numpy as np
import pandas as pd
from gridstatus import Ercot
import xgboost as xgb

# gridstatus ERCOT client instance.
# NOTE(review): `ercot` is not referenced by any of the cells visible here.
ercot = Ercot()

# NOTE(review): this import sits below executable code; it would normally live
# with the other imports at the top of the cell.
from pathlib import Path
# Project root, resolved relative to the current user's home directory.
# `base_path` is used further down to build `file_path`.
base_path = Path.home() / "Documents" / "totalenergies_price_forecasting"

In [None]:
def get_feat(file_path, target_col='Price ($/MWh)'):
    """Load a CSV and split it into a feature frame and a target series.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.
    target_col : str, default 'Price ($/MWh)'
        Name of the column returned as the target. The default keeps the
        behaviour of the original single-argument call.

    Returns
    -------
    X : pandas.DataFrame
        Every column of the file except ``target_col``, in file order.
    y : pandas.Series
        The ``target_col`` column.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of the file.
    """
    df = pd.read_csv(file_path)

    if target_col not in df.columns:
        raise KeyError(
            f"target column {target_col!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    # `drop` returns an independent frame. Callers in this notebook add columns
    # to X afterwards, and doing that on a `.loc` column selection can trigger
    # pandas' SettingWithCopyWarning.
    X = df.drop(columns=target_col)
    y = df[target_col]

    return X, y

In [None]:
def feat_forecast(feature, X, y, fh, ml):
    """Fit a lag-based XGBoost regressor for one column and predict from the last rows.

    Parameters
    ----------
    feature : str
        Name of the column of ``X`` to forecast.
    X : pandas.DataFrame
        Frame holding ``feature``. It is modified in place: the columns
        ``{feature}_lag1`` .. ``{feature}_lag{ml}`` are added to it.
    y : any
        Accepted for signature compatibility; not used by this function.
    fh : int
        Forecast horizon. The training target is ``feature`` shifted ``fh``
        rows into the future, and predictions are made for the last ``fh`` rows.
    ml : int
        Number of lag columns to build.

    Returns
    -------
    The output of ``model.predict`` for the lag columns of the last ``fh``
    rows of ``X``.

    Notes
    -----
    Only the first 80% of the aligned rows are used for fitting; the remaining
    20% are not used here.
    """
    print(f"Training univariate forecaster for feature: {feature}")

    # Build the lag columns directly on the caller's frame, remembering their names.
    lag_names = []
    for step in range(1, ml + 1):
        column = f"{feature}_lag{step}"
        X[column] = X[feature].shift(step)
        lag_names.append(column)

    all_lags = X[lag_names]
    complete_lags = all_lags.dropna()             # rows where every lag is known
    future_values = X[feature].shift(-fh).dropna()  # value fh rows ahead

    # Keep only rows that have both a full lag window and a future value.
    shared_rows = complete_lags.index.intersection(future_values.index)
    inputs = complete_lags.loc[shared_rows]
    targets = future_values.loc[shared_rows]

    # Chronological 80% head of the aligned data is the training set.
    cutoff = int(0.8 * len(inputs))

    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=50)
    model.fit(inputs.iloc[:cutoff], targets.iloc[:cutoff])

    # Predict from the lag values of the most recent fh rows.
    return model.predict(all_lags.iloc[-fh:])

In [None]:
def time_shift(X, y, fh, discard):
    """Pair each feature row with the target value ``fh`` rows later.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; must share its index with ``y``. Not modified.
    y : pandas.Series
        Target series.
    fh : int
        Number of rows the target is shifted towards the past, so that the
        row at position t lines up with ``y`` at position t + fh.
    discard : iterable
        Column labels to remove from the returned feature frame.

    Returns
    -------
    (X_supervised, y_supervised)
        Features and shifted target restricted to rows whose shifted target is
        not NaN, both re-indexed from 0. Their shapes are printed.
    """
    future_target = y.shift(-fh)
    # Rows without a usable future target (NaN after the shift) are excluded.
    usable_rows = future_target.dropna().index

    features = X.loc[usable_rows]
    for unwanted in discard:
        features = features.drop(columns=unwanted)
    target = future_target.loc[usable_rows]

    # Print the shapes to confirm
    print(f"Shape of X_supervised: {features.shape}")
    print(f"Shape of y_supervised: {target.shape}")

    # Hand back clean 0..n-1 indices.
    return features.reset_index(drop=True), target.reset_index(drop=True)

In [None]:
def hybrid_feature_forecast(file_path, dynamic_features, forecast_horizon, max_lag):
    """Build a supervised dataset in which selected features are replaced by forecasts.

    Steps:
      1. Load features and target with ``get_feat(file_path)``.
      2. For every name in ``dynamic_features`` call ``feat_forecast`` to obtain
         predicted values for that feature.
      3. Call ``time_shift`` to align features with the target
         ``forecast_horizon`` rows ahead, dropping the original dynamic columns.
      4. Append the predictions as ``<feature>_forecast`` columns.

    Parameters
    ----------
    file_path : path-like
        Passed to ``get_feat``.
    dynamic_features : iterable of str
        Columns to forecast and replace. May be empty, in which case the
        supervised frames are returned without any forecast columns.
    forecast_horizon : int
        Passed as the horizon to ``feat_forecast`` and ``time_shift``.
    max_lag : int
        Passed as the number of lags to ``feat_forecast``.

    Returns
    -------
    (X_combined, y_supervised)

    Raises
    ------
    AssertionError
        If the number of forecast rows differs from the number of supervised
        rows.

    Notes
    -----
    NOTE(review): as written in this notebook, ``feat_forecast`` predicts for
    the last ``forecast_horizon`` rows while ``time_shift`` keeps the rows that
    have a target ``forecast_horizon`` steps ahead. Those counts only agree for
    particular data lengths, so the row-count check below is expected to fail
    for most inputs — confirm the intended alignment.
    NOTE(review): ``feat_forecast`` adds lag columns to ``X`` in place, so they
    are also present in the returned feature frame — confirm this is intended.
    """
    # Materialise once: the names are iterated here and again by time_shift.
    dynamic_features = list(dynamic_features)

    X, y = get_feat(file_path)

    # One forecast column per dynamic feature, already carrying its suffix.
    forecasts = {
        f"{feat}_forecast": feat_forecast(feat, X, y, forecast_horizon, max_lag)
        for feat in dynamic_features
    }

    X_supervised, y_supervised = time_shift(X, y, forecast_horizon, dynamic_features)

    # Nothing to forecast: an empty frame has zero rows and would fail the
    # row-count check below, so return the supervised data as is.
    if not forecasts:
        return X_supervised, y_supervised

    predicted_features = pd.DataFrame(forecasts)

    assert predicted_features.shape[0] == X_supervised.shape[0], (
        f"forecast rows ({predicted_features.shape[0]}) do not match "
        f"supervised rows ({X_supervised.shape[0]})"
    )

    # Concatenate original X_supervised with the forecasted features
    X_combined = pd.concat([X_supervised, predicted_features], axis=1)

    return X_combined, y_supervised

In [None]:
# Run configuration.
# Number of rows ahead to forecast, and number of lag columns to build.
# Presumably passed to hybrid_feature_forecast as `forecast_horizon` and
# `max_lag` — the call is not visible in this part of the notebook.
forecast_horizon = 48
max_lag = 24

# NOTE(review): the last path component is an empty string, which pathlib
# ignores, so this currently points at the `data/processed` directory rather
# than a CSV file — fill in the file name before passing it to get_feat.
file_path = base_path / "data" / "processed" / ""
# Names of the feature columns to forecast; empty here, i.e. none selected yet.
dynamic_features = []