In [18]:
# install packages
%pip install -q gdown
%pip install --upgrade --quiet \
    pandas==2.2.2 \
    xgboost==2.0.3 \
    scikit-learn==1.3.2 
%pip install pandas openpyxl


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [20]:
# import statements
from datetime import *
import pandas as pd
import xgboost as xgb

from pathlib import Path
base_path = Path.home() / "Documents" / "totalenergies_price_forecasting"

In [21]:
def get_feat(file_path):
    df = pd.read_csv(file_path)
    X = df.loc[:, df.columns != 'Price ($/MWh)'] 
    y = df['Price ($/MWh)']

    return X, y

In [41]:
def feat_forecast(feature, X, fh, ml):
    print(f"Training univariate forecaster for feature: {feature}")

    for lag in range(1, ml + 1):
        X[f"{feature}_lag{lag}"] = X[feature].shift(lag)

    # Prepare lagged features and future target
    feature_lagged = X[[f"{feature}_lag{lag}" for lag in range(1, ml + 1)]].dropna()
    y_target = X[feature].shift(-fh).dropna()

    # Align based on common index
    common_idx = feature_lagged.index.intersection(y_target.index)
    X_feat = feature_lagged.loc[common_idx]
    y_feat = y_target.loc[common_idx]

    # Train-test split (not needed for forecasting but used here to train model on 80%)
    train_size = int(0.8 * len(X_feat))
    X_feat_train, y_feat_train = X_feat.iloc[:train_size], y_feat.iloc[:train_size]

    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=50)
    model.fit(X_feat_train, y_feat_train)

    # Use most recent lags for prediction
    last_known = X[[f"{feature}_lag{lag}" for lag in range(ml, 0, -1)]].iloc[-1:].values
    future_preds = []

    # Rolling prediction to get fh steps ahead
    for _ in range(fh):
        pred = model.predict(last_known)[0]
        future_preds.append(pred)

        # Shift lags and insert prediction as newest value
        last_known = list(last_known[0][1:]) + [pred]
        last_known = [last_known]

    return pd.Series(future_preds)

In [39]:
def time_shift(X, y, fh, discard):
    y_shifted = y.shift(-fh)
    valid_idx = y_shifted.dropna().index

    X_supervised = X.loc[valid_idx].reset_index(drop=True)
    # if discard is not None:
    #     X_supervised = X_supervised.drop(columns=discard, errors='ignore')
    y_supervised = y_shifted.loc[valid_idx].reset_index(drop=True)

    print(f"Shape of X_supervised: {X_supervised.shape}")
    print(f"Shape of y_supervised: {y_supervised.shape}")

    return X_supervised, y_supervised

In [50]:
def hybrid_feature_forecast(file_path, dynamic_features, forecast_horizon, max_lag):
    X, y = get_feat(file_path)

    X_supervised, y_supervised = time_shift(X, y, forecast_horizon, dynamic_features)
    predicted_features = pd.DataFrame() 

    for feat in dynamic_features:
        feature_preds = feat_forecast(feat, X, forecast_horizon, max_lag)
        predicted_features[feat] = feature_preds
    
    X_final = X_supervised.iloc[-forecast_horizon:].reset_index(drop=True)
    y_final = y_supervised.iloc[-forecast_horizon:].reset_index(drop=True)
    
    # Add suffix to predicted features to indicate they are forecasts
    predicted_features = predicted_features.add_suffix('_forecast')
    X_final = X_final.drop(columns=dynamic_features, errors='ignore')
    X_combined = pd.concat([X_final, predicted_features], axis=1)

    return X_combined, y_final

In [51]:
forecast_horizon = 48
max_lag = 24

file_path = Path.home() / "Downloads" / "All_Years_Fuel_and_RTM.csv"
dynamic_features = ['Biomass', 'Coal']

X_final, y_final = hybrid_feature_forecast(file_path, dynamic_features, forecast_horizon, max_lag)

Shape of X_supervised: (26250, 6)
Shape of y_supervised: (26250,)
Training univariate forecaster for feature: Biomass
Training univariate forecaster for feature: Coal


In [52]:
X_final

Unnamed: 0,Timestamp,Gas,Hydro,Nuclear,Biomass_forecast,Coal_forecast
0,2024-12-28 00:00:00,753.641312,1.078275,1271.76036,3.163593,1770.042603
1,2024-12-28 01:00:00,164.320421,1.394871,1272.281878,4.466888,1847.417603
2,2024-12-28 02:00:00,163.61977,1.802736,1272.405564,3.041061,1670.215332
3,2024-12-28 03:00:00,164.052862,5.949049,1272.274251,3.464301,1560.33252
4,2024-12-28 04:00:00,164.103863,1.444727,1272.215706,2.439709,1445.627319
5,2024-12-28 05:00:00,164.380882,1.092538,1272.36202,3.641155,1583.409912
6,2024-12-28 06:00:00,163.358914,1.087671,1272.196569,3.384474,1573.917725
7,2024-12-28 07:00:00,176.606576,1.086134,1272.247601,3.777083,1726.480469
8,2024-12-28 08:00:00,185.981323,1.080637,1272.267583,3.614427,1697.296875
9,2024-12-28 09:00:00,149.865478,5.666587,1272.308946,2.87284,1880.075928


In [53]:
y_final

0      4.3550
1     -2.2425
2     -9.2950
3    -13.5125
4    -18.8925
5    -14.2200
6     -2.5825
7      9.0725
8      9.5975
9     -2.8050
10    -3.9200
11    -3.7125
12    -4.5425
13    -4.8850
14    -2.9900
15    -1.3650
16    -0.8150
17    16.9400
18    44.7850
19    33.1575
20    25.2100
21    20.8400
22    14.8500
23    13.6175
24    12.4825
25    14.0000
26    10.6375
27     8.9475
28    11.6050
29    15.1825
30    18.3575
31    20.8850
32    20.1650
33     6.4925
34    -0.3900
35    -0.5675
36    10.6000
37    20.8525
38    22.1300
39    21.4675
40    15.1300
41    27.7975
42    50.0825
43    55.3275
44    34.5850
45    26.4975
46    22.6250
47    22.2325
Name: Price ($/MWh), dtype: float64