In [4]:
model_performance = results_df.groupby("Model")["MAPE"].mean().sort_values()
best_model = model_performance.index[0]

print("=== Overall Model Performance (Mean MAPE) ===")
print(model_performance)
print(f"\nBest Performing Model (lowest MAPE): {best_model}")


=== Overall Model Performance (Mean MAPE) ===
Model
SARIMAX      6.844716
Ensemble     7.996853
GBR          8.817247
LSTM        10.159014
Name: MAPE, dtype: float64

Best Performing Model (lowest MAPE): SARIMAX


In [6]:
feature_performance = results_df.groupby("FeatureSet")["MAPE"].mean().sort_values()
best_feature_set = feature_performance.index[0]

print("\n=== Overall Feature Set Performance (Mean MAPE) ===")
print(feature_performance)
print(f"\nBest Performing Feature Set: {best_feature_set}")



=== Overall Feature Set Performance (Mean MAPE) ===
FeatureSet
Generated Features             7.480015
Both Exogenous & Generated     7.669233
Exogenous Features            10.214125
Name: MAPE, dtype: float64

Best Performing Feature Set: Generated Features


In [7]:
combo_performance = results_df.groupby(["Model", "FeatureSet"])["MAPE"].mean().sort_values()
best_combo = combo_performance.index[0]

print("\n=== Model-Feature Set Combinations (Mean MAPE) ===")
print(combo_performance)
print(f"\nBest Combination: {best_combo}")



=== Model-Feature Set Combinations (Mean MAPE) ===
Model     FeatureSet                
SARIMAX   Both Exogenous & Generated     3.788101
          Generated Features             3.910006
Ensemble  Generated Features             7.016058
          Both Exogenous & Generated     7.139478
GBR       Both Exogenous & Generated     8.817247
          Exogenous Features             8.817247
          Generated Features             8.817247
LSTM      Exogenous Features             9.368189
Ensemble  Exogenous Features             9.835023
LSTM      Generated Features            10.176749
          Both Exogenous & Generated    10.932105
SARIMAX   Exogenous Features            12.836040
Name: MAPE, dtype: float64

Best Combination: ('SARIMAX', 'Both Exogenous & Generated')


In [3]:
results_df.to_csv('ensemble_resuls.csv', index=False)

In [9]:
print("\n=== Best Combination by Horizon ===")
for horizon_value, group in results_df.groupby("Horizon"):
    combo_perf_horizon = group.groupby(["Model", "FeatureSet"])["MAPE"].mean().sort_values()
    best_combo_horizon = combo_perf_horizon.index[0]
    best_combo_mape    = combo_perf_horizon.iloc[0]
    print(f"Horizon = {horizon_value}: Best = {best_combo_horizon}, MAPE = {best_combo_mape:.2f}")



=== Best Combination by Horizon ===
Horizon = 1: Best = ('SARIMAX', 'Exogenous Features'), MAPE = 2.13
Horizon = 30: Best = ('SARIMAX', 'Both Exogenous & Generated'), MAPE = 4.14
Horizon = 90: Best = ('SARIMAX', 'Both Exogenous & Generated'), MAPE = 4.15
Horizon = 180: Best = ('SARIMAX', 'Both Exogenous & Generated'), MAPE = 3.95


In [13]:
actuals = test_data[target].iloc[:horizon].values
mean_actual = np.mean(actuals) 

In [17]:
import numpy as np
import pandas as pd
import warnings

from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.ensemble import GradientBoostingRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

###############################################################################
# 1) Load & Preprocess Data
###############################################################################
data = pd.read_csv("temp.csv")
target = "loadConsumption"

feature_sets = {
    "Exogenous Features": [
        'DailyMeanTemperature', 'DailyMeanWindspeed', 'DailyPrecipitation', 'day_of_week',
        'AveragePrice_Electricity_Household', 'AveragePrice_NaturalGas_Household',
        'Economic_Component', 'RenewableEnergy_Component'
    ],
    "Generated Features": [
        'is_weekend', 'is_holiday', 'day_of_year', 'rolling_7', 'rolling_30', 'rolling_365'
    ],
    "Both Exogenous & Generated": [
        'DailyMeanTemperature', 'DailyMeanWindspeed', 'DailyPrecipitation', 'day_of_week',
        'AveragePrice_Electricity_Household', 'AveragePrice_NaturalGas_Household',
        'Economic_Component', 'RenewableEnergy_Component',
        'is_weekend', 'is_holiday', 'day_of_year', 'rolling_7', 'rolling_30', 'rolling_365'
    ]
}

forecast_horizons = [1, 30, 90, 180]
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)

###############################################################################
# 2) Helper Functions
###############################################################################
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def build_lstm(seq_len):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(seq_len, 1)))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

###############################################################################
# 3) Main Loop Over Feature Sets and Horizons
###############################################################################
results_rows = []
for fs_name, fs_cols in feature_sets.items():
    for horizon in forecast_horizons:
        print(f"\n=== Feature Set: {fs_name} | Horizon: {horizon} ===")

        # SARIMAX
        sarimax_model = SARIMAX(
            endog=train_data[target],
            exog=train_data[fs_cols],
            order=(2,0,3),
            seasonal_order=(1,0,1,7),
            enforce_stationarity=False,
            enforce_invertibility=False
        )
        sarimax_results = sarimax_model.fit(disp=False)
        future_exog = test_data[fs_cols].iloc[:horizon] if fs_cols else None
        sarimax_forecast = sarimax_results.get_forecast(steps=horizon, exog=future_exog).predicted_mean.values

        # GBR
        train_gbr = train_data.copy()
        scaler_gbr = MinMaxScaler()
        train_gbr['scaled_target'] = scaler_gbr.fit_transform(train_gbr[target].values.reshape(-1,1))

        def make_gbr_dataset(df, lag=7):
            X, y = [], []
            series = df['scaled_target'].values
            for i in range(lag, len(series)):
                X.append(series[i-lag:i])
                y.append(series[i])
            return np.array(X), np.array(y)

        X_train_gbr, y_train_gbr = make_gbr_dataset(train_gbr, lag=7)
        gbr = GradientBoostingRegressor(n_estimators=100, max_depth=3, random_state=42)
        gbr.fit(X_train_gbr, y_train_gbr)

        last_seq = list(X_train_gbr[-1])
        gbr_forecast_scaled = []
        for step in range(horizon):
            next_val = gbr.predict([last_seq[-7:]])[0]
            gbr_forecast_scaled.append(next_val)
            last_seq.append(next_val)

        gbr_forecast = scaler_gbr.inverse_transform(np.array(gbr_forecast_scaled).reshape(-1,1)).flatten()

        # LSTM
        train_lstm = train_data.copy()
        scaler_lstm = MinMaxScaler()
        train_lstm['scaled_target'] = scaler_lstm.fit_transform(train_lstm[target].values.reshape(-1,1))

        def make_lstm_dataset(df, window=14):
            X, y = [], []
            series = df['scaled_target'].values
            for i in range(window, len(series)):
                X.append(series[i-window:i])
                y.append(series[i])
            return np.array(X), np.array(y)

        X_train_lstm, y_train_lstm = make_lstm_dataset(train_lstm)
        X_train_lstm = X_train_lstm.reshape(X_train_lstm.shape[0], X_train_lstm.shape[1], 1)

        model_lstm = build_lstm(14)
        model_lstm.fit(X_train_lstm, y_train_lstm, epochs=10, batch_size=32, verbose=0)

        lstm_preds_scaled = []
        last_window = X_train_lstm[-1,:,:].reshape(1, 14, 1)

        for step in range(horizon):
            next_scaled = model_lstm.predict(last_window, verbose=0)[0, 0]
            lstm_preds_scaled.append(next_scaled)
            new_window = np.append(last_window[0,1:,0], [next_scaled])
            last_window = new_window.reshape(1, 14, 1)

        lstm_forecast = scaler_lstm.inverse_transform(np.array(lstm_preds_scaled).reshape(-1,1)).flatten()

        # Ensemble
        ensemble_forecast = (sarimax_forecast + gbr_forecast + lstm_forecast) / 3.0

        # Actuals and normalizing constant
        actuals = test_data[target].iloc[:horizon].values
        norm_const = np.mean(actuals)

        def rmse(y_true, y_pred):
            return np.sqrt(mean_squared_error(y_true, y_pred))

        # Append normalized (%-based) metrics
        for model_name, forecast in [
            ("SARIMAX", sarimax_forecast),
            ("GBR", gbr_forecast),
            ("LSTM", lstm_forecast),
            ("Ensemble", ensemble_forecast)
        ]:
            rmse_val = rmse(actuals, forecast) / norm_const * 100
            mae_val = mean_absolute_error(actuals, forecast) / norm_const * 100
            mape_val = mean_absolute_percentage_error(actuals, forecast)  # already in %

            results_rows.append({
                "FeatureSet": fs_name,
                "Horizon": horizon,
                "Model": model_name,
                "RMSE (%)": rmse_val,
                "MAE (%)": mae_val,
                "MAPE (%)": mape_val
            })

###############################################################################
# 4) Compile Results
###############################################################################
results_df = pd.DataFrame(results_rows)
print("\n============ Final Normalized Results (in %) ============")
print(results_df[["FeatureSet","Horizon","Model","RMSE (%)","MAE (%)","MAPE (%)"]])



=== Feature Set: Exogenous Features | Horizon: 1 ===

=== Feature Set: Exogenous Features | Horizon: 30 ===

=== Feature Set: Exogenous Features | Horizon: 90 ===

=== Feature Set: Exogenous Features | Horizon: 180 ===

=== Feature Set: Generated Features | Horizon: 1 ===

=== Feature Set: Generated Features | Horizon: 30 ===

=== Feature Set: Generated Features | Horizon: 90 ===

=== Feature Set: Generated Features | Horizon: 180 ===

=== Feature Set: Both Exogenous & Generated | Horizon: 1 ===

=== Feature Set: Both Exogenous & Generated | Horizon: 30 ===

=== Feature Set: Both Exogenous & Generated | Horizon: 90 ===

=== Feature Set: Both Exogenous & Generated | Horizon: 180 ===

                    FeatureSet  Horizon     Model   RMSE (%)    MAE (%)  \
0           Exogenous Features        1   SARIMAX   2.131296   2.131296   
1           Exogenous Features        1       GBR  12.052463  12.052463   
2           Exogenous Features        1      LSTM   5.574308   5.574308   
3      

In [2]:
results_df

Unnamed: 0,FeatureSet,Horizon,Model,RMSE,MAE,MAPE
0,Exogenous Features,1,SARIMAX,6083.064418,6083.064418,2.131296
1,Exogenous Features,1,GBR,34399.678038,34399.678038,12.052463
2,Exogenous Features,1,LSTM,23909.805625,23909.805625,8.377173
3,Exogenous Features,1,Ensemble,21464.182694,21464.182694,7.520311
4,Exogenous Features,30,SARIMAX,18917.728808,16480.003845,6.377902
5,Exogenous Features,30,GBR,24352.282568,19484.082072,7.140548
6,Exogenous Features,30,LSTM,24975.016539,20850.353083,7.988161
7,Exogenous Features,30,Ensemble,19649.768651,16406.640124,6.269447
8,Exogenous Features,90,SARIMAX,25762.504552,21714.921326,8.83424
9,Exogenous Features,90,GBR,25015.495599,20201.217202,8.165212
