#### Mean Ensemble

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
color_pal = sns.color_palette()

In [2]:
# Quantile levels to evaluate; each level q is looked up as column f'q{q}' in the result frames
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

In [3]:
# Define folder and explicitly included files
folder = "results"

# Model result files that make up the ensemble
included_files = ["XGBoost.csv", "LightGBM.csv", "Quantile Regression.csv", "RF Quantile Regression.csv"]

# Loaded DataFrames, keyed by file name
dataframes = {}

# Walk the explicit list instead of os.listdir(): the load order becomes
# deterministic and a missing model file is reported instead of silently
# shrinking the ensemble.
for file_name in included_files:
    file_path = os.path.join(folder, file_name)
    if not os.path.isfile(file_path):
        print(f"Warning: {file_path} not found, model left out of the ensemble")
        continue

    # Read CSV file and set datetime column as index
    df = pd.read_csv(file_path, parse_dates=["Datetime"], index_col="Datetime")

    # Normalise the index to UTC first, then express it in Europe/Berlin time
    df.index = pd.to_datetime(df.index, utc=True).tz_convert('Europe/Berlin')

    # Store the DataFrame in the dictionary
    dataframes[file_name] = df

# Fail loudly: carrying on without Mean_ensemble would either raise a NameError
# in the next cell or silently reuse a stale value from an earlier run.
if not dataframes:
    raise FileNotFoundError(f"No csv: none of {included_files} found in '{folder}'")

all_dataframes = list(dataframes.values())

# All model files must share the same column layout to be averaged column-wise
first_columns = all_dataframes[0].columns
if not all(df.columns.equals(first_columns) for df in all_dataframes):
    raise ValueError("csv have different structure")

# Stack all model outputs and average the rows that share a timestamp
Mean_ensemble = pd.concat(all_dataframes).groupby(level=0).mean()


In [4]:
# Quantile-crossing repair: work on a copy so Mean_ensemble itself stays untouched;
# the sorting helper is defined right below.
results = Mean_ensemble.copy()

def fix_quantile_crossing(results):
    """Remove quantile crossing by sorting the quantile values within each row.

    Every column whose name starts with 'q' is treated as a quantile column.
    For each row the values of those columns are sorted in ascending order and
    written back in the existing column order, so the columns are expected to
    be laid out from the lowest to the highest quantile level.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame holding the quantile columns (other columns are left untouched).
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, for convenient chaining.

    Notes
    -----
    NaN values are moved to the last quantile column(s) of a row, which is how
    ``numpy.sort`` orders them.
    """
    quantile_columns = [col for col in results.columns if col.startswith('q')]

    # Nothing to sort; avoids assigning an (n, 0) array to an empty selection
    if not quantile_columns:
        return results

    # One vectorised row-wise sort replaces the per-row .loc loop; it is
    # positional, so it also works when the index holds duplicate labels.
    results[quantile_columns] = np.sort(results[quantile_columns].to_numpy(), axis=1)

    return results

In [5]:
folder = "results"
os.makedirs(folder, exist_ok=True)

# Enforce non-decreasing quantiles in every row before persisting and scoring;
# fix_quantile_crossing is defined above but was never actually applied.
results = fix_quantile_crossing(results)

# This notebook builds the *mean* ensemble, so store it under that name rather
# than "Median Ensemble.csv", which would clobber a median ensemble's output.
results.to_csv(os.path.join(folder, "Mean Ensemble.csv"), index=True)
results

Unnamed: 0_level_0,target,q0.025,q0.25,q0.5,q0.75,q0.975
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-22 01:00:00+01:00,48.25425,44.830530,45.984117,47.057447,48.278490,50.648483
2024-02-22 02:00:00+01:00,47.45925,43.760224,45.525394,46.430562,47.555875,49.712155
2024-02-22 03:00:00+01:00,47.70125,44.256679,46.182854,47.182064,48.335519,50.408230
2024-02-22 04:00:00+01:00,48.74600,46.251450,47.679499,48.926593,49.938173,51.772333
2024-02-22 05:00:00+01:00,51.82350,49.206657,50.603439,51.795747,52.767076,55.331980
...,...,...,...,...,...,...
2025-02-19 20:00:00+01:00,67.27350,62.190798,65.126892,66.159691,66.820468,68.441460
2025-02-19 21:00:00+01:00,63.54225,56.314822,60.872881,62.156700,62.555379,64.222300
2025-02-19 22:00:00+01:00,60.85075,54.619310,57.621969,58.771581,59.215309,61.049079
2025-02-19 23:00:00+01:00,57.24750,51.403248,54.765703,55.692216,56.164040,58.371352


#### Evaluation

In [6]:
# Mean quantile loss per quantile level over all rows of `results`.
# Over-predictions are weighted with 2 * (1 - q), under-predictions with 2 * q.
quantile_losses = {}

# The observations do not depend on the quantile level, so read them once
y_true = results['target']

for q in quantiles:
    y_pred = results[f'q{q}']

    quantile_loss = np.where(y_pred > y_true,
                             2 * (1 - q) * (y_pred - y_true),
                             2 * q * (y_true - y_pred))

    quantile_losses[f'Quantile_{q}'] = quantile_loss.mean()

# Overall score: sum of the per-quantile mean losses
total_loss_score = sum(quantile_losses.values())

print("Average Loss:")
for quantile, loss in quantile_losses.items():
    print(f"{quantile}: {loss}")

# "\T" is not a valid escape sequence (it printed a literal backslash); a blank
# line before the total was intended.
print(f"\nTotal Average Quantile Loss: {total_loss_score}")

Average Loss:
Quantile_0.025: 0.3094462060628277
Quantile_0.25: 1.3302844606374962
Quantile_0.5: 1.5017420033951767
Quantile_0.75: 1.2007077256815175
Quantile_0.975: 0.2317425057588937
\Total Average Quantile Loss: 4.573922901535912


In [7]:
# Evaluation slots, identified by weekday (pandas dayofweek, Monday=0) and hour
target_horizons = [
    {"dayofweek": 4, "hour": 12, "name": "36"},  # Friday 12:00, hour: 36
    {"dayofweek": 4, "hour": 16, "name": "40"},  # Friday 16:00, hour: 40
    {"dayofweek": 4, "hour": 20, "name": "44"},  # Friday 20:00, hour: 44
    {"dayofweek": 5, "hour": 12, "name": "60"},  # Saturday 12:00, hour: 60
    {"dayofweek": 5, "hour": 16, "name": "64"},  # Saturday 16:00, hour: 64
    {"dayofweek": 5, "hour": 20, "name": "68"},  # Saturday 20:00, hour: 68
]

# Select rows straight from the DatetimeIndex instead of adding helper columns
# to `results`: the frame stays as it was saved/displayed above, and re-running
# the save cell cannot leak 'hour'/'dayofweek' into the CSV.
index_dayofweek = results.index.dayofweek
index_hour = results.index.hour

horizons_dict = {}

for horizon in target_horizons:
    in_slot = (index_dayofweek == horizon["dayofweek"]) & (index_hour == horizon["hour"])

    # errors="ignore": the helper columns only exist if an older version of
    # this cell already ran in the same kernel session
    horizon_data = results[in_slot].drop(columns=["hour", "dayofweek"], errors="ignore")

    horizons_dict[horizon["name"]] = horizon_data

In [8]:
def calculate_quantile_losses(horizons_dict, quantiles):
    """Compute the mean quantile loss per horizon and quantile level.

    Parameters
    ----------
    horizons_dict : dict
        Maps a horizon name to a DataFrame with a 'target' column and one
        'q{level}' column for every entry of ``quantiles``.
    quantiles : iterable of float
        Quantile levels to score.

    Returns
    -------
    dict
        ``{horizon name: {'q{level}': mean loss, ..., 'Total_Loss_Score': sum}}``
        where 'Total_Loss_Score' is the sum of that horizon's per-quantile
        mean losses.
    """

    def _mean_loss(frame, level):
        # Over-predictions are weighted with 2 * (1 - level),
        # under-predictions with 2 * level.
        predicted = frame[f'q{level}']
        observed = frame['target']
        overshoot = 2 * (1 - level) * (predicted - observed)
        undershoot = 2 * level * (observed - predicted)
        return np.where(predicted > observed, overshoot, undershoot).mean()

    per_horizon = {}
    for name, frame in horizons_dict.items():
        losses = {f'q{level}': _mean_loss(frame, level) for level in quantiles}
        # Sum is taken before the total itself is added to the dict
        losses['Total_Loss_Score'] = sum(losses.values())
        per_horizon[name] = losses

    return per_horizon

# Score every horizon, then lay the nested dict out with one row per horizon
quantile_loss_results = calculate_quantile_losses(horizons_dict=horizons_dict, quantiles=quantiles)

horizon_results_df = pd.DataFrame(quantile_loss_results).transpose()
horizon_results_df

Unnamed: 0,q0.025,q0.25,q0.5,q0.75,q0.975,Total_Loss_Score
36,0.498945,1.769521,1.813322,1.275718,0.218243,5.57575
40,0.499488,1.604889,1.774801,1.389132,0.315804,5.584114
44,0.414118,1.374265,1.426172,1.200865,0.28855,4.703971
60,0.263276,1.097103,1.407522,1.131724,0.204534,4.104159
64,0.255415,1.126811,1.464852,1.142064,0.203965,4.193107
68,0.166137,1.083115,1.334069,0.986163,0.203355,3.772839


#### Final Evaluation Score

In [9]:
# Column-wise sum over all horizon rows; the Total_Loss_Score entry is the overall score
horizon_results_df.sum()

q0.025               2.097380
q0.25                8.055706
q0.5                 9.220738
q0.75                7.125666
q0.975               1.434451
Total_Loss_Score    27.933941
dtype: float64