#### Median Ensemble    

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
color_pal = sns.color_palette()

In [2]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

In [3]:
# Define folder and explicitly included files
folder = "results"

# List of CSV files to be included in the analysis
included_files = ["XGBoost.csv", "LightGBM.csv", "Quantile Regression.csv", "RF Quantile Regression.csv"]

# Dictionary to store the loaded DataFrames
dataframes = {}

# Load CSV files from the results folder
for file_name in os.listdir(folder):
    if file_name.endswith(".csv") and file_name in included_files:
        file_path = os.path.join(folder, file_name)

        # Read CSV file and set datetime column as index
        df = pd.read_csv(file_path, parse_dates=["Datetime"], index_col="Datetime")
        
        # Convert datetime index to UTC and then to Europe/Berlin timezone
        df.index = pd.to_datetime(df.index, utc=True)
        df.index = df.index.tz_convert('Europe/Berlin')
        
        # Store the DataFrame in the dictionary
        dataframes[file_name] = df

# CHeck if dataframes are loaded
if not dataframes:
    print("No csv")
else:
    all_dataframes = list(dataframes.values())
    
    first_columns = all_dataframes[0].columns
    if all(df.columns.equals(first_columns) for df in all_dataframes):
        Median_ensemble = pd.concat(all_dataframes).groupby(level=0).median()
    else:
        print("csv have different structure")

In [4]:
# Sort Quantile columns if quantile crossong occurs
results = Median_ensemble.copy()

def fix_quantile_crossing(results):
    quantile_columns = [col for col in results.columns if col.startswith('q')]

    for idx in results.index:
        sorted_values = sorted(results.loc[idx, quantile_columns].values)
        results.loc[idx, quantile_columns] = sorted_values
    
    return results

In [5]:
folder = "results"
os.makedirs(folder, exist_ok=True)
results.to_csv(f"{folder}/Median Ensemble.csv", index=True)
results
results

Unnamed: 0_level_0,target,q0.025,q0.25,q0.5,q0.75,q0.975
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-22 01:00:00+01:00,3284.0,2261.950850,3124.409471,3407.469804,3648.445216,5086.198085
2024-02-23 01:00:00+01:00,4771.0,3428.988623,4693.416194,4802.510813,5060.826543,5494.559389
2024-02-24 01:00:00+01:00,3351.0,2767.777519,3817.482900,3983.651673,4337.406068,5176.500259
2024-02-25 01:00:00+01:00,1850.0,1511.088080,1794.557784,1941.702855,2133.158408,4301.228622
2024-02-26 01:00:00+01:00,4518.0,2809.349261,3931.184533,4260.996042,4357.866343,5126.054665
...,...,...,...,...,...,...
2025-02-15 01:00:00+01:00,2819.0,2519.183535,3299.584220,3568.520329,3769.806306,5002.821062
2025-02-16 01:00:00+01:00,1329.0,1174.907540,1289.087069,1457.815310,1413.211921,3944.691920
2025-02-17 01:00:00+01:00,4081.0,2728.772384,3561.937930,3999.312934,4310.764896,5326.277278
2025-02-18 01:00:00+01:00,4173.0,2855.092770,3801.252337,4169.807182,4490.368822,5293.970198


#### Evaluation

In [6]:
quantile_losses = {}

for q in quantiles:
    y_pred = results[f'q{q}']
    y_true = results['target']
    
    quantile_loss = np.where(y_pred > y_true, 
                             2 * (1 - q) * (y_pred - y_true), 
                             2 * q * (y_true - y_pred))
    
    quantile_losses[f'Quantile_{q}'] = quantile_loss.mean()

total_loss_score = sum(quantile_losses.values())

print("Average Loss:")
for quantile, loss in quantile_losses.items():
    print(f"{quantile}: {loss}")

print(f"\Total Average Quantile Loss: {total_loss_score}")

Average Loss:
Quantile_0.025: 97.53332603186844
Quantile_0.25: 404.5905478966258
Quantile_0.5: 474.0480201810206
Quantile_0.75: 368.7543513324433
Quantile_0.975: 85.93832931798462
\Total Average Quantile Loss: 1430.8645747599428


In [7]:
results['dayofweek'] = results.index.dayofweek

horizons_dict = {}

target_horizons = [
    {"dayofweek": 1, "name": "1 day"},  # Dienstag
    {"dayofweek": 2, "name": "2 day"},  # Mittwoch
    {"dayofweek": 3, "name": "3 day"},  # Donnerstag
    {"dayofweek": 4, "name": "4 day"},  # Freitag
    {"dayofweek": 5, "name": "5 day"},  # Samstag
    {"dayofweek": 6, "name": "6 day"},  # Sonntag
]


for horizon in target_horizons:
    horizon_data = results[(results["dayofweek"] == horizon["dayofweek"])]
    horizon_data = horizon_data.drop(columns=["dayofweek"])

    horizons_dict[horizon["name"]] = horizon_data

In [8]:
def calculate_quantile_losses(horizons_dict, quantiles):
    all_quantile_losses = {}
    
    for key, df in horizons_dict.items():
        quantile_losses = {}
        for q in quantiles:
            y_pred = df[f'q{q}']
            y_true = df['target']
            quantile_loss = np.where(y_pred > y_true, 2 * (1 - q) * (y_pred - y_true), 2 * q * (y_true - y_pred))
            quantile_losses[f'q{q}'] = quantile_loss.mean()
        
        total_loss_score = sum(quantile_losses.values())
        quantile_losses['Total_Loss_Score'] = total_loss_score
        all_quantile_losses[key] = quantile_losses
    
    return all_quantile_losses

quantile_loss_results = calculate_quantile_losses(horizons_dict, quantiles)

horizon_results_df = pd.DataFrame(quantile_loss_results).T
horizon_results_df

Unnamed: 0,q0.025,q0.25,q0.5,q0.75,q0.975,Total_Loss_Score
1 day,158.246634,514.286575,539.067543,385.430603,77.885955,1674.91731
2 day,103.050915,365.251411,424.789396,319.706709,68.100836,1280.899267
3 day,95.104023,390.961866,473.492508,407.431741,91.710726,1458.700864
4 day,127.72714,489.07397,532.493587,400.965574,75.390828,1625.651099
5 day,67.811915,477.096021,647.936965,463.596081,101.344703,1757.785685
6 day,45.303438,194.459126,257.711337,253.895222,122.783253,874.152374


#### Final Evaluation Score

In [9]:
horizon_results_df.sum()

q0.025               597.244064
q0.25               2431.128967
q0.5                2875.491336
q0.75               2231.025931
q0.975               537.216300
Total_Loss_Score    8672.106599
dtype: float64