#### Median Ensemble    

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
color_pal = sns.color_palette()

In [2]:
# Quantile levels to evaluate; each level q is looked up as column f'q{q}' in the evaluation cells.
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

In [3]:
# Build the median ensemble from the individual model forecasts stored in `folder`.
folder = "results"

# Forecast files that take part in the ensemble (anything else in the folder is ignored)
included_files = ["XGBoost.csv", "LightGBM.csv", "Quantile Regression.csv", "RF Quantile Regression.csv"]

# Loaded forecasts, keyed by file name
dataframes = {}

for file_name in included_files:
    file_path = os.path.join(folder, file_name)

    if not os.path.isfile(file_path):
        # Missing files are still skipped, but no longer silently: an ensemble
        # built from fewer models than listed should be visible to the reader.
        print(f"Skipping missing file: {file_path}")
        continue

    # Read CSV file and set datetime column as index
    df = pd.read_csv(file_path, parse_dates=["Datetime"], index_col="Datetime")

    # Normalise the index to UTC first, then express it in Europe/Berlin local time
    df.index = pd.to_datetime(df.index, utc=True).tz_convert('Europe/Berlin')

    dataframes[file_name] = df

# Fail loudly instead of printing: otherwise `Median_ensemble` stays undefined
# (or stale from an earlier run) and the following cells break or mislead.
if not dataframes:
    raise FileNotFoundError(f"No csv: none of {included_files} found in '{folder}'")

all_dataframes = list(dataframes.values())
first_columns = all_dataframes[0].columns

# Every forecast must expose exactly the same columns, in the same order
mismatched_files = [
    name for name, frame in dataframes.items()
    if not frame.columns.equals(first_columns)
]
if mismatched_files:
    raise ValueError(f"csv have different structure: {mismatched_files}")

# Stack all forecasts and take the median per timestamp and column
Median_ensemble = pd.concat(all_dataframes).groupby(level=0).median()

In [4]:
# Working copy of the ensemble; its quantile columns can be sorted per row with
# `fix_quantile_crossing` (defined below) when quantile crossing occurs.
results = Median_ensemble.copy()

def fix_quantile_crossing(results):
    """Remove quantile crossing by sorting the quantile values of every row.

    All columns whose name starts with 'q' are treated as quantile forecasts.
    Within each row their values are rearranged into ascending order and
    written back in the existing column order, so the columns are expected to
    already be ordered from the lowest to the highest quantile level.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame holding the quantile columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same `results` object, with row-wise sorted quantile columns.
    """
    quantile_columns = [
        col for col in results.columns
        if isinstance(col, str) and col.startswith('q')
    ]

    # Nothing to sort (also avoids assigning an empty block back to the frame)
    if not quantile_columns:
        return results

    # Sort all rows at once; positional, so duplicate index labels are fine
    sorted_values = np.sort(results[quantile_columns].to_numpy(), axis=1)
    results[quantile_columns] = sorted_values

    return results

In [5]:
# Apply the quantile-crossing fix before anything is saved or evaluated;
# the helper is defined above but was never actually called.
results = fix_quantile_crossing(results)

# Persist the ensemble next to the individual model forecasts
folder = "results"
os.makedirs(folder, exist_ok=True)
results.to_csv(f"{folder}/Median Ensemble.csv", index=True)
results

Unnamed: 0_level_0,target,q0.025,q0.25,q0.5,q0.75,q0.975
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-22 01:00:00+01:00,3284.0,2415.604015,3703.204971,4064.780322,4418.253092,5354.822765
2024-02-23 01:00:00+01:00,4771.0,3069.940562,4415.838491,4714.148292,5024.685726,5903.480164
2024-02-24 01:00:00+01:00,3351.0,2333.945220,3306.800621,3526.725561,3783.014627,4788.041277
2024-02-25 01:00:00+01:00,1850.0,1177.924746,1610.592205,1779.863833,1923.400638,2861.109694
2024-02-26 01:00:00+01:00,4518.0,2503.555176,4134.694992,4555.823157,5046.868747,6149.167689
...,...,...,...,...,...,...
2025-02-15 01:00:00+01:00,2819.0,2159.440068,2978.454397,3221.079269,3450.972193,4420.432463
2025-02-16 01:00:00+01:00,1329.0,1070.695677,1446.303657,1650.012966,1928.002204,2603.425053
2025-02-17 01:00:00+01:00,4081.0,2773.621792,3875.453091,4154.926524,4450.605676,5498.598194
2025-02-18 01:00:00+01:00,4173.0,2925.464216,3992.650527,4294.041175,4517.881036,5729.506039


#### Evaluation

In [6]:
# Mean quantile (pinball) loss of the ensemble for every quantile level, over all rows.
# The loss is scaled by 2; over-prediction is weighted with (1 - q), under-prediction with q.
quantile_losses = {}
y_true = results['target']  # same for every quantile, so read it once

for q in quantiles:
    y_pred = results[f'q{q}']

    quantile_loss = np.where(y_pred > y_true,
                             2 * (1 - q) * (y_pred - y_true),
                             2 * q * (y_true - y_pred))

    quantile_losses[f'Quantile_{q}'] = quantile_loss.mean()

# Overall score: sum of the per-quantile mean losses
total_loss_score = sum(quantile_losses.values())

print("Average Loss:")
for quantile, loss in quantile_losses.items():
    print(f"{quantile}: {loss}")

# "\n" (blank line before the total) instead of the invalid escape "\T"
print(f"\nTotal Average Quantile Loss: {total_loss_score}")

Average Loss:
Quantile_0.025: 83.36862280701668
Quantile_0.25: 286.7680127839453
Quantile_0.5: 338.67711937642576
Quantile_0.75: 300.21523799509856
Quantile_0.975: 84.72849251788875
\Total Average Quantile Loss: 1093.757485480375


In [7]:
# Tag every forecast row with its weekday number (the labels below map 1 to Tuesday ... 6 to Sunday)
results['dayofweek'] = results.index.dayofweek

# Weekdays that are evaluated separately, each labelled as a forecast horizon
target_horizons = [
    {"dayofweek": 1, "name": "1 day"},  # Tuesday
    {"dayofweek": 2, "name": "2 day"},  # Wednesday
    {"dayofweek": 3, "name": "3 day"},  # Thursday
    {"dayofweek": 4, "name": "4 day"},  # Friday
    {"dayofweek": 5, "name": "5 day"},  # Saturday
    {"dayofweek": 6, "name": "6 day"},  # Sunday
]

# One frame per horizon: only that weekday's rows, without the helper column
horizons_dict = {
    spec["name"]: (
        results
        .loc[results["dayofweek"] == spec["dayofweek"]]
        .drop(columns=["dayofweek"])
    )
    for spec in target_horizons
}

In [8]:
def calculate_quantile_losses(horizons_dict, quantiles):
    """Compute the mean quantile loss per horizon and quantile level.

    Parameters
    ----------
    horizons_dict : dict
        Maps a horizon name to a DataFrame with a 'target' column and one
        'q{level}' column for every level in `quantiles`.
    quantiles : iterable of float
        Quantile levels to evaluate.

    Returns
    -------
    dict
        Maps each horizon name to a dict with one 'q{level}' entry per
        quantile (mean loss over the frame's rows) plus 'Total_Loss_Score',
        the sum of those entries.
    """
    def _mean_loss(frame, level):
        # Loss scaled by 2: (1 - level) weights over-prediction, level weights under-prediction
        forecast = frame[f'q{level}']
        observed = frame['target']
        over = 2 * (1 - level) * (forecast - observed)
        under = 2 * level * (observed - forecast)
        return np.where(forecast > observed, over, under).mean()

    summary = {}
    for name, frame in horizons_dict.items():
        per_level = {f'q{level}': _mean_loss(frame, level) for level in quantiles}
        per_level['Total_Loss_Score'] = sum(per_level.values())
        summary[name] = per_level

    return summary

# Evaluate every horizon, then arrange the result as one row per horizon
# and one column per quantile (plus the total score).
quantile_loss_results = calculate_quantile_losses(horizons_dict, quantiles)

horizon_results_df = pd.DataFrame(quantile_loss_results).transpose()
horizon_results_df

Unnamed: 0,q0.025,q0.25,q0.5,q0.75,q0.975,Total_Loss_Score
1 day,102.675336,392.744333,435.076943,377.52882,87.740765,1395.766198
2 day,95.215064,288.313052,363.123174,300.668836,86.006854,1133.326979
3 day,94.78049,307.546703,325.45041,311.028037,85.052983,1123.858622
4 day,92.621491,315.009899,362.341006,314.677428,86.618458,1171.268283
5 day,65.154004,248.661651,340.0939,319.159038,86.420867,1059.48946
6 day,40.161319,172.964836,200.48129,167.663209,75.565757,656.836411


#### Final Evaluation Score

In [9]:
# Column-wise totals: each loss column summed over all horizons
horizon_results_df.sum(axis=0)

q0.025               490.607704
q0.25               1725.240474
q0.5                2026.566722
q0.75               1790.725369
q0.975               507.405684
Total_Loss_Score    6540.545953
dtype: float64