#### Median Ensemble    

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
color_pal = sns.color_palette()

In [2]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

In [None]:
# Define folder and explicitly included files
folder = "results"

# List of CSV files to be included in the analysis
included_files = ["XGBoost.csv", "LightGBM.csv", "Quantile Regression.csv", "RF Quantile Regression.csv"]

# Dictionary to store the loaded DataFrames
dataframes = {}

# Load CSV files from the results folder
for file_name in os.listdir(folder):
    if file_name.endswith(".csv") and file_name in included_files:
        file_path = os.path.join(folder, file_name)

        # Read CSV file and set datetime column as index
        df = pd.read_csv(file_path, parse_dates=["Datetime"], index_col="Datetime")
        
        # Convert datetime index to UTC and then to Europe/Berlin timezone
        df.index = pd.to_datetime(df.index, utc=True)
        df.index = df.index.tz_convert('Europe/Berlin')
        
        # Store the DataFrame in the dictionary
        dataframes[file_name] = df

# CHeck if dataframes are loaded
if not dataframes:
    print("No csv")
else:
    all_dataframes = list(dataframes.values())
    
    first_columns = all_dataframes[0].columns
    if all(df.columns.equals(first_columns) for df in all_dataframes):
        Median_ensemble = pd.concat(all_dataframes).groupby(level=0).median()
    else:
        print("csv have different structure")

In [None]:
# Sort Quantile columns if quantile crossong occurs
results = Median_ensemble.copy()

def fix_quantile_crossing(results):
    quantile_columns = [col for col in results.columns if col.startswith('q')]

    for idx in results.index:
        sorted_values = sorted(results.loc[idx, quantile_columns].values)
        results.loc[idx, quantile_columns] = sorted_values
    
    return results

In [5]:
folder = "results"
os.makedirs(folder, exist_ok=True)
results.to_csv(f"{folder}/Median Ensemble.csv", index=True)
results
results

Unnamed: 0_level_0,target,q0.025,q0.25,q0.5,q0.75,q0.975
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-22 01:00:00+01:00,48.25425,45.037508,45.915488,46.934534,48.196240,50.584345
2024-02-22 02:00:00+01:00,47.45925,44.741577,45.485921,46.434342,47.553239,49.543518
2024-02-22 03:00:00+01:00,47.70125,44.982484,46.129792,47.171630,48.275048,50.294886
2024-02-22 04:00:00+01:00,48.74600,46.018607,47.724599,48.994587,49.934709,51.685667
2024-02-22 05:00:00+01:00,51.82350,48.941498,50.403571,51.915589,52.766591,55.318144
...,...,...,...,...,...,...
2025-02-19 20:00:00+01:00,67.27350,62.398092,65.119249,66.030536,66.906161,67.825707
2025-02-19 21:00:00+01:00,63.54225,57.573884,61.150932,62.272025,62.623170,63.857059
2025-02-19 22:00:00+01:00,60.85075,54.852335,57.732074,58.786553,59.204978,60.927780
2025-02-19 23:00:00+01:00,57.24750,51.522341,54.785843,55.776668,55.718605,57.890709


#### Evaluation

In [None]:
quantile_losses = {}

for q in quantiles:
    y_pred = results[f'q{q}']
    y_true = results['target']
    
    quantile_loss = np.where(y_pred > y_true, 
                             2 * (1 - q) * (y_pred - y_true), 
                             2 * q * (y_true - y_pred))
    
    quantile_losses[f'Quantile_{q}'] = quantile_loss.mean()

total_loss_score = sum(quantile_losses.values())

print("Average Loss:")
for quantile, loss in quantile_losses.items():
    print(f"{quantile}: {loss}")

print(f"\Total Average Quantile Loss: {total_loss_score}")

Durchschnittlicher Quantile Loss für jedes Quantil:
Quantile_0.025: 0.3122006314147768
Quantile_0.25: 1.3338554548317845
Quantile_0.5: 1.4800538109376686
Quantile_0.75: 1.2032477188295707
Quantile_0.975: 0.24220877255004516

Gesamtscore über alle Quantile: 4.571566388563845


In [None]:
results['hour'] = results.index.hour
results['dayofweek'] = results.index.dayofweek

horizons_dict = {}

target_horizons = [
    {"dayofweek": 4, "hour": 12, "name": "36"},  # Freitag 12:00 Stunde: 36
    {"dayofweek": 4, "hour": 16, "name": "40"},  # Freitag 16:00 Stunde: 40
    {"dayofweek": 4, "hour": 20, "name": "44"},  # Freitag 20:00 Stunde: 44
    {"dayofweek": 5, "hour": 12, "name": "60"},  # Samstag 12:00 Stunde: 60
    {"dayofweek": 5, "hour": 16, "name": "64"},  # Samstag 16:00 Stunde: 64
    {"dayofweek": 5, "hour": 20, "name": "68"},  # Samstag 20:00 Stunde: 68
]

for horizon in target_horizons:
    horizon_data = results[(results["dayofweek"] == horizon["dayofweek"]) & (results["hour"] == horizon["hour"])]
    horizon_data = horizon_data.drop(columns=["hour", "dayofweek"])

    horizons_dict[horizon["name"]] = horizon_data

In [None]:
def calculate_quantile_losses(horizons_dict, quantiles):
    all_quantile_losses = {}
    
    for key, df in horizons_dict.items():
        quantile_losses = {}
        for q in quantiles:
            y_pred = df[f'q{q}']
            y_true = df['target']
            quantile_loss = np.where(y_pred > y_true, 2 * (1 - q) * (y_pred - y_true), 2 * q * (y_true - y_pred))
            quantile_losses[f'q{q}'] = quantile_loss.mean()
        
        total_loss_score = sum(quantile_losses.values())
        quantile_losses['Total_Loss_Score'] = total_loss_score
        all_quantile_losses[key] = quantile_losses
    
    return all_quantile_losses

quantile_loss_results = calculate_quantile_losses(horizons_dict, quantiles)

horizon_results_df = pd.DataFrame(quantile_loss_results).T
horizon_results_df

Unnamed: 0,q0.025,q0.25,q0.5,q0.75,q0.975,Total_Loss_Score
36,0.495462,1.755645,1.740637,1.306716,0.222785,5.521245
40,0.496016,1.586477,1.669313,1.342806,0.340979,5.43559
44,0.422968,1.355689,1.390319,1.125025,0.295106,4.589107
60,0.263398,1.137326,1.479417,1.227717,0.231123,4.338981
64,0.277828,1.090628,1.478109,1.210741,0.207589,4.264896
68,0.167943,1.0998,1.337035,1.02753,0.187752,3.820061


#### Final Evaluation Score

In [9]:
horizon_results_df.sum()

q0.025               2.123616
q0.25                8.025565
q0.5                 9.094831
q0.75                7.240536
q0.975               1.485333
Total_Loss_Score    27.969881
dtype: float64