# 04 — Price Prediction by Hour
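Compares a Gradient Boosting model against the TSO baseline (`price day ahead`) for predicting `price actual`. The model is trained on data up to 2017 and evaluated on 2018 using mean absolute error (MAE), both overall and per hour of day (0–23, taken from the UTC timestamp).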

In [1]:
import json
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Load the cleaned dataset and parse `time` as a timezone-aware (UTC) datetime
# so the `.dt` accessors used in later cells work on it.
df = (
    pd.read_parquet('../cleaned_data.parquet')
    .assign(time=lambda frame: pd.to_datetime(frame['time'], utc=True))
)
print(f"Shape: {df.shape}")

Shape: (35056, 80)


In [2]:
# Chronological hold-out: everything up to and including 2017 is used for
# training, calendar year 2018 is the test set.
year = df['time'].dt.year
train = df[year <= 2017]
test = df[year == 2018]

# Features: every column whose name begins with one of the weather prefixes,
# plus the calendar columns listed in `time_cols`.
weather_prefixes = ('temp', 'pressure', 'humidity', 'wind_speed', 'wind_deg',
                    'rain_1h', 'rain_3h', 'snow_3h', 'clouds_all')
weather_cols = [c for c in df.columns if c.startswith(weather_prefixes)]
time_cols = ['hour', 'day_of_week', 'month', 'is_weekend']
feature_cols = weather_cols + time_cols

target = 'price actual'

# Missing feature values are replaced with 0 before modelling.
X_train, X_test = (part[feature_cols].fillna(0) for part in (train, test))
y_train, y_test = train[target], test[target]

print(f"Train: {len(train)}, Test: {len(test)}")

Train: 26297, Test: 8759


In [3]:
# Fit Gradient Boosting with a fixed seed (`fit` returns the estimator itself).
gb = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Two competing forecasts for the test period: our model and the TSO's
# published `price day ahead` column.
y_pred_gb = gb.predict(X_test)
y_pred_tso = test['price day ahead'].values

# Overall metrics on the full test set
overall_mae_tso = mean_absolute_error(y_test, y_pred_tso)
overall_mae_gb = mean_absolute_error(y_test, y_pred_gb)
print(f"Overall MAE — TSO: {overall_mae_tso:.2f}, Ours: {overall_mae_gb:.2f}")

Overall MAE — TSO: 8.86, Ours: 10.48


In [4]:
# MAE per hour (0–23), for the TSO baseline and for our model.
# Collect actuals and both forecasts in one frame, tagged with the hour of the
# (UTC-parsed) timestamp.
test_results = (
    test[['time', target, 'price day ahead']]
    .assign(our_pred=y_pred_gb)
    .assign(hour=lambda frame: frame['time'].dt.hour)
)

# Output key -> column holding the corresponding forecast.
forecast_cols = {'mae_tso': 'price day ahead', 'mae_our_model': 'our_pred'}

hourly_data = []
for h in range(24):  # plain Python ints keep the records JSON-serialisable
    subset = test_results[test_results['hour'] == h]
    record = {'hour': h}
    for metric_name, pred_col in forecast_cols.items():
        record[metric_name] = round(mean_absolute_error(subset[target], subset[pred_col]), 2)
    hourly_data.append(record)

hourly_df = pd.DataFrame(hourly_data)
print(hourly_df.to_string(index=False))

 hour  mae_tso  mae_our_model
    0     8.86          11.74
    1     8.91          10.60
    2     9.00          10.07
    3     8.67           9.84
    4     8.34          11.26
    5     8.09          11.06
    6     8.45          10.46
    7     8.80          10.25
    8     8.43          10.06
    9     8.87          10.40
   10     8.93          10.75
   11     9.29          11.36
   12     9.10          11.29
   13     8.92          10.45
   14     9.02          10.29
   15     9.12          10.64
   16     9.47          11.18
   17    10.25          10.49
   18     9.95          11.11
   19     9.25          10.74
   20     8.58           9.11
   21     7.95           9.45
   22     7.89           9.44
   23     8.53           9.49


In [5]:
# Export JSON
# Writes the overall and per-hour MAE of both forecasts to the dashboard's
# static data folder. `os` and `json` come from the notebook's top import cell.
out_dir = '../dashboard/public/data'
os.makedirs(out_dir, exist_ok=True)  # create the folder on first run; no-op afterwards

output = {
    'overall': {
        'mae_tso': round(mean_absolute_error(y_test, y_pred_tso), 2),
        'mae_our_model': round(mean_absolute_error(y_test, y_pred_gb), 2),
    },
    'hourly': hourly_data,
}

# Explicit UTF-8 so the file does not depend on the platform's default encoding.
with open(f'{out_dir}/price_prediction.json', 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2)

print('Saved price_prediction.json')

Saved price_prediction.json
