# Error Analysis


Residual analysis (actual minus predicted `load_mw`) for the GBM model's test-set predictions, grouped by hour of day and day of week.

In [None]:
# Environment setup
import sys, subprocess
from pathlib import Path

# Show which interpreter this kernel runs, i.e. where the package will be installed.
print('Python:', sys.executable)
# When the notebook is launched from the notebooks/ directory, the repository
# root is one level up; otherwise the current directory is used as the root.
repo_root = Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd()
# Invoke pip through the running interpreter: a bare `pip` is looked up on PATH
# and may install into a different environment than this kernel's.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-e', str(repo_root)],
    check=True,  # raise CalledProcessError if the editable install fails
)

In [None]:
import pandas as pd, pickle

# Load the processed splits. Paths are anchored at repo_root (defined in the
# environment-setup cell) so this cell also works when the notebook is started
# from the notebooks/ directory rather than the repository root.
splits_dir = repo_root / 'data' / 'processed' / 'splits'
train = pd.read_parquet(splits_dir / 'train.parquet')
val = pd.read_parquet(splits_dir / 'val.parquet')
test = pd.read_parquet(splits_dir / 'test.parquet')

# Load GBM model bundle (a dict holding the fitted model and its feature columns).
# NOTE(review): unpickling executes arbitrary code; only load trusted artifacts.
model_path = repo_root / 'artifacts' / 'models' / 'gbm_lightgbm_load_mw.pkl'
with open(model_path, 'rb') as fh:  # context manager closes the handle after loading
    bundle = pickle.load(fh)
model = bundle['model']
feat_cols = bundle['feature_cols']

# Predict on the test split; residual = actual - predicted, so a positive
# value means the model under-predicted.
X = test[feat_cols].to_numpy()
y = test['load_mw'].to_numpy()
pred = model.predict(X)
resid = y - pred

# Per-row error table keyed by timestamp, with calendar features for grouping.
err = test[['timestamp']].copy()
err['resid'] = resid
ts = pd.to_datetime(err['timestamp'])  # parse once, reuse for both features
err['hour'] = ts.dt.hour
err['dow'] = ts.dt.dayofweek  # pandas convention: Monday=0 ... Sunday=6

err.groupby('hour')['resid'].mean().plot(title='Mean Residual by Hour')
