# PJM Hourly Energy Consumption Case

PJM Interconnection LLC (PJM) is a regional transmission organization (RTO) in the United States. It is part of the Eastern Interconnection grid and operates an electric transmission system that serves all or parts of Delaware, Illinois, Indiana, Kentucky, Maryland, Michigan, New Jersey, North Carolina, Ohio, Pennsylvania, Tennessee, Virginia, West Virginia, and the District of Columbia.

The hourly power consumption data come from PJM's website and are expressed in megawatts (MW).

### XGBoost Training Step - By Sabrina Otoni da Silva - 2024/04

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

import xgboost as xgb

In [None]:
# Directory holding the intermediate datasets, relative to this notebook.
datapath = Path('..') / 'data' / 'd02_intermediate'

In [None]:
# Load the intermediate dataset, index it by timestamp and put the rows in
# chronological order.
df = pd.read_csv(datapath / 'pjme_n03.csv').set_index('datetime')
df.index = pd.to_datetime(df.index)
df = df.sort_index()

In [None]:
# Five chronological folds. Assuming one row per hour, each test window covers
# 365 days and 24 rows (one day) are skipped between a training window and its
# test window.
tss = TimeSeriesSplit(
    n_splits=5,
    test_size=24 * 365 * 1,
    gap=24,
)

In [None]:
# Model inputs: calendar columns followed by the three lag columns.
x = [
    'hour',
    'dayofweek',
    'quarter',
    'month',
    'year',
    'dayofyear',
    'day',
    'weekofyear',
    *(f'lag{k}' for k in range(1, 4)),
]
# Target column, kept as a one-element list so selecting it yields a DataFrame.
y = ['pjme_mw']

In [None]:
# Walk-forward cross-validation: one XGBoost regressor per TimeSeriesSplit fold.
# `preds` collects each fold's test predictions and `scores` its test RMSE.
preds = []
scores = []

for train_idx, test_idx in tss.split(df):
    train = df.iloc[train_idx]
    test = df.iloc[test_idx]

    X_train = train[x]
    y_train = train[y]

    X_test = test[x]
    y_test = test[y]

    # Early stopping needs a monitoring set. Monitoring the test fold would
    # choose the number of trees that suits the very rows being scored and
    # bias the RMSE downwards, so the most recent slice of the training fold
    # is held out instead. The slice is as long as the test fold, capped at a
    # fifth of the training rows so short folds still leave data to fit on.
    val_size = min(len(test), len(train) // 5)
    split_at = len(train) - val_size

    X_fit, y_fit = X_train.iloc[:split_at], y_train.iloc[:split_at]
    X_val, y_val = X_train.iloc[split_at:], y_train.iloc[split_at:]

    reg = xgb.XGBRegressor(base_score=0.5,
                           booster='gbtree',
                           n_estimators=1_000,
                           early_stopping_rounds=50,
                           objective='reg:squarederror',
                           max_depth=3,
                           learning_rate=0.01)

    # XGBoost applies early stopping to the LAST entry of eval_set, so the
    # held-out validation slice must come last; the fit slice is listed only
    # so its metric is logged alongside.
    reg.fit(X_fit, y_fit,
            eval_set=[(X_fit, y_fit), (X_val, y_val)],
            verbose=100)

    # The test fold is touched only here, for prediction and scoring.
    y_pred = reg.predict(X_test)
    preds.append(y_pred)

    # Root mean squared error of this fold, in the units of the target.
    score = np.sqrt(mean_squared_error(y_test, y_pred))
    scores.append(score)

In [None]:
# Report the mean RMSE over all folds, then the individual fold values.
print(
    f'Score across folds: {np.mean(scores):0.4f}',
    f'Fold scores: {scores}',
    sep='\n',
)