In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from xgboost import XGBRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Grab the palette BEFORE switching the matplotlib style: color_palette() with no
# arguments reads whichever color cycle is active at call time, so swapping these
# two lines would change what color_pal[i] refers to in the plotting cells.
color_pal = sns.color_palette()
# Global matplotlib look for every figure drawn after this point.
plt.style.use('fivethirtyeight')

In [None]:
# Read the hourly load file; the 'Datetime' column is parsed as timestamps and
# becomes the row index so later cells can filter and derive features by time.
df = pd.read_csv(
    '/kaggle/input/hourly-energy-consumption/PJME_hourly.csv',
    index_col='Datetime',
    parse_dates=['Datetime'],
)
# Preview the first rows.
df.head()

In [None]:
# Preview the last five rows of the loaded frame.
df.tail(n=5)

In [None]:
# Plot every column of the frame against its datetime index to get a first look
# at the whole series.
ax = df.plot(
    figsize=(30, 10),
    color=color_pal[5],
    # Region code spelled to match the data file and the PJME_MW column.
    title='PJME Energy Consumption',
)

ax.set_xlabel('Date-Time')
ax.set_ylabel('Estimated Energy Consumption in Mega-Watts')

plt.show()

In [None]:
# Chronological hold-out: rows stamped before the cutoff are used for training,
# rows at or after it are kept for testing.
split_at = '2015-01-01'
train = df[df.index < split_at]
test = df[df.index >= split_at]

In [None]:
# Draw both partitions on one shared axis so the split is visible at a glance.
fig, ax = plt.subplots(figsize=(30, 10))

train.plot(
    ax=ax,
    color=color_pal[5],
    label='Training Set',
    title='Data Train/Test Split',
)
test.plot(ax=ax, color=color_pal[0], label='Test Set')

# Solid black vertical rule at the start of 2015, where training data ends and
# test data begins.
ax.axvline('2015', ls='-', color='black')
ax.legend(['Training Set', 'Test Set'])

plt.show()

In [None]:
# Restrict to calendar year 2010. A boolean mask selects the same rows as the
# label slice '2010-01-01':'2010-12-31' but does not depend on the DatetimeIndex
# being sorted (NOTE(review): the CSV's row order is not visible here; string
# label slices can fail on a non-monotonic index).
in_2010 = (df.index >= '2010-01-01') & (df.index < '2011-01-01')
df_2010 = df.loc[in_2010]

fig = px.scatter(df_2010, x=df_2010.index, y=df_2010.PJME_MW, color=df_2010.PJME_MW)

# NOTE(review): size=0 asks for zero-size markers, which may render as invisible
# points despite the "Bubble Chart" title - confirm this is intended.
fig.update_traces(marker=dict(size=0))

# One tick per month of 2010. Building ticks from the full frame's index would
# request a tick for every hourly row of every year in the dataset.
month_starts = pd.date_range('2010-01-01', periods=12, freq='MS')

fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Energy Consumption',
    title='Monthly Energy Consumption in 2010 (Bubble Chart)',
    xaxis=dict(
        tickvals=month_starts,
        ticktext=month_starts.strftime('%b'),  # 'Jan', 'Feb', ...
        tickangle=45
    )
)
fig.show()

In [None]:
def create_features(df):
    """
    Derive calendar features from the frame's datetime index.

    The input is left untouched; a new frame is returned with these columns
    added (or overwritten if already present): hour, dayofweek, quarter,
    month, year, dayofyear, dayofmonth, weekofyear.

    The index must expose datetime accessors (``.hour``, ``.isocalendar()``
    and so on), i.e. behave like a ``DatetimeIndex``.
    """
    stamps = df.index
    # assign() works on a copy, so the caller's frame is not modified.
    return df.assign(
        hour=stamps.hour,
        dayofweek=stamps.dayofweek,
        quarter=stamps.quarter,
        month=stamps.month,
        year=stamps.year,
        dayofyear=stamps.dayofyear,
        dayofmonth=stamps.day,
        weekofyear=stamps.isocalendar().week,
    )

# Rebind df to a copy of itself enriched with the calendar feature columns.
df = df.pipe(create_features)

In [None]:
# Preview the first five rows, now including the calendar feature columns.
df.head(n=5)

In [None]:
# Bar chart of PJME_MW grouped by hour of day, drawn on an explicitly created axis.
fig, ax = plt.subplots(figsize=(15, 7))
sns.barplot(x='hour', y='PJME_MW', data=df, ax=ax)
ax.set_title('MW by Hour')

plt.show()

In [None]:
# Add the calendar feature columns to both partitions (train first, then test).
train, test = (create_features(part) for part in (train, test))

In [None]:
# Column the model predicts, and the calendar columns it predicts from.
TARGET = 'PJME_MW'
FEATURES = [
    'dayofyear',
    'hour',
    'dayofweek',
    'quarter',
    'month',
    'year',
]

# Feature matrix / target vector for each partition.
X_train, y_train = train[FEATURES], train[TARGET]
X_test, y_test = test[FEATURES], test[TARGET]


In [None]:
# Early stopping is driven by the LAST entry of eval_set. Pointing it at the test
# split would let test data decide when boosting stops, so the reported test
# error would no longer be an honest out-of-sample estimate. Instead, hold out
# the final calendar year of the training window as a validation set and leave
# X_test / y_test untouched for the final evaluation.
last_train_year = X_train.index.year.max()
in_valid = X_train.index.year == last_train_year

X_fit, y_fit = X_train[~in_valid], y_train[~in_valid]
X_valid, y_valid = X_train[in_valid], y_train[in_valid]

boost = XGBRegressor(base_score=0.5, booster='gbtree',
                     n_estimators=1000,          # upper bound on boosting rounds
                     early_stopping_rounds=200,  # stop after 200 rounds without validation gain
                     objective='reg:squarederror',
                     max_depth=3,
                     learning_rate=0.01)
# eval_set order: [0] fitted rows (training curve), [1] validation rows (early stopping).
boost.fit(X_fit, y_fit,
          eval_set=[(X_fit, y_fit), (X_valid, y_valid)],
          verbose=100)