# Power Load Forecasting Based on LSTM + XGBoost

In [1]:
# Importing useful libraries
import os
import pymysql
import requests
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
rcParams['figure.figsize'] = 18,6
from sqlalchemy import create_engine
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from datetime import datetime, timedelta, date

# Importing libraries for RNNs
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.python.client import device_lib
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.initializers import he_normal
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, Lambda
from tensorflow.keras.layers import ELU, LSTM, Dense, Dropout, Input, Reshape, Flatten, multiply, concatenate, LeakyReLU

# Importing libraries for XGBoost
import xgboost as xgb
from sklearn.svm import SVR
from xgboost import plot_importance
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.ensemble import AdaBoostRegressor, ExtraTreesRegressor
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC
from sklearn.metrics import r2_score, accuracy_score, mean_squared_error, mean_squared_log_error
from sklearn.model_selection import KFold, cross_val_score, train_test_split, RandomizedSearchCV, GridSearchCV


# User-defined classes
# The working directory is switched to the project folder before the local
# modules are imported (presumably so that they, and relative data files such
# as 'holidays.csv', resolve from there — confirm).
# The folder can be overridden with the BYPL_NEURALNETS_DIR environment
# variable; the original machine-specific location stays the default so
# existing setups keep working unchanged.
path = os.environ.get(
    "BYPL_NEURALNETS_DIR",
    "C:\\Users\\GirrajJangid\\jupyter notebook\\Climate Connect\\ServerFiles\\BYPL\\DayAheads\\keras_server\\NeuralNets\\",
)
os.chdir(path)
# NOTE(review): star import — presumably the source of `preprocess` and `misc`
# used in later cells; confirm and consider importing the names explicitly.
from bypl_support_new import *
from DBHelper import DbHelper
db = DbHelper()

# Data collection

In [None]:
# This will update the pickle file
def update_datasets():
    """Re-run the load and weather fetches and report completion.

    Calls ``preprocess().fetch_data`` and ``preprocess().weather`` with the
    same arguments the notebook uses elsewhere. The frames they return are
    deliberately discarded: names assigned inside this function never reach
    the notebook namespace, so binding them to locals only suggested that
    ``raw_df`` / ``raw_wdf`` were being populated. Refreshing the pickle
    files is presumably a side effect of these calls (confirm in
    bypl_support_new).

    Returns:
        str: The literal message ``"Pickle updated."``.
    """
    preprocess().fetch_data('bypl_data','train','holidays.csv')
    preprocess().weather('brpl_weather_data', 8, {13:[1]}, 'no')
    return "Pickle updated."

In [None]:
# Run the refresh and show the status message it returns.
print(update_datasets())

In [None]:

# NOTE(review): `dates` is not referenced again in the visible part of the notebook.
dates = date(2020,4,17)

# Work on copies of the fetched frames.
# NOTE(review): in top-to-bottom order raw_df / raw_wdf are first assigned in a
# later cell (update_datasets() does not export them), so this cell fails on a
# fresh kernel unless that cell is run first.
df,wdf = raw_df.copy(), raw_wdf.copy()


In [None]:
def data_selection(data, year, month):
    """Return the rows of ``data`` that fall in the given year(s) and month(s).

    Args:
        data: DataFrame with ``year`` and ``month`` columns.
        year: A single year or a list-like of years to keep.
        month: A single month or a list-like of months to keep.

    Returns:
        A new DataFrame with the matching rows, re-indexed from 0.
    """
    # Series.isin rejects scalars, so wrap a bare value in a list.
    years = year if pd.api.types.is_list_like(year) else [year]
    months = month if pd.api.types.is_list_like(month) else [month]

    # One combined mask instead of two successive filtered copies.
    mask = data["year"].isin(years) & data["month"].isin(months)
    return data[mask].reset_index(drop=True)

In [None]:
# Fetch the load frame (raw_df) and the weather frame (raw_wdf).
# NOTE(review): the meaning of the arguments ('train', 8, {13:[1]}, 'no') is
# set by the preprocess helper, presumably from bypl_support_new — confirm there.
raw_df  = preprocess().fetch_data('bypl_data','train','holidays.csv')
raw_wdf = preprocess().weather('brpl_weather_data', 8, {13:[1]}, 'no')

In [None]:
# Build the supervised frame and split it into train / validation / test.
# NOTE(review): `features` and `shift_features` are defined in a later cell, so
# this cell relies on out-of-order execution.
horizon = 192          # look-ahead in rows; presumably 2 days x 96 quarter-hour blocks — confirm
validation = 15 * 96   # number of trailing labelled rows held out for validation

# Pull the listed columns up by the horizon so each row carries the values
# observed `horizon` rows later. The loop variable is not `i`, because other
# cells use the global `i` as the numeric horizon.
for col in shift_features:
    df[col] = df[col].shift(-horizon)

# Keep only the modelling columns and drop incomplete rows (no in-place mutation).
df = df[features].dropna()
df['target'] = df.load.shift(-horizon)

# Filter the labelled rows once instead of once per split.
labelled = df[df.target.notna()]

# Inputs exclude the first column and the last column ('target'); with the
# `features` list defined in this notebook the first column is 'datetime'.
x_train, y_train = labelled.iloc[:-validation, 1:-1], labelled.iloc[:-validation, -1]
x_valid, y_valid = labelled.iloc[-validation:, 1:-1], labelled.iloc[-validation:, -1]
x_test = df.iloc[-96:, 1:-1]

## Backcasting:

In [2]:
# Change only prediction date
prediction_date = date(2019, 8, 1)

# Every other date is a fixed day offset from the prediction date.
validation_date = prediction_date + timedelta(days=-4)
yesterday_date = prediction_date + timedelta(days=-2)
today_date = prediction_date + timedelta(days=-1)
weekahead_date = prediction_date + timedelta(days=6)

# Echo the derived dates, earliest first.
for _label, _day in (
    ("Validation_date: ", validation_date),
    ("Yesterday_date:  ", yesterday_date),
    ("today_date:      ", today_date),
    ("prediction_date: ", prediction_date),
    ("Weekahead_date:  ", weekahead_date),
):
    print(_label, _day)

Validation_date:  2019-07-28
Yesterday_date:   2019-07-30
today_date:       2019-07-31
prediction_date:  2019-08-01
Weekahead_date:   2019-08-07


In [3]:
# The pickle file contains all the data, and the holidays still have to be
# removed from it, which is why these functions have to be called again and
# again. Here both calls also receive prediction_date.
raw_df  = preprocess().fetch_data('bypl_data','train','holidays.csv', prediction_date)
raw_wdf = preprocess().weather('brpl_weather_data', 8, {13:[1]}, 'no', prediction_date)

Importing pickle file for UDM load data...
pickle file imported
Fetching recent data to check for updates...
Load data already updated. Proceeding without any changes to pickled file...
Removing holidays...
Load data prepared
Fetching weather pickle file
Fetching weather data from api
location 13 data added
last date for actual weather data for plant id 13 is 2019-07-30
Processing load-weighted weather averages...
Adding derivatives...
Final derivatives added
Weather data transformation complete


In [4]:
# Inspect the fetched load frame.
# NOTE(review): this renders the whole frame; .head() / .tail() would keep the output short.
raw_df

Unnamed: 0,datetime,date,load,tb
0,2016-04-02 00:00:00,2016-04-02,642.84,1
1,2016-04-02 00:15:00,2016-04-02,636.34,2
2,2016-04-02 00:30:00,2016-04-02,627.52,3
3,2016-04-02 00:45:00,2016-04-02,613.38,4
4,2016-04-02 01:00:00,2016-04-02,599.73,5
...,...,...,...,...
110139,2019-07-30 22:45:00,2019-07-30,1256.18,92
110140,2019-07-30 23:00:00,2019-07-30,1262.75,93
110141,2019-07-30 23:15:00,2019-07-30,1261.60,94
110142,2019-07-30 23:30:00,2019-07-30,1251.29,95


In [7]:
# Training works on independent copies of the fetched load and weather frames.
df = raw_df.copy()
wdf = raw_wdf.copy()

In [8]:
# Build the modelling frame from the load and weather copies. The log printed
# by feature_eng lists its steps: time features, lags, load EWMs, sine
# transforms, hourly running mean, merge with weather, final features.
final_df = preprocess().feature_eng(df, wdf)

Adding time based features...
Adding lags...
Adding ewms for load...
Adding sine derivatives to time variables...
Adding the hourly running mean for load...
Merging load and weather databases...
Adding final features...
Feature engineering completed successfully


In [12]:
# Peek at the first rows; the lag columns are NaN at the start of the series.
final_df.head(3)

Unnamed: 0,datetime,date,load,tb,dow,weekend,hour,doy,month,year,lag1b,lag2b,lag3b,lag1,lag2,lag3,lag4,lag5,lag6,load_wm2h,load_wm3h,load_wm5h,load_wm24h,sin_doy,sin_tb,sin_dow,hour_mean,apparent_temperature,temperature,humidity,dew_point,wind_speed,cloud_cover,humidex,RH,wci,tb_aptemp,load_aptemp,aptemp_mean_6h,aptemp_mean_12h,temperature_ewm,dp_ewm,hm12,wsp_12,cc_12,tb_load,sdtbrm,sdtbrm2,3tbrm,3tbrmw,factor,lag1wm,lag2wm,lag3wm,lag4wm,lag5wm,lag6wm,load_aptemp_ewm6,load_aptemp_ewm12,load_aptemp_ewm24,lagcwm,lag4avg,lag_df,lag_df1,load_wm12h,wt,tm,tm2,tm3,tm4,th,th2,th3,aptemp_lag12h,aptemp_lag1d,humidex_lag12h,humidex_lag1d
0,2016-04-02 00:00:00,2016-04-02,642.84,1,5,0,1,93,4,2016,,,,,,,,,,642.84,642.84,642.84,642.84,0.999546,0.065403,-0.974928,630.02,27.35,26.9,0.012888,15.73,0.84,0.01,31.335686,50.470953,29.153572,27.35,17581.674,27.35,27.35,26.9,15.73,0.012888,0.84,0.01,642.84,,,,,,,,,,,,17581.674,17581.674,17581.674,,,,,642.84,5,107.6,723.61,748.0225,109.4,26.9,723.61,19465.109,,,,
1,2016-04-02 00:15:00,2016-04-02,636.34,2,5,0,1,93,4,2016,642.84,,,,,,,,,639.18375,639.319167,639.4275,639.556146,0.999546,0.130526,-0.974928,630.02,27.735,27.3975,0.011922,14.5425,1.3,0.0075,31.08934,45.423505,29.618924,55.47,17648.8899,27.550521,27.54651,27.153932,15.12388,0.012395,1.074792,0.008724,1272.68,,,,,,,,,,,,17616.682281,17615.982116,17615.632033,,,,,639.522292,5,109.59,750.623006,758.810236,110.186042,27.3975,750.623006,20565.193814,,,,
2,2016-04-02 00:30:00,2016-04-02,627.52,3,5,0,1,93,4,2016,636.34,642.84,,,,,,,,634.288601,634.713949,635.055137,635.460226,0.999546,0.19509,-0.974928,630.02,28.12,27.895,0.011021,13.355,1.76,0.005,30.892916,40.855814,30.117798,84.36,17645.8624,27.756377,27.745693,27.411317,14.509519,0.011918,1.312776,0.007431,1882.56,,,,,,,,,,,,17627.230329,17626.360036,17625.919476,,,,,635.353696,5,111.58,778.131025,769.823474,110.982772,27.895,778.131025,21705.964942,,,,


In [15]:
# Specify which features you want to use in the model
features = ['datetime','load','lag1','lag2','lag3','lag5',
            'lag4wm', # 4th-day lag, exponentially weighted
            'hour_mean','sin_doy','dow','tb_aptemp',
            'sdtbrm',         # today + 5th day lag average
            '3tbrm','3tbrmw', # last 3 days load average
            'sdtbrm2',        # today + 6th day lag average
            'temperature_ewm','apparent_temperature',
            'RH','dew_point',
            'aptemp_mean_12h','aptemp_mean_6h','doy','humidex',
            'tm','tm2','tm3', 'tm4',   # apparent_temp 12 hour exponential * month
            'load_wm3h','month','year','hour',
            'humidity','sin_tb',
            'hm12','dp_ewm',
            'lagcwm', # last days exponential, then average
            'load_wm12h','wci','wsp_12','cc_12',
            'wind_speed','cloud_cover','temperature',
            'wt', # dow * hour
            'load_aptemp','load_aptemp_ewm6','load_aptemp_ewm12',
            'tb_load']


# Specify the weather and time based variables to 'shift' up when running the model, as is required
# NOTE(review): 'doy' and 'month' are shifted, but 'sin_doy' and 'year' (both
# in `features`) are not — confirm this is intended.
shift_features = ['temperature_ewm','dp_ewm','temperature','apparent_temperature','dew_point','RH','aptemp_mean_6h',\
                'aptemp_mean_12h','humidex','dow','tb_aptemp','humidity','doy','month','hm12','wind_speed',\
                'cloud_cover','wsp_12','cc_12','wci','wt','tm','tm2','tm3','tm4']

# Specify the features you want the model to treat as categorical. Note: This only works with lightgbm
categorical_features = ['dow','month','year','hour']


In [None]:
# Forecast horizon in rows: the target is the load `i` rows after each row.
i = 192
# .copy() gives an independent frame, so the column assignments below do not
# act on a selection of final_df.
df4 = final_df[selected_features].copy()
df4['target'] = df4.load.shift(-i)

# Shift the selected columns up by the same horizon so they line up with the target.
for j in selected_shift_features:
    df4[j] = df4[j].shift(-i)

# Drop rows left incomplete by the lags and shifts (reassign, not in-place).
df4 = df4.dropna()
# `columns=` replaces the positional axis argument (`drop('target', 1)`),
# which pandas 2.0 no longer accepts.
x, y = df4.drop(columns='target').copy(), df4[['datetime','target']].copy()

In [None]:
# Size of the validation window; later cells multiply it by 96 to get a row count.
validation = 15

In [None]:
# Keep only rows whose datetime is earlier than '<misc().get_date(0)> 00:00:00'.
# NOTE(review): get_date is an opaque helper — presumably it returns today's
# date as a string; confirm, and check whether prediction_date should be the
# cut-off when backcasting.
x_train, y_train = x[x['datetime'] < misc().get_date(0) + ' 00:00:00'], y[y['datetime'] < misc().get_date(0) + ' 00:00:00']

In [None]:
# Remove the timestamp columns from the inputs and from the target frame.
x_train = x_train.drop(columns=['date', 'datetime'])
y_train = y_train.drop(columns=['datetime'])

In [None]:
# Hold out the last `validation` days (96 rows per day) for validation.
# The validation slice must be taken BEFORE the training frames are truncated:
# slicing it from the already shortened x_train / y_train would make the
# validation rows a subset of the training rows and discard the real tail.
# NOTE: the cell reassigns x_train / y_train, so re-running it trims them again.
holdout_rows = validation * 96
x_val, y_val = x_train.iloc[-holdout_rows:].reset_index(drop=True), y_train.iloc[-holdout_rows:].reset_index(drop=True)
x_train, y_train = x_train.iloc[:-holdout_rows].reset_index(drop=True), y_train.iloc[:-holdout_rows].reset_index(drop=True)

In [None]:
# Drop the first remaining column of the input frames.
# NOTE(review): which column this removes depends on selected_features —
# presumably 'load'; confirm.
x_train = x_train.iloc[:,1:]
x_val = x_val.iloc[:,1:]
# y_train / y_val are deliberately left untouched: once 'datetime' has been
# dropped they hold only the 'target' column, so slicing them with
# `iloc[:,1:]` would leave target frames with zero columns.

In [None]:
# Consolidated data preparation: build the shifted frame, cut it at the
# reference date, and split it into training and validation sets.

# Forecast horizon in rows. This must be bound to `i` (it used to be assigned
# to `dfi`), because the shifts below read `i`.
i = 192
# .copy() gives an independent frame for the column assignments below.
df4 = final_df[selected_features].copy()
df4['target'] = df4.load.shift(-i)

# Shift the selected columns up by the horizon so they line up with the target.
for j in selected_shift_features:
    df4[j] = df4[j].shift(-i)

df4 = df4.dropna()
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
x, y = df4.drop(columns='target').copy(), df4[['datetime','target']].copy()

# Validation window in days (96 rows per day).
validation = 15

# NOTE(review): misc().get_date(0) is an opaque helper — presumably today's
# date as a string; confirm it is the right cut-off when backcasting.
x_train, y_train = x[x['datetime'] < misc().get_date(0) + ' 00:00:00'], y[y['datetime'] < misc().get_date(0) + ' 00:00:00']

x_train = x_train.drop(['date','datetime'],axis=1)
y_train = y_train.drop(['datetime'], axis=1)

# Take the validation tail BEFORE truncating the training frames; slicing it
# from the shortened frames would make validation a subset of training.
holdout_rows = validation * 96
x_val, y_val = x_train.iloc[-holdout_rows:].reset_index(drop=True), y_train.iloc[-holdout_rows:].reset_index(drop=True)
x_train, y_train = x_train.iloc[:-holdout_rows].reset_index(drop=True), y_train.iloc[:-holdout_rows].reset_index(drop=True)

# Drop the first remaining input column.
# NOTE(review): presumably 'load'; confirm against selected_features.
x_train = x_train.iloc[:,1:]
x_val = x_val.iloc[:,1:]
# y_train / y_val already hold only the 'target' column, so they are not
# sliced: `iloc[:,1:]` on them would leave zero columns.

In [None]:
# Inspect the working copy of the weather frame.
wdf

In [None]:
# Inspect the weather frame as returned by preprocess().weather.
raw_wdf