# Electricity Consumption Forecasting Using XGBRegressor

This example uses the XGBRegressor model to forecast electricity consumption using data from gov.ie. The time series ranges from 29th March 2011 to 17th February 2013.

#### Attributions

The below code uses the XGBRegressor model as available under the [XGBoost](https://github.com/dmlc/xgboost) library, of which the original authors **(Copyright © Contributors, 2021)** have made available under the Apache 2.0 license. Modifications have been made where appropriate for conducting analysis on the dataset specific to this example.

The data used in this example is sourced from [data.gov.ie](https://data.gov.ie/dataset/energy-consumption-gas-and-electricity-civic-offices-2009-2012/resource/6091c604-8c94-4b44-ac52-c1694e83d746).

The below work and findings are not endorsed by the original authors in any way.

   Copyright (c) 2021 by Contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.tsa.stattools as ts

# fix random seed for reproducibility
np.random.seed(7)

# load dataset
d1 = pd.read_csv('dccelectricitycivicsblocks34p20130221-1840.csv', engine='python', skipfooter=3)
d1

Unnamed: 0,Civic Offices Blocks 3 and 4 KWh,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 88,Unnamed: 89,Unnamed: 90,Unnamed: 91,Unnamed: 92,Unnamed: 93,Unnamed: 94,Unnamed: 95,Unnamed: 96,Unnamed: 97
0,Date,Values,00:00,00:15,00:30,00:45,01:00,01:15,01:30,01:45,...,21:30,21:45,22:00,22:15,22:30,22:45,23:00,23:15,23:30,23:45
1,29/03/2011,96,,,,,,,,,...,31.80000305,31.20000076,31.65000153,30.60000038,32.85000229,31.05000305,32.10000229,31.35000229,31.65000153,30.15000153
2,30/03/2011,96,32.10000229,32.40000153,33.60000229,33,29.70000076,30.90000153,31.50000191,33,...,35.70000076,33.60000229,34.20000076,34.80000305,34.65000153,33.75,33.15000153,32.70000076,31.50000191,33.60000229
3,31/03/2011,96,32.10000229,33.60000229,35.10000229,33.45000076,31.20000076,31.35000038,34.20000076,33.75,...,33,33,35.25,34.35000229,32.40000153,31.20000076,32.84999847,33.45000076,32.10000229,32.10000229
4,01/04/2011,96,32.70000076,34.5,30.30000305,33,33,31.5,30.30000305,32.84999847,...,32.25,33.45000076,33,30.45000076,33.15000153,30.60000229,30.15000153,32.55000305,33,30.15000153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688,13/02/2013,96,25,25,25.5,24.5,25,26,25.5,25.5,...,33,32.5,30.5,27.5,26,26,26,26,25.5,26.5
689,14/02/2013,96,26,25.5,26,25.5,25,27,25.5,26.5,...,31,30,30.5,29,28,26.5,25.5,25,25.5,25
690,15/02/2013,96,25.5,24.5,25.5,24.5,25,26.5,25,25.5,...,29,27,28,28,25.5,26,25.5,25.5,24.5,25
691,16/02/2013,96,25,25,24.5,24.5,24.5,26.5,25,25.5,...,24.5,24,24.5,26,24.5,24,25,24.5,25,24


In [2]:
d2=d1.rename(columns=d1.iloc[0])
d3=d2.drop(d1.index[0])
d3
d3.drop(d3.index[0])
d4=d3.drop('Date', axis=1)
d5=d4.drop('Values', axis=1)
d5
d6=d5.dropna()
d7=d6.values
d7
df=np.sum(d7, axis=1, dtype=float)
df

array([4981.5001927 , 5166.60016445, 3046.35014537, 3101.10013769,
       4908.60016439, 4858.50017742, 4905.00019836, 4999.95019526,
       3075.90013122, 3023.5501442 , 5004.6001587 , 5199.30019957,
       5226.75017163, 5162.55022428, 4991.55017468, 2950.20010378,
       4883.85017776, 5055.15017129, 5084.10021592, 4914.00019451,
       3581.55014991, 2981.55008892, 2967.00011064, 3515.70014566,
       4721.10016438, 4931.85019494, 5020.05018234, 4799.40017322,
       3043.20012856, 2929.05012318, 3423.45014192, 4949.55017475,
       5155.05015188, 5238.75021174, 5129.10016059, 3088.95013995,
       3017.40010454, 5041.95018196, 5170.80017096, 5250.90023994,
       5239.35021975, 5163.60019308, 3169.65013694, 3034.65012932,
       5056.05021094, 5052.6001988 , 5048.2501869 , 4971.75019264,
       3013.20013239, 2919.45011703, 4830.0002022 , 5007.90018087,
       5028.00018885, 5088.60017207, 5092.35015869, 3069.90011787,
       2909.85011099, 4923.60022544, 5136.75021744, 5233.65021

In [3]:
df=np.array(df)
df

array([4981.5001927 , 5166.60016445, 3046.35014537, 3101.10013769,
       4908.60016439, 4858.50017742, 4905.00019836, 4999.95019526,
       3075.90013122, 3023.5501442 , 5004.6001587 , 5199.30019957,
       5226.75017163, 5162.55022428, 4991.55017468, 2950.20010378,
       4883.85017776, 5055.15017129, 5084.10021592, 4914.00019451,
       3581.55014991, 2981.55008892, 2967.00011064, 3515.70014566,
       4721.10016438, 4931.85019494, 5020.05018234, 4799.40017322,
       3043.20012856, 2929.05012318, 3423.45014192, 4949.55017475,
       5155.05015188, 5238.75021174, 5129.10016059, 3088.95013995,
       3017.40010454, 5041.95018196, 5170.80017096, 5250.90023994,
       5239.35021975, 5163.60019308, 3169.65013694, 3034.65012932,
       5056.05021094, 5052.6001988 , 5048.2501869 , 4971.75019264,
       3013.20013239, 2919.45011703, 4830.0002022 , 5007.90018087,
       5028.00018885, 5088.60017207, 5092.35015869, 3069.90011787,
       2909.85011099, 4923.60022544, 5136.75021744, 5233.65021

In [4]:
def create_dataset(df, previous=1):
    dataX, dataY = [], []
    for i in range(len(df)-previous-1):
        a = df[i:(i+previous), 0]
        dataX.append(a)
        dataY.append(df[i + previous, 0])
    return np.array(dataX), np.array(dataY)

In [5]:
df=df.reshape(-1,1)
df

array([[4981.5001927 ],
       [5166.60016445],
       [3046.35014537],
       [3101.10013769],
       [4908.60016439],
       [4858.50017742],
       [4905.00019836],
       [4999.95019526],
       [3075.90013122],
       [3023.5501442 ],
       [5004.6001587 ],
       [5199.30019957],
       [5226.75017163],
       [5162.55022428],
       [4991.55017468],
       [2950.20010378],
       [4883.85017776],
       [5055.15017129],
       [5084.10021592],
       [4914.00019451],
       [3581.55014991],
       [2981.55008892],
       [2967.00011064],
       [3515.70014566],
       [4721.10016438],
       [4931.85019494],
       [5020.05018234],
       [4799.40017322],
       [3043.20012856],
       [2929.05012318],
       [3423.45014192],
       [4949.55017475],
       [5155.05015188],
       [5238.75021174],
       [5129.10016059],
       [3088.95013995],
       [3017.40010454],
       [5041.95018196],
       [5170.80017096],
       [5250.90023994],
       [5239.35021975],
       [5163.600

In [6]:
train_size = int(len(df) * 0.8)
train_size

544

In [7]:
test_size = len(df) - train_size
test_size

136

In [8]:
train, test = df[0:train_size,:], df[train_size:len(df),:]

In [9]:
train

array([[4981.5001927 ],
       [5166.60016445],
       [3046.35014537],
       [3101.10013769],
       [4908.60016439],
       [4858.50017742],
       [4905.00019836],
       [4999.95019526],
       [3075.90013122],
       [3023.5501442 ],
       [5004.6001587 ],
       [5199.30019957],
       [5226.75017163],
       [5162.55022428],
       [4991.55017468],
       [2950.20010378],
       [4883.85017776],
       [5055.15017129],
       [5084.10021592],
       [4914.00019451],
       [3581.55014991],
       [2981.55008892],
       [2967.00011064],
       [3515.70014566],
       [4721.10016438],
       [4931.85019494],
       [5020.05018234],
       [4799.40017322],
       [3043.20012856],
       [2929.05012318],
       [3423.45014192],
       [4949.55017475],
       [5155.05015188],
       [5238.75021174],
       [5129.10016059],
       [3088.95013995],
       [3017.40010454],
       [5041.95018196],
       [5170.80017096],
       [5250.90023994],
       [5239.35021975],
       [5163.600

In [10]:
test

array([[4862.5],
       [4869.5],
       [4768. ],
       [3175.5],
       [2951.5],
       [4756.5],
       [4855. ],
       [4877.5],
       [4777.5],
       [4636.5],
       [3140. ],
       [2950. ],
       [4711. ],
       [4894.5],
       [4589. ],
       [4354.5],
       [4304. ],
       [2745.5],
       [2639. ],
       [4335. ],
       [4442.5],
       [4417.5],
       [4370.5],
       [4240.5],
       [2549.5],
       [2486.5],
       [4184.5],
       [4369. ],
       [4432. ],
       [4437. ],
       [2722.5],
       [2451.5],
       [4403. ],
       [4491. ],
       [4468. ],
       [4481.5],
       [4361.5],
       [2432. ],
       [4340.5],
       [4498.5],
       [4506. ],
       [4459.5],
       [4407. ],
       [2562.5],
       [2469. ],
       [4304.5],
       [4430.5],
       [4449.5],
       [4484. ],
       [4335. ],
       [2695. ],
       [2451. ],
       [4500. ],
       [4581. ],
       [4679. ],
       [4696. ],
       [4632.5],
       [2873. ],
       [2643. 

In [11]:
train.shape

(544, 1)

In [12]:
test.shape

(136, 1)

In [13]:
# Lookback period
lookback = 7
X_train, Y_train = create_dataset(train, lookback)
X_test, Y_test = create_dataset(test, lookback)

In [14]:
from xgboost import XGBRegressor
model = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
model.fit(X_train, Y_train)

In [15]:
testpred = model.predict(X_test)

In [16]:
Y_test=Y_test.reshape(-1,1)
testpred=testpred.reshape(-1,1)

In [17]:
import math
from math import sqrt
test_mse = mean_squared_error(Y_test, testpred)
rmse = sqrt(test_mse)
print('RMSE: %f' % rmse)

RMSE: 437.935136


In [18]:
np.mean(Y_test)

3895.140625