In [1]:
# Use the interactive widget backend for matplotlib figures in this notebook.
%matplotlib widget
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "darkgrid" theme to the plots drawn below.
sns.set_theme(style="darkgrid")

<b>Evaluation Metrics</b>

In [2]:
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

class evaluate_metrics():
    """Regression error metrics (MSE, RMSE, MAE) behind one small interface.

    MSE and MAE delegate to scikit-learn; RMSE is the square root of MSE.
    Every method takes the ground-truth values first and the predictions second.
    """

    def rmse(self, y_true, y_pred):
        """Return the root mean squared error between `y_true` and `y_pred`."""
        squared_error = self.mse(y_true, y_pred)
        return sqrt(squared_error)

    def mse(self, y_true, y_pred):
        """Return the mean squared error between `y_true` and `y_pred`."""
        return mean_squared_error(y_true=y_true, y_pred=y_pred)

    def mae(self, y_true, y_pred):
        """Return the mean absolute error between `y_true` and `y_pred`."""
        return mean_absolute_error(y_true=y_true, y_pred=y_pred)

In [3]:
from numpy import array, split

<b>Function for transforming the data into a supervised-learning format</b>

In [4]:
# Split a series and its labels into train/validation/test sets of fixed-length windows
def split_sequence(series=None, labels=None, n_timesteps=24, train_length=254, val_length=54):
    """Split `series`/`labels` chronologically and cut each part into windows.

    The first `train_length` windows form the training set, the next
    `val_length` windows the validation set, and everything after that the
    test set. Each part is then cut along its first axis into consecutive,
    non-overlapping windows of `n_timesteps` rows.

    Parameters
    ----------
    series : sliceable sequence (e.g. numpy array)
        Input rows, ordered in time along the first axis.
    labels : sliceable sequence (e.g. numpy array)
        Targets, sliced with the same row boundaries as `series`.
    n_timesteps : int
        Number of consecutive rows per window.
    train_length : int
        Number of windows in the training set (i.e. `train_length * n_timesteps` rows).
    val_length : int
        Number of windows in the validation set.

    Returns
    -------
    tuple
        `(train_x, train_y, val_x, val_y, test_x, test_y)`, each an array whose
        first axis indexes windows and whose second axis has length `n_timesteps`.

    Raises
    ------
    ValueError
        If any of the six parts has a row count that is not a multiple of
        `n_timesteps`.
    """
    # Convert the split sizes from "number of windows" to row offsets.
    train_end = train_length * n_timesteps
    val_end = train_end + val_length * n_timesteps

    # Chronological split: train | validation | test (the remainder).
    parts = (
        series[:train_end], labels[:train_end],
        series[train_end:val_end], labels[train_end:val_end],
        series[val_end:], labels[val_end:],
    )

    # Cut every part into windows of `n_timesteps` rows.
    train_x, train_y, val_x, val_y, test_x, test_y = (
        _to_windows(part, n_timesteps) for part in parts
    )
    return train_x, train_y, val_x, val_y, test_x, test_y


def _to_windows(values, n_timesteps):
    """Stack `values` into equal windows of `n_timesteps` rows along axis 0."""
    n_rows = len(values)
    if n_rows % n_timesteps:
        raise ValueError(
            f"cannot cut {n_rows} rows into equal windows of {n_timesteps} timesteps"
        )
    # Pass an integer section count: a float count makes the divisibility
    # check inside numpy's split depend on floating-point rounding.
    return array(split(values, n_rows // n_timesteps))

<h1>Load dataset</h1>

In [5]:
import os

# Build the (relative) path to the CSV file to load
ds_path = '../dataset/clean/'
filename = 'building1retail.csv'
full_path = os.path.join(ds_path, filename)

# Read the CSV, using the "Datetime" column as a parsed datetime index.
# NOTE(review): `infer_datetime_format` is deprecated in pandas 2.x — confirm
# the pandas version in use before removing it.
dataframe = pd.read_csv(
    full_path,
    header=0,
    index_col=["Datetime"],
    parse_dates=True,
    infer_datetime_format=True,
    low_memory=False,
)
dataframe

Unnamed: 0_level_0,Power,Temperature,Smooth,Day_of_Week,Events
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-01 00:00:00,627.3,67.0,537.965366,4,0
2010-01-01 01:00:00,624.8,63.0,524.488132,4,0
2010-01-01 02:00:00,704.0,64.0,620.592628,4,0
2010-01-01 03:00:00,768.5,63.0,692.804419,4,0
2010-01-01 04:00:00,728.2,62.0,702.827174,4,0
...,...,...,...,...,...
2010-12-29 19:00:00,1363.7,64.0,1533.936705,2,0
2010-12-29 20:00:00,1305.8,62.0,1476.069433,2,0
2010-12-29 21:00:00,1288.8,63.0,1331.001113,2,0
2010-12-29 22:00:00,890.0,62.0,913.536397,2,0


Many papers report that deep learning models work better when the input data is scaled.

Therefore, in this experiment we use min-max scaling (see https://en.wikipedia.org/wiki/Feature_scaling).

In [6]:
from sklearn.preprocessing import MinMaxScaler

In [7]:
# We want to use the previous 24 hours of data to predict the next 24 hours.
# The original "Power" column is no longer used, so it is dropped here.
# errors='ignore' keeps this cell re-runnable: once "Power" is gone, a second
# run is a no-op instead of raising KeyError.
dataframe = dataframe.drop(columns=['Power'], errors='ignore')
dataframe

Unnamed: 0_level_0,Temperature,Smooth,Day_of_Week,Events
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01 00:00:00,67.0,537.965366,4,0
2010-01-01 01:00:00,63.0,524.488132,4,0
2010-01-01 02:00:00,64.0,620.592628,4,0
2010-01-01 03:00:00,63.0,692.804419,4,0
2010-01-01 04:00:00,62.0,702.827174,4,0
...,...,...,...,...
2010-12-29 19:00:00,64.0,1533.936705,2,0
2010-12-29 20:00:00,62.0,1476.069433,2,0
2010-12-29 21:00:00,63.0,1331.001113,2,0
2010-12-29 22:00:00,62.0,913.536397,2,0


<h1>Prepare data</h1>

In [8]:
# Rescale every column with a min-max scaler
# NOTE(review): the scaler is fitted on all rows, before the train/validation/test
# split is made — confirm that including the later rows in the fit is intended.

series = dataframe.values
scaler = MinMaxScaler()
scaler.fit(series)
scalered_data = scaler.transform(series)

In [9]:
# Divide the scaled rows into inputs and labels, shifted by one forecast horizon:
# the input at row i is paired with the target value HORIZON rows later.

HORIZON = 24                # number of rows between an input and its label
TARGET_COLUMN = "Smooth"    # we only need to predict the power consumption

# Look the target up by name so the label does not depend on column order.
target_idx = dataframe.columns.get_loc(TARGET_COLUMN)

data = scalered_data[:-HORIZON]
labels = scalered_data[HORIZON:, target_idx]

In [10]:
# Cut the inputs and labels into train / validation / test windows,
# using split_sequence's default window length and split sizes.

(train_x, train_y,
 val_x, val_y,
 test_x, test_y) = split_sequence(series=data, labels=labels)

<h1>Create model</h1>

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [12]:
# Create a simple LSTM model
# Read the window length, feature count and output width from the training arrays
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]

# LSTM encoder -> dense hidden layer -> one output unit per predicted timestep
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs))

# `metrics` is given as a list: that is the documented form, and newer Keras
# releases reject a bare string.
model.compile(loss='mse', metrics=['mae'], optimizer='adam')



In [13]:
# Print the layers, their output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 200)               164000    
_________________________________________________________________
dense (Dense)                (None, 100)               20100     
_________________________________________________________________
dense_1 (Dense)              (None, 24)                2424      
Total params: 186,524
Trainable params: 186,524
Non-trainable params: 0
_________________________________________________________________


<h1>Training</h1>

In [14]:
# Training hyperparameters
verbose = 1
epochs = 100
batch_size = 32

# Fit on the training windows; the validation windows are passed as validation data
history = model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=epochs,
    batch_size=batch_size,
    verbose=verbose,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [15]:
# Plot the training and validation losses recorded during fitting
train_loss = history.history["loss"]
val_loss = history.history["val_loss"]

# Draw on a dedicated figure so that re-running this cell never paints over
# whichever figure happens to be current.
fig, ax = plt.subplots()
ax.plot(train_loss, label="Training loss", color='r')
ax.plot(val_loss, label="Validation loss", color='b')
ax.set_xlabel("Epochs")
ax.set_ylabel("Mean Square Error")
ax.set_title("Training & Validation Losses")
ax.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<h1>Predict</h1>

In [16]:
# Create the evaluation-metrics helper (provides mse, rmse and mae)
metrics = evaluate_metrics()

In [17]:
# Predict the test set and flatten predictions and targets to 1-D arrays
prediction = []  # not used by the cells shown here

yhat = model.predict(test_x).flatten()
actual = test_y.flatten()

<h1>Evaluation metrics</h1>

In [18]:
# Report the test-set error metrics (computed on the min-max-scaled values)
mse_score = metrics.mse(actual, yhat)
print(f"MSE: {mse_score}")

rmse_score = metrics.rmse(actual, yhat)
print(f"RMSE: {rmse_score}")

mae_score = metrics.mae(actual, yhat)
print(f"MAE: {mae_score}")

MSE: 0.0028537415116063605
RMSE: 0.05342042223350879
MAE: 0.034424595693681184


In [19]:
# Close the current figure, then draw predicted vs. actual values on a new one
plt.close()
fig, ax = plt.subplots()
ax.plot(yhat, label="Predicted", color='r')
ax.plot(actual, label='Actual', color='b')
ax.set_xlabel("Time (h)")
ax.set_ylabel("Power")
ax.set_title("The predict of power consumption next 24 hours by used previous 24 hours")
ax.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …