## Initial Setup


In [None]:
import tensorflow as tf
tf.enable_eager_execution()

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from sklearn import svm
from sklearn.svm import SVR
from sklearn import metrics

from tensorflow.keras import backend as K

from scipy import stats
from datetime import datetime

# Notebook-wide matplotlib defaults: 12x9 figures, grid lines switched off.
mpl.rcParams['figure.figsize'] = (12, 9)
mpl.rcParams['axes.grid'] = False

Variables which will be necessary later for the Recurrent Neural Network are initialized

In [None]:
# Number of leading rows treated as training data (used for the normalisation
# statistics and as the train/validation boundary when windowing).
TRAIN_SPLIT = 6400
# Fix TensorFlow's global random seed (TF 1.x API).
tf.random.set_random_seed(13)
# Batch size and shuffle-buffer size used by the tf.data pipelines.
BATCH_SIZE = 256
BUFFER_SIZE = 10000
# Divisor applied to the future x-axis in multi_step_plot.
STEP = 1

# Data Prep

The CSV files are imported into Pandas DataFrames

In [None]:
# Meter readings: a 'timestamp' column plus further columns that the collation
# loop looks up by name in the metadata.
tempUrl = 'https://raw.githubusercontent.com/buds-lab/the-building-data-genome-project/master/data/raw/temp_open_utc.csv'
tempOpen = pd.read_csv(tempUrl)

In [None]:
# Scratch frame; the collation loop further down assembles each building's data under this name.
dfTemp = pd.DataFrame()

# Building metadata; the columns read later are uid, sqm, newweatherfilename,
# primaryspaceusage and timezone.
metaUrl = 'https://raw.githubusercontent.com/buds-lab/the-building-data-genome-project/master/data/raw/meta_open.csv'
metaOpen = pd.read_csv(metaUrl)

These functions return the hour or day of the year based on the date inputted

In [None]:
def getHour(dayx):
    """Return the hour-of-day component of a datetime-like object."""
    hour = dayx.hour
    return hour

def getDayOfYear(dayy):
    """Return the day of the year (1 = 1 January) of a datetime-like object."""
    time_tuple = dayy.timetuple()
    return time_tuple.tm_yday

The `getWeather` function downloads the weather data for a building from the repository, cleans it, and prepares it to be merged with the meter data

In [None]:
def getWeather(weatherNo, zThreshold=5):
    """Download one weather file and return it as a cleaned hourly feature frame.

    Parameters
    ----------
    weatherNo : str
        File name of the weather CSV, appended to the repository's
        ``data/external/weather/`` URL.
    zThreshold : float, optional
        Readings whose absolute z-score (per column) exceeds this value are
        treated as outliers and re-interpolated. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by hourly UTC timestamps with the columns TemperatureC,
        Humidity, Sea Level PressurehPa, Dew PointC, VisibilityKm and
        WindSpeedKmh (all float), plus dayOfTheWeek, hourOfTheDay and
        dayOfTheYear derived from the index.

    Raises
    ------
    ValueError
        If a selected column holds a non-numeric value other than 'Calm' in
        the wind-speed column (raised by ``astype(float)``).
    """
    # Weather columns:
    #timestamp	Conditions	DateUTC<br />	Dew PointC	Events	Gust SpeedKm/h	Humidity
    #Precipitationmm	Sea Level PressurehPa	TemperatureC	TimeBST	TimeGMT	VisibilityKm	Wind Direction	Wind SpeedKm/h	WindDirDegrees	timestamp

    weatherUrl = "https://raw.githubusercontent.com/buds-lab/the-building-data-genome-project/master/data/external/weather/" + weatherNo
    weatherColumns = ['TemperatureC', 'Humidity', 'Sea Level PressurehPa',
                      'Dew PointC', 'VisibilityKm', 'WindSpeedKmh']

    weather1x = pd.read_csv(weatherUrl, index_col=0)
    weather1x = weather1x.rename(columns={'Wind SpeedKm/h': 'WindSpeedKmh'})
    weather1 = weather1x[weatherColumns].copy()

    # 'Calm' is the only textual wind-speed marker handled here; blank it so the
    # column can be cast to float and filled by interpolation.
    weather1.loc[weather1['WindSpeedKmh'] == 'Calm', 'WindSpeedKmh'] = np.nan

    # Builtin float instead of the removed np.float alias.
    weather1 = weather1.astype(float)

    weather1.index = pd.to_datetime(weather1.index, utc=True)

    # Weather files are resampled to align timestamps with other data
    weather1 = weather1.resample('60min').mean()

    # Interpolate missing data in every weather column
    weather1 = weather1.interpolate()

    # Outliers are replaced with NaN and interpolated again. The z-scores use
    # pandas' NaN-skipping mean/std (population std, ddof=0), so a NaN left at
    # the start of a column no longer turns the whole column's z-scores into
    # NaN and hides its outliers.
    z = ((weather1 - weather1.mean()) / weather1.std(ddof=0)).abs()
    weather1 = weather1.mask(z > zThreshold)
    weather1 = weather1.interpolate()

    # Calendar features come straight from the index, so they have no gaps and
    # need no forward fill.
    weather1['dayOfTheWeek'] = weather1.index.weekday
    weather1['hourOfTheDay'] = weather1.index.hour
    weather1['dayOfTheYear'] = weather1.index.dayofyear

    return weather1

The following creates a dictionary where building names are matched up to a few key pieces of their meta data

In [None]:
# Map each building uid to [sqm, newweatherfilename, primaryspaceusage, timezone].
columnDict = {
    row['uid']: [row['sqm'], row['newweatherfilename'], row['primaryspaceusage'], row['timezone']]
    for _, row in metaOpen.iterrows()
}

Finally the files are collated. A counter is used to ensure only 3 buildings are done (for time purposes) but this can be removed to do all buildings. A CSV file containing all relevant building data is created.

In [None]:
# Maximum number of buildings to collate (kept small for run time).
# Set to None to process every building.
MAX_BUILDINGS = 3

# Number of buildings collated so far. The 'timestamp' column is skipped
# without being counted, so exactly MAX_BUILDINGS buildings are written.
counter = 0

for column in tempOpen:
    if column == 'timestamp':
        continue
    if MAX_BUILDINGS is not None and counter >= MAX_BUILDINGS:
        break

    sqm, weatherFileName, primarySpaceUsage, buildingTimezone = columnDict[column]

    # Build a fresh frame per building instead of mutating one shared frame.
    # utc=True matches the UTC index that getWeather returns, so the merge
    # keys have the same dtype.
    dfTemp = pd.DataFrame({
        'timestamp': pd.to_datetime(tempOpen['timestamp'], utc=True),
        'Usage': tempOpen[column],
    })
    dfTemp['sqm'] = sqm
    dfTemp['newweatherfilename'] = weatherFileName
    dfTemp['primaryspaceusage'] = primarySpaceUsage
    dfTemp['timezone'] = buildingTimezone

    newWeatherFile = getWeather(weatherFileName)

    mergedTempDf = pd.merge(dfTemp, newWeatherFile, on='timestamp')
    mergedTempDf = mergedTempDf.set_index('timestamp')

    # Only numeric columns can be averaged; the text metadata columns are
    # dropped explicitly rather than relying on mean() to discard them.
    df_interpol = mergedTempDf.select_dtypes(include='number').resample('60min').mean()
    df_interpol['Usage'] = df_interpol['Usage'].interpolate()

    # One CSV per building, named '<uid>_Interpol.csv'.
    df_interpol.to_csv(column + '_Interpol.csv')

    counter += 1

The data is then put into a DataFrame and normalized. Some of the relevant features are plotted before and after normalization.

In [None]:
# NOTE(review): this file only exists if 'PrimClass_Jaylin' was one of the
# buildings written by the collation cell — confirm before a fresh run.
df = pd.read_csv('PrimClass_Jaylin_Interpol.csv')

features_considered = ['Usage', 'TemperatureC', 'Humidity','Sea Level PressurehPa',
                       'Dew PointC', 'VisibilityKm', "WindSpeedKmh", 'dayOfTheWeek', 'hourOfTheDay']

# Selected feature columns, indexed by the timestamp column.
features = df[features_considered].set_index(df['timestamp'])

# Raw values plus NaN-ignoring mean / std computed on the training rows only.
dataset1 = features.values
train_rows = dataset1[:TRAIN_SPLIT]
data_mean = np.nanmean(train_rows, axis=0)
data_std = np.nanstd(train_rows, axis=0)

dataset1df = pd.DataFrame(dataset1, columns=features_considered)

# Un-normalised view of three features.
dataset1df[['TemperatureC','Usage', 'Humidity']].plot(subplots=True)

In [None]:
# Standardise from the raw feature values (not from `dataset1` itself) so that
# re-running this cell does not normalise already-normalised data.
dataset1 = (features.values - data_mean) / data_std

dataset1df = pd.DataFrame(dataset1, columns=features_considered)

# Untouched normalised copy that later cells start from.
usageDataMaster = dataset1df.copy()

# Normalised view of the same three features plotted before.
dataset1df[['TemperatureC','Usage', 'Humidity']].plot(subplots=True)

Predictions using all features are quite poor so we now perform feature selection, using a pairwise correlation feature.

Since we are trying to predict the building metering data (denoted as Usage), we sort the correlations with Usage in descending order.

In [None]:
# Absolute Pearson correlation of every feature with Usage, strongest first.
usage_abs_corr = dataset1df.corr()['Usage'].abs()
sortedFeatures = pd.DataFrame(usage_abs_corr.sort_values(ascending = False))
sortedFeatures

In [None]:
# Names of the four columns most correlated with Usage (Usage itself included).
pearson_ranking = dataset1df.corr()['Usage'].abs().sort_values(ascending = False)
bestFeaturesPearsonCorr = pearson_ranking.index[:4]
bestFeaturesPearsonCorr

In [None]:
# Same ranking, computed on the 24-row rolling pairwise correlations.
rolling_pairwise_corr = dataset1df.rolling(24).corr(pairwise=True).dropna()
sortedFeaturesCrossCorr = pd.DataFrame(
    rolling_pairwise_corr.corr()['Usage'].abs().sort_values(ascending = False))
sortedFeaturesCrossCorr

In [None]:
# Top four column names according to the rolling-correlation ranking.
cross_corr_ranking = (
    dataset1df.rolling(24).corr(pairwise=True).dropna()
    .corr()['Usage'].abs().sort_values(ascending = False)
)
bestFeaturesCrossCorr = cross_corr_ranking.index[:4]
bestFeaturesCrossCorr

From this table we can see the features which correlate the most with usage (PrimClass_Jaylin) are Humidity, Wind Speed and the day of the week. Therefore we will make a dataset with only these values.

In [None]:
# Integer x-axis positions 0..1567.
# NOTE(review): `dates` is reassigned before any visible use, so this value looks unused — confirm.
dates = np.arange(0,1568)

In [None]:
# Work on a copy so the normalised master frame stays untouched.
usageData = usageDataMaster.copy()

# Display the available column names.
usageData.columns

We created a new feature 'Usage24Ahead', which is the Usage data for the time 24 hours after that time stamp

In [None]:
# Rebuild from the master frame so the cell gives the same result on every run:
# add the target (Usage 24 rows later) and drop every row containing a NaN,
# which includes the last 24 rows that have no target.
usageData = usageDataMaster.assign(
    Usage24Ahead=usageDataMaster['Usage'].shift(-24)
).dropna()

# Keep the selected features plus the target, in the frame's own column order.
# Selecting columns replaces dropping them in place while iterating the frame.
keptColumns = [c for c in usageData.columns
               if c in bestFeaturesPearsonCorr or c == 'Usage24Ahead']
usageData = usageData[keptColumns].copy()

# Support Vector Machine

Next we create the SVM. We first separate the data into the independent variables, i.e. the input features (X_svm), and the dependent variable we are trying to predict (y_svm). We then split this data into training and test sets.

In [None]:
# Inputs: every remaining column except the target, in the frame's own column
# order. That is the order of usageDataArray, whose windows the fitted SVR
# later predicts on, so training and prediction see the features in the same
# positions. (Indexing with [[bestFeaturesPearsonCorr]] nested an Index inside
# a list and followed the correlation-ranked order instead.)
svmFeatureColumns = [c for c in usageData.columns if c != 'Usage24Ahead']
X_svm = np.array(usageData[svmFeatureColumns])
# To use the rolling-correlation ranking instead, filter usageData with
# bestFeaturesCrossCorr in the cell that builds Usage24Ahead.
y_svm = np.array(usageData['Usage24Ahead'])

In [None]:
from sklearn.model_selection import train_test_split

# Chronological split at TRAIN_SPLIT, the same boundary used for the RNN
# windows. A shuffled split would put rows from the validation period into the
# SVR training set (temporal leakage) and would make the "Hours" axis of the
# test plots meaningless. Both sizes are given explicitly so the behaviour does
# not depend on scikit-learn's default test_size.
X_train_svm, X_test_svm, y_train_svm, y_test_svm = train_test_split(
    X_svm, y_svm,
    train_size=TRAIN_SPLIT,
    test_size=len(X_svm) - TRAIN_SPLIT,
    shuffle=False)

The SVM is predicting 24 hours ahead using 24 hours of past history, with a step of 1 (one hour)

In [None]:
# Windowing parameters passed to multivariate_data_no_shift for the SVM
# evaluation windows: history length, prediction horizon and stride, in rows.
svm_past_history = 24
svm_future_target = 24
svm_STEP = 1

In [None]:
# Remove the SVM-only target column. errors='ignore' plus rebinding keeps the
# cell safe to re-run (an in-place drop raises KeyError the second time).
usageData = usageData.drop(columns=['Usage24Ahead'], errors='ignore')

In [None]:
# Plain NumPy copy of the selected features; every windowing call below slices this array.
usageDataArray = np.array(usageData)

In [None]:
def multivariate_data_no_shift(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
  """Cut a series into (history window, future label) pairs.

  For each position ``i`` from ``start_index + history_size`` up to (not
  including) ``end_index``, the sample is ``dataset`` rows ``i-history_size``
  to ``i-1`` taken every ``step`` rows. The label is ``target[i+target_size]``
  when ``single_step`` is true, otherwise ``target[i:i+target_size]``.
  ``end_index=None`` means ``len(dataset) - target_size``.

  Returns a ``(samples, labels)`` pair of NumPy arrays.
  """
  first = start_index + history_size
  last = len(dataset) - target_size if end_index is None else end_index

  windows, targets = [], []
  for position in range(first, last):
    rows = range(position - history_size, position, step)
    windows.append(dataset[rows])
    targets.append(target[position + target_size] if single_step
                   else target[position:position + target_size])

  return np.array(windows), np.array(targets)

The data is segmented into 24 hour windows to allow us to evaluate and visualize predictions

In [None]:
# Window the first four columns as inputs with column 0 as the target
# (assumes column 0 is Usage — TODO confirm against usageData's column order).
# Rows before TRAIN_SPLIT form the training windows; the remaining rows
# (end_index=None) form the validation windows.
svm_x_train, svm_y_train = multivariate_data_no_shift(usageDataArray[:,0:4], usageDataArray[:, 0], 0,
                                                   TRAIN_SPLIT, svm_past_history,
                                                   svm_future_target, svm_STEP)
svm_x_val, svm_y_val = multivariate_data_no_shift(usageDataArray[:,0:4], usageDataArray[:, 0],
                                               TRAIN_SPLIT, None, svm_past_history,
                                               svm_future_target, svm_STEP)

In [None]:
# Shape of one input window: (rows of history, features).
print ('Single window of past history : {}'.format(svm_x_train[0].shape))

In [None]:
# tf.data pipelines over the SVM windows: training is cached, shuffled,
# batched and repeated; validation is batched and repeated.
# NOTE(review): the SVR cells visible in this notebook fit and predict on NumPy
# arrays and never read these datasets — confirm they are needed.
train_data_svm = tf.data.Dataset.from_tensor_slices((svm_x_train, svm_y_train))
train_data_svm = train_data_svm.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data_svm = tf.data.Dataset.from_tensor_slices((svm_x_val, svm_y_val))
val_data_svm = val_data_svm.batch(BATCH_SIZE).repeat()

The SVM is then fitted

In [None]:
# Support vector regressor with scikit-learn's default settings, fitted on the
# per-row features -> Usage24Ahead training split.
reg_svr = SVR()
reg_svr.fit(X_train_svm, y_train_svm)
# Predict for every row of validation window 1020: one prediction per history
# row, i.e. for the usage 24 rows after each of them.
y_pred_svm = reg_svr.predict(svm_x_val[1020])

Below is a visualization of one 24 hour period.

In [None]:
# Hour offsets within the 24-hour window.
dates = np.arange(0,24)

# Actual usage of validation window 1020 against the SVR prediction for it.
# The model is an SVR created with default settings, not a linear model, so the
# legend entry is labelled accordingly.
plt.plot(dates, (svm_y_val[1020]), c='b', label='Data')
plt.plot(dates, y_pred_svm, c='r', label='SVR prediction')

plt.xlabel('Hours')
plt.ylabel('Usage')
plt.title('Support Vector Regression')
plt.legend()
plt.show()

We then make a prediction of the entire dataset and plot part of this

In [None]:
# SVR predictions for every row of the held-out test split.
full_pred = reg_svr.predict(X_test_svm)

In [None]:
# One x position per test sample (derived from the data rather than a
# hard-coded length, so it stays valid if the split size changes).
dates = np.arange(len(y_test_svm))

font = {'family': 'DejaVu Sans',
        'color':  'black',
        'weight': 'normal',
        'size': 25,
        }

# First 120 test samples: actual usage against the SVR prediction.
plt.plot(dates[:120], y_test_svm[:120], c='b', label='Data')
plt.plot(dates[:120], full_pred[:120], c='r', label='SVR prediction')

plt.xticks(fontsize=14)
plt.xlabel('Hours',fontdict=font)
plt.ylabel('Usage (Normalized)',fontdict=font)
plt.title('Support Vector Regression',fontdict=font)
plt.legend()
plt.show()

We then make a batch of predictions which will be used for evaluation later

In [None]:
# SVR predictions for the first 256 validation windows (one array per window).
svm_comp_pred = [reg_svr.predict(svm_x_val[window]) for window in range(256)]

# RNN - 24 Hour Training Window

Below we define some functions needed for the RNN. `multi_step_plot` visualizes the multi-hour predictions. `create_time_steps` builds the x-axis for the history part of those plots. `plot_train_history` plots the error over the course of training a model. `multivariate_data` segments the data into windows for use in the RNN.

In [None]:
def multi_step_plot(history, true_future, prediction, target_column=0):
    """Plot one history window, its true future and, optionally, a prediction.

    Parameters
    ----------
    history : 2-D array-like indexed as ``history[:, column]``
        One input window; only ``target_column`` is drawn.
    true_future : sequence
        Target values that follow the window.
    prediction : array-like or None
        Predicted future values. It is drawn only when it contains a non-zero
        value, so ``np.array([0])`` or ``None`` can be passed to skip it.
    target_column : int, optional
        Column of ``history`` that holds the target series. Defaults to 0,
        the column the windowing calls in this notebook use as the target
        (the previous hard-coded column 1 drew a different feature next to
        the target's future).
    """
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    num_out = len(true_future)
    # Future x positions, scaled by the notebook-level STEP constant.
    future_steps = np.arange(num_out)/STEP

    plt.plot(num_in, np.array(history[:, target_column]), label='History')
    plt.plot(future_steps, np.array(true_future), 'b', label='True Future')

    if prediction is not None and np.any(prediction):
        plt.plot(future_steps, np.array(prediction), 'r', label='Predicted Future')

    plt.legend(loc='upper left')
    plt.show()

In [None]:
def create_time_steps(length):
  """Return the negative offsets ``[-length, ..., -1]`` used as history x positions."""
  return [offset for offset in range(-length, 0)]

In [None]:
def plot_train_history(history, title, mse = False, rmse = False):
    """Plot the per-epoch training and validation loss of a Keras History.

    ``mse=True`` adds the 'mean_squared_error' curve; ``rmse=True`` adds the
    training and validation 'root_mean_squared_error' curves. ``title`` is
    the figure title.
    """
    logs = history.history
    train_loss = logs['loss']
    validation_loss = logs['val_loss']

    # When a curve is not requested the flag itself is kept, so the truthiness
    # checks below behave exactly like the requested/not-requested switch.
    mse_curve = logs['mean_squared_error'] if mse else mse
    if rmse:
        rmse_curve = logs['root_mean_squared_error']
        val_rmse_curve = logs['val_root_mean_squared_error']
    else:
        rmse_curve = rmse

    epoch_axis = range(len(train_loss))

    plt.figure()

    plt.plot(epoch_axis, train_loss, 'b', label='Training loss')
    plt.plot(epoch_axis, validation_loss, 'r', label='Validation loss')
    if mse_curve:
        plt.plot(epoch_axis, mse_curve, 'y', label='Mean Squared Error')
    if rmse_curve:
        plt.plot(epoch_axis, rmse_curve, 'g', label='Root Mean Squared Error')
        plt.plot(epoch_axis, val_rmse_curve, 'm', label='Validation Root Mean Squared Error')
    plt.title(title)
    plt.legend()

    plt.xlabel('Epochs')
    plt.ylabel('Mean Absolute Error')

    plt.show()

In [None]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
  """Build (input window, label) arrays for the RNN models.

  Behaves the same as ``multivariate_data_no_shift`` defined earlier in this
  notebook. Windows end just before each position in
  ``[start_index + history_size, end_index)`` and contain every ``step``-th
  row of the preceding ``history_size`` rows. Labels are
  ``target[i:i+target_size]``, or the single value ``target[i+target_size]``
  when ``single_step`` is true. ``end_index=None`` stops ``target_size`` rows
  before the end of ``dataset``.
  """
  if end_index is None:
    end_index = len(dataset) - target_size
  positions = range(start_index + history_size, end_index)

  data = [dataset[range(i - history_size, i, step)] for i in positions]
  if single_step:
    labels = [target[i + target_size] for i in positions]
  else:
    labels = [target[i:i + target_size] for i in positions]

  return np.array(data), np.array(labels)

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Keras metric: square root of the mean squared difference of the two tensors."""
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

Below we segment the data, lay out the model and train it.

In [None]:
# 24-hour model: history length, prediction horizon and stride, in rows.
mvms_past_history = 24
mvms_future_target = 24
mvms_STEP = 1

In [None]:
# Inputs are windows over all columns of usageDataArray; labels are the next
# mvms_future_target values of column 0. Rows before TRAIN_SPLIT give the
# training windows, the remaining rows give the validation windows.
x_train_multi, y_train_multi = multivariate_data(usageDataArray, usageDataArray[:, 0], 0,
                                                 TRAIN_SPLIT, mvms_past_history,
                                                 mvms_future_target, mvms_STEP)
x_val_multi, y_val_multi = multivariate_data(usageDataArray, usageDataArray[:, 0],
                                             TRAIN_SPLIT, None, mvms_past_history,
                                             mvms_future_target, mvms_STEP)

In [None]:
# Shapes of one input window and of its label vector.
print ('Single window of past history : {}'.format(x_train_multi[0].shape))
print ('\n Target usage to predict : {}'.format(y_train_multi[0].shape))

In [None]:
# Training pipeline: cached, shuffled, batched and repeated indefinitely.
train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation pipeline: batched in original order and repeated.
val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()

## Model Design

In [None]:
# 24-hour-history model: two Conv1D / MaxPooling1D / Dropout stages, two stacked
# LSTMs, dropout, and a Dense layer with one output per predicted hour.
input_window_shape = x_train_multi.shape[-2:]

multi_step_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=10, kernel_size=3, activation='relu', input_shape=input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.Conv1D(filters=20, kernel_size=3, activation='relu', input_shape=input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.LSTM(200, activation='relu',return_sequences=True),
    tf.keras.layers.LSTM(25, activation='relu'),

    tf.keras.layers.Dropout(rate=0.3),

    tf.keras.layers.Dense(24),
])

# MAE loss; MSE and the custom RMSE are tracked as additional metrics.
multi_step_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
    metrics=['mean_squared_error', root_mean_squared_error])

In [None]:
# Sanity check: print the untrained model's output shape for one validation batch.
for x, y in val_data_multi.take(1):
  print (multi_step_model.predict(x).shape)

In [None]:
# Train for 10 epochs of 200 batches each; 50 validation batches are evaluated per epoch.
multi_step_history = multi_step_model.fit(train_data_multi, epochs=10,
                                          steps_per_epoch=200,
                                          validation_data=val_data_multi,
                                          validation_steps=50)

In [None]:
# Training vs validation loss per epoch for the 24-hour model.
plot_train_history(multi_step_history, 'Multi-Step Training and validation loss')

We make a batch of predictions for comparison.

In [None]:
# Targets and predictions for the first validation batch of the 24-hour model.
rnn_data = []
rnn_comp_pred = []

for x, y in val_data_multi.take(1):
    # Predict the whole batch once instead of once per sample.
    batch_pred = multi_step_model.predict(x)
    for a in range(len(x)):
        rnn_data.append(y[a])
        rnn_comp_pred.append(batch_pred[a])

The same RNN training and prediction process is then repeated with 48-hour and 72-hour past-history training windows.

# RNN - 48 Hour Training Window

In [None]:
# 48-hour model: longer history; horizon and stride are re-declared with the
# same values as in the 24-hour section.
long_mvms_past_history = 48
mvms_future_target = 24
mvms_STEP = 1

In [None]:
# Same windowing as the 24-hour model but with 48 rows of history per sample.
long_x_train_multi, long_y_train_multi = multivariate_data(usageDataArray, usageDataArray[:, 0], 0,
                                                 TRAIN_SPLIT, long_mvms_past_history,
                                                 mvms_future_target, mvms_STEP)
long_x_val_multi, long_y_val_multi = multivariate_data(usageDataArray, usageDataArray[:, 0],
                                             TRAIN_SPLIT, None, long_mvms_past_history,
                                             mvms_future_target, mvms_STEP)

In [None]:
# Shapes of one 48-hour input window and of its label vector.
print ('Single window of past history : {}'.format(long_x_train_multi[0].shape))
print ('\n Target usage to predict : {}'.format(long_y_train_multi[0].shape))

In [None]:
# Training pipeline: cached, shuffled, batched and repeated indefinitely.
long_train_data_multi = tf.data.Dataset.from_tensor_slices((long_x_train_multi, long_y_train_multi))
long_train_data_multi = long_train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation pipeline: batched in original order and repeated.
long_val_data_multi = tf.data.Dataset.from_tensor_slices((long_x_val_multi, long_y_val_multi))
long_val_data_multi = long_val_data_multi.batch(BATCH_SIZE).repeat()

In [None]:
# Plot one training example. np.array([0]) is a placeholder whose .any() is
# False, so multi_step_plot draws no prediction line.
for x, y in long_train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))

In [None]:
# 48-hour-history model: same layout as the 24-hour model, with two
# 100-unit LSTM layers.
long_input_window_shape = long_x_train_multi.shape[-2:]

long_multi_step_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=10, kernel_size=3, activation='relu', input_shape=long_input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.Conv1D(filters=20, kernel_size=3, activation='relu', input_shape=long_input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.LSTM(100, activation='relu',return_sequences=True),
    tf.keras.layers.LSTM(100, activation='relu'),

    tf.keras.layers.Dropout(rate=0.3),

    tf.keras.layers.Dense(24),
])

# MAE loss; MSE and the custom RMSE are tracked as additional metrics.
long_multi_step_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
    metrics=['mean_squared_error', root_mean_squared_error])

In [None]:
# Sanity check: print the untrained model's output shape for one validation batch.
for x, y in long_val_data_multi.take(1):
  print (long_multi_step_model.predict(x).shape)

In [None]:
# Train for 10 epochs of 200 batches each; 50 validation batches are evaluated per epoch.
long_multi_step_history = long_multi_step_model.fit(long_train_data_multi, epochs=10,
                                          steps_per_epoch=200,
                                          validation_data=long_val_data_multi,
                                          validation_steps=50)

In [None]:
# Loss curves for the 48-hour model; rmse=True also draws the training and validation RMSE.
plot_train_history(long_multi_step_history, 'Multi-Step Training and validation loss', rmse= True)

In [None]:
# Targets and predictions for the first validation batch of the 48-hour model.
long_rnn_data = []
long_rnn_comp_pred = []

for x, y in long_val_data_multi.take(1):
    # Predict the whole batch once instead of once per sample.
    long_batch_pred = long_multi_step_model.predict(x)
    for a in range(len(x)):
        long_rnn_data.append(y[a])
        long_rnn_comp_pred.append(long_batch_pred[a])

In [None]:
# Sample 200 of the first validation batch: actual 24-hour usage against the 48-hour model's prediction.
dates = np.arange(0,24)
    
plt.plot(dates, long_rnn_data[200], c='b', label='Data')
plt.plot(dates, long_rnn_comp_pred[200], c= 'y', label='RNN model')
    
plt.xlabel('Hours')
plt.ylabel('Usage')
plt.title('Prediction Comparison')
plt.legend()
plt.show()

# RNN - 72 Hour Training Window

In [None]:
# 72-hour model: longer history; horizon and stride are re-declared with the
# same values as in the earlier sections.
three_day_mvms_past_history = 72
mvms_future_target = 24
mvms_STEP = 1

In [None]:
# Same windowing as the other models but with 72 rows of history per sample.
three_day_x_train_multi, three_day_y_train_multi = multivariate_data(usageDataArray, usageDataArray[:, 0], 0,
                                                 TRAIN_SPLIT, three_day_mvms_past_history,
                                                 mvms_future_target, mvms_STEP)
three_day_x_val_multi, three_day_y_val_multi = multivariate_data(usageDataArray, usageDataArray[:, 0],
                                             TRAIN_SPLIT, None, three_day_mvms_past_history,
                                             mvms_future_target, mvms_STEP)

In [None]:
# Shapes of one 72-hour input window and of its label vector.
print ('Single window of past history : {}'.format(three_day_x_train_multi[0].shape))
print ('\n Target usage to predict : {}'.format(three_day_y_train_multi[0].shape))

In [None]:
# Training pipeline: cached, shuffled, batched and repeated indefinitely.
three_day_train_data_multi = tf.data.Dataset.from_tensor_slices((three_day_x_train_multi, three_day_y_train_multi))
three_day_train_data_multi = three_day_train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation pipeline: batched in original order and repeated.
three_day_val_data_multi = tf.data.Dataset.from_tensor_slices((three_day_x_val_multi, three_day_y_val_multi))
three_day_val_data_multi = three_day_val_data_multi.batch(BATCH_SIZE).repeat()

In [None]:
# Plot one training example. np.array([0]) is a placeholder whose .any() is
# False, so multi_step_plot draws no prediction line.
for x, y in three_day_train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))

In [None]:
# 72-hour-history model: same layout as the 48-hour model.
three_day_input_window_shape = three_day_x_train_multi.shape[-2:]

three_day_multi_step_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=10, kernel_size=3, activation='relu', input_shape=three_day_input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.Conv1D(filters=20, kernel_size=3, activation='relu', input_shape=three_day_input_window_shape),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(rate=0.1),

    tf.keras.layers.LSTM(100, activation='relu',return_sequences=True),
    tf.keras.layers.LSTM(100, activation='relu'),

    tf.keras.layers.Dropout(rate=0.3),

    tf.keras.layers.Dense(24),
])

# MAE loss; MSE and the custom RMSE are tracked as additional metrics.
three_day_multi_step_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
    metrics=['mean_squared_error', root_mean_squared_error])

In [None]:
# Sanity check: print the untrained model's output shape for one validation batch.
for x, y in three_day_val_data_multi.take(1):
  print (three_day_multi_step_model.predict(x).shape)

In [None]:
# Train for 10 epochs of 200 batches each; 50 validation batches are evaluated per epoch.
three_day_multi_step_history = three_day_multi_step_model.fit(three_day_train_data_multi, epochs=10,
                                          steps_per_epoch=200,
                                          validation_data=three_day_val_data_multi,
                                          validation_steps=50)

In [None]:
# Loss curves for the 72-hour model; rmse=True also draws the training and validation RMSE.
plot_train_history(three_day_multi_step_history, 'Multi-Step Training and validation loss', rmse= True)

In [None]:
# Targets and predictions for the first validation batch of the 72-hour model.
three_day_rnn_data = []
three_day_rnn_comp_pred = []

for x, y in three_day_val_data_multi.take(1):
    # Predict the whole batch once instead of once per sample.
    three_day_batch_pred = three_day_multi_step_model.predict(x)
    for a in range(len(x)):
        three_day_rnn_data.append(y[a])
        three_day_rnn_comp_pred.append(three_day_batch_pred[a])

In [None]:
# Sample 200 of the first validation batch: actual 24-hour usage against the 72-hour model's prediction.
dates = np.arange(0,24)
    
plt.plot(dates, three_day_rnn_data[200], c='b', label='Data')
plt.plot(dates, three_day_rnn_comp_pred[200], c= 'y', label='RNN model')
    
plt.xlabel('Hours')
plt.ylabel('Usage')
plt.title('Prediction Comparison')
plt.legend()
plt.show()

# Evaluation

The following functions take in 24 hour periods of data and evaluate predictions made over them for three different metrics.

In [None]:
def twentyfour_hour_mae(y_pred, y_true):
    """Return a 1-D array with the mean absolute error of each prediction window.

    ``y_pred`` and ``y_true`` are indexable collections of equal-length
    windows; element ``a`` of the result compares ``y_pred[a]`` with ``y_true[a]``.
    """
    per_window = [mean_absolute_error(y_pred[idx], y_true[idx])
                  for idx in range(len(y_pred))]
    return np.array(per_window)

In [None]:
def twentyfour_hour_rmse(y_pred, y_true):
    """Return a 1-D array with the RMSE of each prediction window.

    Element ``a`` of the result is ``simple_rmse(y_pred[a], y_true[a])``.
    """
    per_window = [simple_rmse(y_pred[idx], y_true[idx])
                  for idx in range(len(y_pred))]
    return np.array(per_window)

In [None]:
def twentyfour_hour_r_squared(y_pred, y_true):
    """Return a 1-D array with the coefficient of determination of each window.

    The previous body called ``get_r_squared``, which is not defined in the
    visible notebook, so any call would have raised NameError. R² is now
    computed directly as ``1 - SS_res / SS_tot`` with ``y_true[a]`` as the
    reference series.

    Parameters
    ----------
    y_pred, y_true : indexable collections of equal-length numeric windows.

    Returns
    -------
    numpy.ndarray
        One R² value per window. When a reference window is constant
        (``SS_tot == 0``) the value is 1.0 for a perfect prediction and 0.0
        otherwise.
    """
    def _r_squared(pred, true):
        # Coefficient of determination of one window.
        pred = np.asarray(pred, dtype=float)
        true = np.asarray(true, dtype=float)
        ss_res = np.sum((true - pred) ** 2)
        ss_tot = np.sum((true - np.mean(true)) ** 2)
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1.0 - ss_res / ss_tot

    return np.array([_r_squared(y_pred[a], y_true[a]) for a in range(len(y_pred))])

In [None]:
def smape(actual, forecasted):
    """Symmetric mean absolute percentage error of two equal-length series.

    Computes ``mean(2 * |forecasted - actual| / (|actual| + |forecasted|))``.
    Positions where both values are zero contribute 0 instead of the NaN that
    0/0 would produce. Inputs may be lists, NumPy arrays or anything else
    ``np.asarray`` can convert.

    Raises
    ------
    ZeroDivisionError
        If ``actual`` is empty.
    """
    actual = np.asarray(actual, dtype=float)
    forecasted = np.asarray(forecasted, dtype=float)

    numerator = 2 * np.abs(forecasted - actual)
    denominator = np.abs(actual) + np.abs(forecasted)
    # Divide only where the denominator is non-zero; other terms stay at 0.
    terms = np.divide(numerator, denominator,
                      out=np.zeros_like(numerator), where=denominator != 0)
    return 1/len(actual) * np.sum(terms)

In [None]:
def twentyfour_hour_smape(y_pred, y_true):
    """Return a 1-D array with the SMAPE of each prediction window.

    Element ``a`` of the result is ``smape(y_pred[a], y_true[a])``.
    """
    per_window = [smape(y_pred[idx], y_true[idx])
                  for idx in range(len(y_pred))]
    return np.array(per_window)

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
from math import sqrt

def simple_rmse(y_true, y_pred):
    """Return the root of scikit-learn's mean squared error for the two series."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)

We then calculate the error of each model across their batches of predictions.

In [None]:
# Each model is scored against the targets of its OWN validation windows.
# The history length shifts where the first window starts, so the targets of
# the 24h, 48h and 72h models are different series and are not interchangeable.

# 24-hour RNN
rnn_batch_mae = twentyfour_hour_mae(rnn_comp_pred, rnn_data)
rnn_batch_rmse = twentyfour_hour_rmse(rnn_comp_pred, rnn_data)
rnn_batch_smape = twentyfour_hour_smape(rnn_data, rnn_comp_pred)

# 48-hour RNN
two_day_rnn_batch_mae = twentyfour_hour_mae(long_rnn_comp_pred, long_rnn_data)
two_day_rnn_batch_rmse = twentyfour_hour_rmse(long_rnn_comp_pred, long_rnn_data)
two_day_rnn_batch_smape = twentyfour_hour_smape(long_rnn_data, long_rnn_comp_pred)

# 72-hour RNN
three_day_rnn_batch_mae = twentyfour_hour_mae(three_day_rnn_comp_pred, three_day_rnn_data)
three_day_rnn_batch_rmse = twentyfour_hour_rmse(three_day_rnn_comp_pred, three_day_rnn_data)
three_day_rnn_batch_smape = twentyfour_hour_smape(three_day_rnn_data, three_day_rnn_comp_pred)

# SVM: predictions were made on svm_x_val[0..n), so the matching targets are
# the first n entries of svm_y_val.
svm_truth = svm_y_val[:len(svm_comp_pred)]
svm_batch_mae = twentyfour_hour_mae(svm_comp_pred, svm_truth)
svm_batch_rmse = twentyfour_hour_rmse(svm_comp_pred, svm_truth)
svm_batch_smape = twentyfour_hour_smape(svm_truth, svm_comp_pred)

We plot the cumulative error for models across their prediction batches

In [None]:
# Cumulative MAE over the 256 evaluated 24-hour windows: 24-hour RNN vs SVM.
dates = np.arange(0,256)

plt.plot(dates, rnn_batch_mae.cumsum(), color = 'b', label='RNN Error')
plt.plot(dates, svm_batch_mae.cumsum(), color = 'g', label='SVM Error')

plt.xlabel('24 Hour Periods')
plt.ylabel('MAE')
plt.title('Error Comparison')
plt.legend()
plt.show()

In [None]:
# Cumulative RMSE over the 256 evaluated 24-hour windows: 24-hour RNN vs SVM.
dates = np.arange(0,256)

plt.plot(dates, rnn_batch_rmse.cumsum(), color = 'b', label='RNN Error')
plt.plot(dates, svm_batch_rmse.cumsum(), color = 'g', label='SVM Error')

plt.xlabel('24 Hour Periods')
plt.ylabel('RMSE')
plt.title('Error Comparison')
plt.legend()
plt.show()

We then collate and graph the results of the error across each different model

In [None]:
svmBars = [np.mean(svm_batch_mae), np.mean(svm_batch_rmse)]
rnnBars = [np.mean(two_day_rnn_batch_mae), np.mean(two_day_rnn_batch_rmse)]

# Rows are kept as plain Python lists. Wrapping mixed str/float rows in
# np.array converts every value to a string, which leaves the error column
# non-numeric when it reaches the bar plots.
errorColumns = ["Model", "Metric", "Accuracy"]  # "Accuracy" holds the mean error; name kept for the plotting cells

# SVM against the 24-hour RNN.
groupedBarErrorData = [["SVM", "MAE", np.mean(svm_batch_mae)],
                       ["SVM", "RMSE", np.mean(svm_batch_rmse)],
                       ["SVM", "SMAPE", np.mean(svm_batch_smape)],
                       ["24H-RNN", "MAE", np.mean(rnn_batch_mae)],
                       ["24H-RNN", "RMSE", np.mean(rnn_batch_rmse)],
                       ["24H-RNN", "SMAPE", np.mean(rnn_batch_smape)]]

# The three RNN history lengths against each other.
rnnGroupedBarErrorData = [["24H-RNN", "MAE", np.mean(rnn_batch_mae)],
                          ["24H-RNN", "RMSE", np.mean(rnn_batch_rmse)],
                          ["24H-RNN", "SMAPE", np.mean(rnn_batch_smape)],
                          ["48H-RNN", "MAE", np.mean(two_day_rnn_batch_mae)],
                          ["48H-RNN", "RMSE", np.mean(two_day_rnn_batch_rmse)],
                          ["48H-RNN", "SMAPE", np.mean(two_day_rnn_batch_smape)],
                          ["72H-RNN", "MAE", np.mean(three_day_rnn_batch_mae)],
                          ["72H-RNN", "RMSE", np.mean(three_day_rnn_batch_rmse)],
                          ["72H-RNN", "SMAPE", np.mean(three_day_rnn_batch_smape)]]

groupedBarErrorDataFrame = pd.DataFrame(groupedBarErrorData, columns=errorColumns)
rnnGroupedBarErrorDataFrame = pd.DataFrame(rnnGroupedBarErrorData, columns=errorColumns)

groupedBarErrorDataFrame.head(10)

In [None]:
import seaborn as sns
sns.set(style="whitegrid")

# Grouped bars: one group per metric, one bar per model (SVM vs 24-hour RNN).
# The "Accuracy" column holds the mean error, hence the "Error" axis label.
g = sns.catplot(x="Metric", y="Accuracy", hue="Model", data=groupedBarErrorDataFrame,
                height=6, kind="bar", palette="bright")
g.despine(left=True)
g.set_ylabels("Error\n", fontsize = 16)
g.set_xlabels("\nMetric", fontsize = 16)

In [None]:
import seaborn as sns
sns.set(style="whitegrid")

# Grouped bars: one group per metric, one bar per RNN history length (24h / 48h / 72h).
# The "Accuracy" column holds the mean error, hence the "Error" axis label.
g = sns.catplot(x="Metric", y="Accuracy", hue="Model", data=rnnGroupedBarErrorDataFrame,
                height=6, kind="bar", palette="muted")
g.despine(left=True)
g.set_ylabels("Error\n", fontsize = 16)
g.set_xlabels("\nMetric", fontsize = 16)