In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('cleaned_data\\cleaned_df.csv', parse_dates=['tstp'])

In [19]:
df.columns[-20:]

Index(['TRANSPORT_Travel To Work_Train / Tube / Tram',
       'TRANSPORT_Travel To Work_Work mainly at or from home', 'date', 'Type',
       'year', 'month', 'day', 'dayofweek_num', 'is_weekend', 'season',
       'no_work', 'hour', 'at_home', 'shifted_energy_24h',
       'shifted_energy_48h', 'shifted_energy_72h', 'shifted_energy_96h',
       'shifted_energy_120h', 'shifted_energy_144h', 'shifted_energy_168h'],
      dtype='object')

In [3]:
columns_desired = ['tstp', 'energy(kWh/hh)', 'LCLid', 'stdorToU', 'Acorn', 'visibility',
       'windBearing', 'temperature', 'dewPoint', 'pressure',
       'apparentTemperature', 'windSpeed', 'precipType', 'humidity',
       'summary', 'Type',
       'year', 'month', 'day', 'dayofweek_num', 'is_weekend', 'season',
       'no_work', 'hour', 'at_home', 'shifted_energy_24h',
       'shifted_energy_48h', 'shifted_energy_72h', 'shifted_energy_96h',
       'shifted_energy_120h', 'shifted_energy_144h', 'shifted_energy_168h']

sampled_df = df[columns_desired]

In [33]:
# get one of the household for simplicity

singlehousehold_df = sampled_df.groupby(['LCLid'])
singlehousehold_df = singlehousehold_df.get_group('MAC003686')
singlehousehold_df.shape

(11823, 32)

In [34]:
import math

# one hot encoding
singlehousehold_df = singlehousehold_df.join(pd.get_dummies(singlehousehold_df[['precipType', 'summary', 'Type']]))
singlehousehold_df = singlehousehold_df.drop(['precipType', 'summary', 'Type'], axis=1)

# get the first 80% and forecast the remaining 20%, must be in order
cutoff_point = math.floor(singlehousehold_df.shape[0] * 0.8)
training_singlehousehold_df = singlehousehold_df.iloc[:cutoff_point]
testing_singlehousehold_df = singlehousehold_df.iloc[cutoff_point:]

X_train = training_singlehousehold_df.drop(['tstp', 'LCLid', 'stdorToU', 'Acorn', 'energy(kWh/hh)'], axis=1)
y_train = training_singlehousehold_df['energy(kWh/hh)']

X_test = testing_singlehousehold_df.drop(['tstp', 'energy(kWh/hh)'], axis=1)
y_test = testing_singlehousehold_df['energy(kWh/hh)']

print("X train shape:", X_train.shape)
print("X test shape:", X_test.shape)
print("y train shape:", y_train.shape)
print("y test shape:", y_test.shape)

X train shape: (9458, 47)
X test shape: (2365, 50)
y train shape: (9458,)
y test shape: (2365,)


In [35]:
### Prepare the data for LSTM
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        end_ix = i + n_steps
        if end_ix > len(sequences):
            break

        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)

    return np.array(X), np.array(y)

In [59]:
from numpy import hstack

n_steps = 3 # use 3 days to predict the next days
dataset = hstack((X_train.values, np.expand_dims(y_train.values, axis=1)))
X, y = split_sequences(dataset, n_steps)
X.shape, y.shape

((9456, 3, 47), (9456,))


### References for modelling in Tensorflow
https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/  

Can try:
1. Encoder-Decoder
2. CNN-LSTM
3. ConvLSTM


In [65]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional

tf.random.set_seed(42)

model = Sequential([
    Bidirectional(LSTM(units=128, activation="relu", return_sequences=True, input_shape=(n_steps, 47))),
    Bidirectional(LSTM(units=64, activation="relu")),
    Dense(100, activation="relu"),
    Dense(47)
])

model.compile(
    loss=tf.keras.losses.MAE,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['mae', 'mse', 'mape']
)

history = model.fit(
    X,
    y,
    epochs=50,
    batch_size=32
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
