In [None]:
%pip install ipython
%pip install numpy
%pip install pandas
%pip install nbformat
%pip install statsmodels
%pip install mlforecast
%pip install lightgbm
%pip install sktime
%pip install scikit-learn
%pip install matplotlib
%pip install plotly
%pip install holidays

%pip install tensorflow
%pip install tensorboard

In [1]:
import re
import numpy as np
import pandas as pd

from tqdm import tqdm
import holidays
th_holidays = holidays.TH()

import plotly.express as px
import plotly.io as pio
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
physical_device = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_device[0], True)

from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Dropout, Flatten, Input, MultiHeadAttention, LayerNormalization, Concatenate, concatenate
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.models import load_model


# Helper functions
def if_work_day(date): 
    if ((date in th_holidays) or (date.weekday()>=5)):
        return 0
    else: return 1
    
def reshape_ts_X(input_vector, time_steps, batch_size=None):
    x = np.lib.stride_tricks.sliding_window_view(input_vector[:-1], 
            window_shape=(time_steps, * input_vector.shape[1:]))
    x = x.reshape(-1, time_steps, * input_vector.shape[1:]) # removes extra dimension
    if (batch_size is not None) and (batch_size > 1):
        x = np.array_split(x, len(x) // batch_size)
    # else: x = x[np.newaxis, :, :] 
    return x

def reshape_ts_Y(target_vector, time_steps, batch_size=None):
    y = target_vector[time_steps:]
    if (batch_size is not None) and (batch_size > 1):
        y = np.array_split(y, len(y) // batch_size)
    # else: y = y[np.newaxis, :]
    return y

In [2]:
grayscale_template = {
    "layout": {
        "paper_bgcolor": "white",  # Background color
        "plot_bgcolor": "white",   # Plot background color
        "font": {
            "color": "black"  # Text color
        },
        "xaxis": {
            "linecolor": "black",  # X-axis line color
            "gridcolor": "lightgray",  # X-axis grid color
            "zerolinecolor": "gray"    # X-axis zero line color
        },
        "yaxis": {
            "linecolor": "black",  # Y-axis line color
            "gridcolor": "lightgray",  # Y-axis grid color
            "zerolinecolor": "gray"    # Y-axis zero line color
        }
    }
}

# Set the default template to the grayscale template
pio.templates["grayscale"] = grayscale_template
pio.templates.default = "grayscale"

#### Data import

In [3]:
merged_df = pd.read_csv('data/clean_merged_dataset.csv',parse_dates = ['Date']).set_index('Date')

# Combine by type (Light, Plug, AC)
merged_df.columns = [re.sub(r"\d+$", "", column) for column in merged_df.columns]
merged_df = merged_df.groupby(by=merged_df.columns, axis=1).sum()

tuple_columns = [(x[0][-1], x[1][-1], x[2]) for x in [column.split('_') for column in merged_df.columns]]
merged_df.columns = pd.MultiIndex.from_tuples(tuple_columns, names=['Floor', 'Zone','Sensor'])


merged_df = merged_df.resample('D').sum() 
merged_df = merged_df.iloc[:-2] # For clean weeks in testing

total_load_df = merged_df.sum(axis=1)
total_by_zone_df = merged_df.groupby(level=1, axis = 1).sum()
total_by_type_df = merged_df.groupby(level=2, axis = 1).sum()

test_len = 7*37

#### DATA BY TOTAL

In [4]:
### DATA BY TOTAL

data = total_load_df.reset_index()
data.columns = ['Date', 'Total']
data['If Work Day'] = data['Date'].map(if_work_day)
X = data.iloc[:, 1:].to_numpy()
next_if_work_day = data['If Work Day'].to_numpy().reshape(-1, 1)
y = np.array(data[['Total']])

scaler = MinMaxScaler()
scaler.fit(X)
X_std = scaler.transform(X)
X_std

time_steps = 7

X_train, X_test = X_std[:-test_len], X_std[-test_len-time_steps:]

next_if_work_day_train = next_if_work_day[:-test_len]
next_if_work_day_test = next_if_work_day[-test_len-time_steps:]
    
y_train, y_test = y[:-test_len], y[-test_len-time_steps:]

X_train_batched_ts = reshape_ts_X(X_train, time_steps)
next_if_work_day_train_ts = reshape_ts_Y(next_if_work_day_train, time_steps)
y_train_batched_ts = reshape_ts_Y(y_train, time_steps)

X_test_batched_ts = reshape_ts_X(X_test, time_steps, 7)
next_if_work_day_test_ts = reshape_ts_Y(next_if_work_day_test, time_steps, 7)
y_test_batched_ts = reshape_ts_Y(y_test, time_steps, 7)

#### DATA BY ZONE

In [4]:
### DATA BY ZONE

data = total_by_zone_df.reset_index()
data['If Work Day'] = data['Date'].map(if_work_day)
X = data.iloc[:, 1:].to_numpy()
next_if_work_day = data['If Work Day'].to_numpy().reshape(-1, 1)
y = data.iloc[:, 1:-1].to_numpy()


scaler = MinMaxScaler()
scaler.fit(X)
X_std = scaler.transform(X)
X_std

time_steps = 7

X_train, X_test = X_std[:-test_len], X_std[-test_len-time_steps:]

next_if_work_day_train = next_if_work_day[:-test_len]
next_if_work_day_test = next_if_work_day[-test_len-time_steps:]
    
y_train, y_test = y[:-test_len], y[-test_len-time_steps:]


X_train_batched_ts = reshape_ts_X(X_train, time_steps)
next_if_work_day_train_ts = reshape_ts_Y(next_if_work_day_train, time_steps)
y_train_batched_ts = reshape_ts_Y(y_train, time_steps)

X_test_batched_ts = reshape_ts_X(X_test, time_steps, 7)
next_if_work_day_test_ts = reshape_ts_Y(next_if_work_day_test, time_steps, 7)
y_test_batched_ts = reshape_ts_Y(y_test, time_steps, 7)

#### DATA BY TYPE

In [None]:
### DATA BY TYPE

data = total_by_type_df.reset_index()
data['If Work Day'] = data['Date'].map(if_work_day)
X = data.iloc[:, 1:].to_numpy()
next_if_work_day = data['If Work Day'].to_numpy().reshape(-1, 1)
y = np.array(data[['AC', 'Light', 'Plug']])

scaler = MinMaxScaler()
scaler.fit(X)
X_std = scaler.transform(X)
X_std

time_steps = 7

X_train, X_test = X_std[:-test_len], X_std[-test_len-time_steps:]

next_if_work_day_train = next_if_work_day[:-test_len]
next_if_work_day_test = next_if_work_day[-test_len-time_steps:]
    
y_train, y_test = y[:-test_len], y[-test_len-time_steps:]

X_train_batched_ts = reshape_ts_X(X_train, time_steps)
next_if_work_day_train_ts = reshape_ts_Y(next_if_work_day_train, time_steps)
y_train_batched_ts = reshape_ts_Y(y_train, time_steps)

X_test_batched_ts = reshape_ts_X(X_test, time_steps, 7)
next_if_work_day_test_ts = reshape_ts_Y(next_if_work_day_test, time_steps, 7)
y_test_batched_ts = reshape_ts_Y(y_test, time_steps, 7)

#### MODEL - LSTM

In [20]:
# MODEL - LSTM

input1 = Input(shape=(time_steps, X.shape[1]))
input2 = Input(shape=(1,))  

lstm1 = LSTM(units=128, return_sequences=True)(input1)
lstm1 = Dropout(0.2)(lstm1)

lstm2 = LSTM(units=64, return_sequences=True)(lstm1)
lstm2 = Dropout(0.2)(lstm2)

lstm3 = LSTM(units=32, return_sequences=True)(lstm2)
lstm3 = Dropout(0.2)(lstm3)

lstm4 = LSTM(units=64, return_sequences=True)(lstm3)
lstm4 = Dropout(0.2)(lstm4)

lstm5 = LSTM(units=32, return_sequences=False)(lstm4)
lstm5 = Dropout(0.2)(lstm5)

fl_1 = Flatten()(lstm5)
concatenated = concatenate([fl_1, input2])

dense_1 = Dense(units=16)(concatenated)
dense_2 = Dense(units=8)(dense_1)
dense_3 = Dense(units=5)(dense_2)
output = Dense(units=y.shape[1])(dense_3)

model = Model(inputs=[input1, input2], outputs=output)

model.compile(loss='mae', optimizer=Adam(), metrics=['mape'])

#### MODEL - Transformer

In [5]:
### MODEL - Transformer

# Define the input layer
input_ts = Input(shape=(time_steps, X.shape[1]))
input_static = Input(shape=(1,))

# Transformer block
x = MultiHeadAttention(num_heads=8, key_dim=128)(input_ts, input_ts)
x = Dropout(0.2)(x)
x = LayerNormalization(epsilon=1e-6)(x)

fl_1 = Flatten()(x)
fl_2 = Flatten()(input_static)
x = Concatenate(axis=-1)([fl_1, fl_2])

# Additional dense layers
x = Dense(128, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)

# Output layer
output = Dense(units=y.shape[1])(x)

# Create and compile the model
model = Model(inputs=[input_ts, input_static], outputs=output)
model.compile(loss='mae', optimizer='adam', metrics=['mape'])

In [8]:
# ### WEEKLY PREDICT

# combined_history = {'loss': [], 'mape': []}
# history = model.fit(
#         x=[X_train_batched_ts, next_if_work_day_train_ts],
#         y=y_train_batched_ts,
#         epochs=4000,
#         verbose=1) # TODO: change epochs
# for key in combined_history.keys():
#     combined_history[key].extend(history.history[key])

# X_lagged_input = X_train_batched_ts[-1].tolist()
# if_work_day_input = next_if_work_day_train_ts[-1]

predictions = []
for t_week in tqdm(range(int(test_len/7)), desc="Processing Weeks"):
    for t_day in range(7):
        # Predict next day
        next_pred = model([np.expand_dims(X_lagged_input, axis=0), 
                           np.expand_dims(if_work_day_input, axis=0)]).numpy().tolist()
        predictions.append(next_pred)
        
        # Update input
        input_X = next_pred[0] + next_if_work_day_test[t_week*7+t_day].tolist()
        X_lagged_input = X_lagged_input[1:] + scaler.transform([input_X]).tolist()
        
        if_work_day_input = next_if_work_day_test[t_week*7+t_day+1]
    
    # Train on current week
    history = model.fit(
        x=[X_test_batched_ts[t_week], next_if_work_day_test_ts[t_week]],
        y=y_test_batched_ts[t_week],
        epochs=400,
        verbose=0) # TODO: change epochs
    for key in combined_history.keys():
        combined_history[key].extend(history.history[key])
        print(f"{key} - {combined_history[key][-1]}")
        
history_df = pd.DataFrame(combined_history)
px.line(history_df['loss'], title='Loss').show()
px.line(history_df['mape'], title='Mape').show()
px.line(np.array(predictions).sum(axis=2), title='Prediction').show()

Processing Weeks:   3%|▎         | 1/37 [00:08<05:02,  8.40s/it]

loss - 54.82292556762695
mape - 24.45538330078125


Processing Weeks:   5%|▌         | 2/37 [00:15<04:26,  7.61s/it]

loss - 67.61811828613281
mape - 12.377725601196289


Processing Weeks:   8%|▊         | 3/37 [00:21<03:54,  6.91s/it]

loss - 66.93217468261719
mape - 5.82842493057251


Processing Weeks:  11%|█         | 4/37 [00:28<03:53,  7.07s/it]

loss - 89.115966796875
mape - 19.610836029052734


Processing Weeks:  14%|█▎        | 5/37 [00:38<04:13,  7.94s/it]

loss - 88.82926940917969
mape - 10.732290267944336


Processing Weeks:  16%|█▌        | 6/37 [00:44<03:48,  7.36s/it]

loss - 78.0472640991211
mape - 24.214900970458984


Processing Weeks:  19%|█▉        | 7/37 [00:52<03:44,  7.50s/it]

loss - 61.84279251098633
mape - 10.710753440856934


Processing Weeks:  22%|██▏       | 8/37 [01:04<04:16,  8.85s/it]

loss - 95.05472564697266
mape - 13.208022117614746


Processing Weeks:  24%|██▍       | 9/37 [01:12<04:07,  8.83s/it]

loss - 84.24369812011719
mape - 21.111833572387695


Processing Weeks:  27%|██▋       | 10/37 [01:22<04:05,  9.08s/it]

loss - 80.896240234375
mape - 7.550234317779541


Processing Weeks:  30%|██▉       | 11/37 [01:32<04:03,  9.36s/it]

loss - 83.46475982666016
mape - 9.970361709594727


Processing Weeks:  32%|███▏      | 12/37 [01:40<03:47,  9.10s/it]

loss - 60.246341705322266
mape - 7.143639087677002


Processing Weeks:  35%|███▌      | 13/37 [01:49<03:36,  9.00s/it]

loss - 73.33911895751953
mape - 10.628579139709473


Processing Weeks:  38%|███▊      | 14/37 [01:57<03:18,  8.61s/it]

loss - 83.18834686279297
mape - 12.784944534301758


Processing Weeks:  41%|████      | 15/37 [02:06<03:10,  8.67s/it]

loss - 88.95597076416016
mape - 17.95924949645996


Processing Weeks:  43%|████▎     | 16/37 [02:15<03:02,  8.71s/it]

loss - 63.24969482421875
mape - 30.26967430114746


Processing Weeks:  46%|████▌     | 17/37 [02:25<03:03,  9.17s/it]

loss - 56.33114242553711
mape - 15.074394226074219


Processing Weeks:  49%|████▊     | 18/37 [02:34<02:53,  9.14s/it]

loss - 60.8880615234375
mape - 10.463302612304688


Processing Weeks:  51%|█████▏    | 19/37 [02:42<02:40,  8.91s/it]

loss - 61.35544204711914
mape - 6.947913646697998


Processing Weeks:  54%|█████▍    | 20/37 [02:50<02:26,  8.63s/it]

loss - 62.46134567260742
mape - 8.255083084106445


Processing Weeks:  57%|█████▋    | 21/37 [02:59<02:17,  8.56s/it]

loss - 48.61511993408203
mape - 13.5523042678833


Processing Weeks:  59%|█████▉    | 22/37 [03:06<02:01,  8.12s/it]

loss - 77.96829986572266
mape - 9.240156173706055


Processing Weeks:  62%|██████▏   | 23/37 [03:13<01:51,  7.94s/it]

loss - 50.87703323364258
mape - 9.474930763244629


Processing Weeks:  65%|██████▍   | 24/37 [03:22<01:45,  8.08s/it]

loss - 71.32770538330078
mape - 6.757086277008057


Processing Weeks:  68%|██████▊   | 25/37 [03:28<01:32,  7.67s/it]

loss - 55.6748046875
mape - 6.116030693054199


Processing Weeks:  70%|███████   | 26/37 [03:36<01:23,  7.61s/it]

loss - 61.78285598754883
mape - 12.604533195495605


Processing Weeks:  73%|███████▎  | 27/37 [03:45<01:21,  8.15s/it]

loss - 52.036659240722656
mape - 24.05117416381836


Processing Weeks:  76%|███████▌  | 28/37 [03:53<01:11,  7.92s/it]

loss - 74.03944396972656
mape - 9.8671293258667


Processing Weeks:  78%|███████▊  | 29/37 [04:00<01:01,  7.74s/it]

loss - 49.82577896118164
mape - 7.060957431793213


Processing Weeks:  81%|████████  | 30/37 [04:08<00:53,  7.68s/it]

loss - 65.40270233154297
mape - 7.896965980529785


Processing Weeks:  84%|████████▍ | 31/37 [04:16<00:48,  8.06s/it]

loss - 99.03076171875
mape - 8.805926322937012


Processing Weeks:  86%|████████▋ | 32/37 [04:25<00:40,  8.05s/it]

loss - 44.38007354736328
mape - 4.741428852081299


Processing Weeks:  89%|████████▉ | 33/37 [04:33<00:32,  8.04s/it]

loss - 42.76724624633789
mape - 6.000000476837158


Processing Weeks:  92%|█████████▏| 34/37 [04:41<00:24,  8.31s/it]

loss - 41.50034713745117
mape - 5.5208234786987305


Processing Weeks:  95%|█████████▍| 35/37 [04:49<00:16,  8.11s/it]

loss - 44.69401931762695
mape - 6.034533977508545


Processing Weeks:  97%|█████████▋| 36/37 [04:57<00:08,  8.03s/it]

loss - 62.73748779296875
mape - 7.889557361602783


Processing Weeks: 100%|██████████| 37/37 [05:06<00:00,  8.28s/it]

loss - 77.92162322998047
mape - 8.14619255065918



