# PJM Hourly Energy Consumption Case

PJM Interconnection LLC (PJM) is a regional transmission organization (RTO) in the United States. It is part of the Eastern Interconnection grid operating an electric transmission system serving all or parts of Delaware, Illinois, Indiana, Kentucky, Maryland, Michigan, New Jersey, North Carolina, Ohio, Pennsylvania, Tennessee, Virginia, West Virginia, and the District of Columbia.

The hourly power consumption data comes from PJM's website and are in megawatts (MW).

### LSTM Autoencoder Training Step - By Sabrina Otoni da Silva - 2024/04

In [27]:
from pathlib import Path 

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import keras.layers as L
from keras import Sequential, Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras_tuner import HyperModel, Hyperband

import pickle

import warnings
warnings.filterwarnings('ignore')

In [28]:
datapath = Path('../data/d02_intermediate')
modelpath = Path('../model')

In [29]:
df = pd.read_csv(f'{datapath}/pjme_train.csv')
df.set_index('datetime', inplace=True)
df.index = pd.to_datetime(df.index)
df.sort_index(inplace=True)
df.dropna(axis=0, how='any', inplace=True)

In [30]:
def temporalize(X, y, lookback):
    '''
    To convert input data into 3-D
    array as required for LSTM network.
    '''
    output_X = []
    output_y = []
    for i in range(len(X)-lookback-1):
        t = []
        for j in range(1,lookback+1):
            t.append(X[[(i+j+1)], :])
        output_X.append(t)
        output_y.append(y[i+lookback+1])
        
    return output_X, output_y

In [31]:
def flatten(X):
    '''
    Flatten a 3D array.
    Input
    X - A 3D array for lstm, where the array is sample x timesteps x features.

    Output
    flattened_X - A 2D array, sample x features.
    '''
    flattened_X = np.empty((X.shape[0], X.shape[2]))  # Sample x features array.
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, (X.shape[1]-1), :]
    return(flattened_X)

def scale(X, scaler):
    '''
    Scale 3D array.
    Inputs
    X - A 3D array for lstm, where the array is sample x timesteps x features.
    scaler - A scaler object, e.g., sklearn.preprocessing.StandardScaler, sklearn.preprocessing.normalize
    
    Output
    X - Scaled 3D array.
    '''
    for i in range(X.shape[0]):
        X[i, :, :] = scaler.transform(X[i, :, :])
        
    return X

In [32]:
n_features = df.shape[1] - 1
timesteps = 24

In [33]:
X_train, y_train = temporalize(X = np.array(df[['hour', 'dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'day', 'weekofyear', 'lag1', 'lag2', 'lag3']]), 
                   y = np.array(df[['pjme_mw']]), 
                   lookback = timesteps)

X_train = np.array(X_train)
X_train = X_train.reshape(X_train.shape[0], timesteps, n_features)
y_train = np.array(y_train)

In [8]:
scaler_x = StandardScaler().fit(flatten(X_train))
X_train = scale(X_train, scaler_x)

In [9]:
train, valid, y_train, y_valid = train_test_split(X_train, y_train.values, test_size=0.10, shuffle=False)

In [15]:
X_train = train
X_valid = valid

In [10]:
model = Sequential()
model.add(L.Input(shape=(timesteps, n_features)))

# Encoder
model.add(L.LSTM(24, activation='relu', input_shape=(timesteps, n_features), return_sequences=True))
model.add(L.LSTM(6, activation='relu', return_sequences=False))
model.add(L.RepeatVector(timesteps))

# Decoder
model.add(L.LSTM(6, activation='relu', return_sequences=True))
model.add(L.LSTM(24, activation='relu', return_sequences=True))
model.add(L.TimeDistributed(L.Dense(1, activation='linear')))
model.summary()

model.compile(optimizer=Adam(0.0001), loss='mse')

lstm_autoencoder = model.fit(X_train, X_train, 
                                              batch_size=128,
                                              epochs=50, 
                                              verbose=1)

Epoch 1/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 75ms/step - loss: 0.9192 - val_loss: 0.9941
Epoch 2/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 69ms/step - loss: 0.8265 - val_loss: 0.9900
Epoch 3/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 72ms/step - loss: 0.8167 - val_loss: 0.9854
Epoch 4/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 74ms/step - loss: 0.8081 - val_loss: 0.9822
Epoch 5/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 74ms/step - loss: 0.8054 - val_loss: 0.9786
Epoch 6/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 69ms/step - loss: 0.7995 - val_loss: 0.9752
Epoch 7/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 68ms/step - loss: 0.7937 - val_loss: 0.9691
Epoch 8/100
[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 65ms/step - loss: 0.7870 - val_loss: 0.9640
Epoch 9/100
[1m

In [14]:
rpt_vector_layer = Model(inputs=model.inputs, outputs=model.layers[3].output)
time_dist_layer = Model(inputs=model.inputs, outputs=model.layers[5].output)
model.layers

[<LSTM name=lstm, built=True>,
 <LSTM name=lstm_1, built=True>,
 <RepeatVector name=repeat_vector, built=True>,
 <LSTM name=lstm_2, built=True>,
 <LSTM name=lstm_3, built=True>,
 <TimeDistributed name=time_distributed, built=True>]

In [15]:
encoder = Model(inputs=model.inputs, outputs=model.layers[2].output)

In [16]:
train_encoded = encoder.predict(X_train)
validation_encoded = encoder.predict(X_valid)
print('Encoded time-series shape', train_encoded.shape)
print('Encoded time-series sample', train_encoded[0])

[1m3204/3204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
Encoded time-series shape (102527, 24, 6)
Encoded time-series sample [[9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324807  1.4281957  2.806819   0.6712399  0.85642487]
 [9.235951   5.0324

In [None]:
# X_train['encoded'] = train_encoded
# X_train['label'] = y_train

# X_valid['encoded'] = validation_encoded
# X_valid['label'] = y_valid

# X_train.head(10)

In [11]:
# class LSTMautoencoder(HyperModel):
#     def __init__(self, timesteps, n_features):
#         self.timesteps = timesteps
#         self.n_features = n_features
    
#     def build(self, hp):
#         model = Sequential()
#         model.add(L.Input(shape=(self.timesteps, self.n_features)))
#         # Encoder
#         model.add(L.LSTM(hp.Int('encoder_lstm_1_units', min_value=64, max_value=256, step=32),
#                         activation='relu',
#                         input_shape=(self.timesteps, self.n_features),
#                         return_sequences=True))
#         model.add(L.LSTM(hp.Int('encoder_lstm_2_units', min_value=32, max_value=128, step=32),
#                         activation='relu',
#                         return_sequences=False))
#         model.add(L.RepeatVector(self.timesteps))
#         # Decoder
#         model.add(L.LSTM(hp.Int('decoder_lstm_1_units', min_value=32, max_value=128, step=32),
#                         activation='relu',
#                         return_sequences=True))
#         model.add(L.LSTM(hp.Int('decoder_lstm_2_units', min_value=64, max_value=256, step=32),
#                         activation='relu',
#                         return_sequences=True))
#         model.add(L.TimeDistributed(L.Dense(1, activation='linear')))
        
#         model.compile(optimizer=Adam(
#                         hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
#                     loss='mse')
#         return model

In [11]:
# hypermodel = LSTMautoencoder(timesteps=timesteps, n_features=n_features)

In [12]:
# tuner = Hyperband(
#         hypermodel,
#         objective='val_loss',
#         max_epochs=40,
#         hyperband_iterations=2,
#         factor=3,
#         directory=f'{modelpath}/keras_tuner',
#         project_name='lstm_autoencoder'
#     )

In [None]:
# early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

# tuner.search(
#         x=X_train,
#         y=X_train,
#         epochs=50,
#         batch_size=128,
#         callbacks=[early_stopping],
#         validation_data=(X_valid, X_valid),
#         verbose=1
#     )

In [None]:
# best_model = tuner.get_best_models(num_models=1)[0]
# best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# best_model.save(f'{modelpath}/lstmautoencoder.h5')

# with open(f'{modelpath}/hyperparameters_lstmautoencoder.pkl', 'wb') as f:
#         pickle.dump(best_hyperparameters, f)

In [None]:
# plt.plot(history.history['loss'], label='Train Loss')
# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend()
# plt.show()