In [1]:
# Load Packages
import pandas as pd
import numpy as np

import plotly.express as px

import warnings
warnings.filterwarnings('ignore')


from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import set_random_seed


import model_generalized
import model_prep

2023-06-19 15:14:29.746290: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def _read_tower_csv(csv_path):
    """Read a preprocessed tower CSV, indexed by its 'time' column parsed as datetimes."""
    frame = pd.read_csv(csv_path, index_col='time')
    frame.index = pd.to_datetime(frame.index)
    return frame


kissam_df = _read_tower_csv('../data/kissam/kissam_tower_2_preprocessed.csv')
esb_df = _read_tower_csv('../data/esb/esb_tower_2_preprocessed.csv')

# ESB tower 2 has two vfdPower columns (cells 2a and 2b) while Kissam tower 2 has
# a single one. To give both towers the same number of vfdPower features, the
# Kissam column is copied into a '2a' and a '2b' column and the original is dropped.
kissam_vfd_power = kissam_df['Kissam_Tower_2 vfdPower']
kissam_df['Kissam_Tower_2a vfdPower'] = kissam_vfd_power
kissam_df['Kissam_Tower_2b vfdPower'] = kissam_vfd_power
kissam_df = kissam_df.drop(columns='Kissam_Tower_2 vfdPower')

# Feature and target columns for each tower. The two feature lists are parallel:
# position i in one list corresponds to position i in the other.
kissam_features = [
    'Kissam_Tower_2 enteringWaterTemp',
    'outdoorAirDryBulb',
    'outdoorAirWetBulb',
    'Kissam_Tower_2 vfdPercent',
    'Kissam_Tower_2a vfdPower',
    'Kissam_Tower_2b vfdPower',
]
kissam_target = 'Kissam_Tower_2 leavingWaterTemp'

# Note: the vfdPower columns are listed 2b then 2a here, whereas kissam_features
# lists 2a then 2b. The two Kissam columns are identical copies, so the positional
# pairing is unaffected.
esb_features = [
    'ESB_Tower_2 enteringWaterTemp',
    'ESB_Tower_2 outdoorAirDryBulb',
    'ESB_Tower_2 outdoorAirWetBulb',
    'ESB_Tower_2 vfdPercent',
    'Cell_2bFan vfdPower',
    'Cell_2aFan vfdPower',
]
esb_target = 'ESB_Tower_2 leavingWaterTemp'

# Create ESB Tower 2 Summer Model

In [3]:
# Run configuration: ESB tower 2, summer season only.
df = esb_df
season = "summer"
building_name = "ESB"
tower_number = 2
features = esb_features
target = esb_target
step_back = 6  # passed as n_in to create_timesteps and again to remove_irrelevant_data
save_timestepped = False  # not read anywhere in this cell

# 1. Convert data into a model-compatible shape

# Shared prefix of the tower-specific column names used below.
tower_prefix = f"{building_name}_Tower_{tower_number}"

print("\nSelecting season...")
df = model_prep.choose_season(
    df,
    season=season,
    season_col_name=f"{tower_prefix} season",
)

# Set the fanStatus column aside before the frame is narrowed to features and
# target; it is handed to remove_irrelevant_data further down (per the original
# author it flags whether the cooling tower is on).
on_condition = df[f"{tower_prefix} fanStatus"]

# Narrow the frame to the feature columns plus the target column.
print("\nSelecting target and features...")
target_column = df[target]
df = df[features].join(target_column, on=df.index)

# Normalize; the handler is kept in `scaler` because the results cell uses it to
# denormalize the predictions.
print("\nNormalizing...")
scaler = model_prep.NormalizationHandler()
df = scaler.normalize(dtframe=df, target_col=target)

# Add the lagged timesteps the LSTM consumes.
print("\nCreating timesteps...")
lstm_df = model_prep.create_timesteps(df, n_in=step_back, n_out=1, target_name=target)

# Drop rows that should not be modelled. The author's stated intent is to remove
# initial timesteps where spring data would leak into summer data.
print("\nRemoving irrelevant data...")
lstm_df = model_prep.remove_irrelevant_data(lstm_df, on_condition, step_back)



Selecting season...
There are 25784 rows of data for the summer season.

Selecting target and features...

Normalizing...
Mininimum ESB_Tower_2 leavingWaterTemp = 0.0
Maximum ESB_Tower_2 leavingWaterTemp = 91.15867614746094

Creating timesteps...

Removing irrelevant data...
Number of samples in summer data before removing off times: 25784
 Number of samples in summer data after removing off times: 11260


In [4]:
# 2. Split data into training and testing sets
print("\nSplitting training and testing sets...")
tss = TimeSeriesSplit(n_splits=3)

# Separate the inputs from the value being predicted (the target at time t).
target_at_t = f"{target}(t)"
X = lstm_df.drop(columns=target_at_t)
y = lstm_df[target_at_t]

# Only the final fold yielded by the splitter is used: its train and test
# indices define the single train/test partition for the rest of the notebook.
train_index, test_index = list(tss.split(X))[-1]
X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

# 3. Get timestepped data as a 3D vector
print("\nCreating 3D vector...")
# One column per feature plus one more; presumably the extra column is the
# lagged target that create_timesteps keeps among the inputs (TODO confirm).
n_columns = len(features) + 1
vec_X_train = model_prep.df_to_3d(
    lstm_dtframe=X_train, num_columns=n_columns, step_back=step_back
)
vec_X_test = model_prep.df_to_3d(
    lstm_dtframe=X_test, num_columns=n_columns, step_back=step_back
)

vec_y_train, vec_y_test = y_train.values, y_test.values

print(vec_X_train.shape, vec_X_test.shape, vec_y_train.shape, vec_y_test.shape)


Splitting training and testing sets...

Creating 3D vector...
(8445, 6, 7) (2815, 6, 7) (8445,) (2815,)


In [5]:
# 4. Create and train model

# Seed the Python, NumPy and backend random generators before any layer is
# created, so weight initialisation (and therefore the errors reported by the
# results cells) is repeatable across Restart & Run All in the same environment.
SEED = 42
set_random_seed(SEED)

print("\nCreating model...")
# Single LSTM layer followed by a one-unit dense output; the input shape is
# (timesteps, columns per timestep), read from the training array.
model = Sequential()
model.add(LSTM(50, input_shape=(vec_X_train.shape[1], vec_X_train.shape[2])))
model.add(Dense(1))
model.compile(loss="mae", optimizer="adam")

print("\nTraining model...")
# shuffle=False keeps the samples in their original order during training.
# NOTE(review): the held-out test fold doubles as validation data. No callbacks
# are passed, so it is only monitored in `history`, but it should not be used
# to tune the model.
history = model.fit(
    vec_X_train,
    vec_y_train,
    epochs=50,
    batch_size=72,
    validation_data=(vec_X_test, vec_y_test),
    verbose=0,
    shuffle=False,
)


Creating model...


2023-06-19 15:14:33.418254: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.



Training model...


In [7]:
# 5. Display results
print("\nMaking predictions...")
yhat = model.predict(vec_X_test)


print("\nResults:...")
# Flatten both arrays to one value per test sample and line them up on the
# timestamps of the test targets.
actual_values = vec_y_test.reshape(len(vec_y_test))
predicted_values = yhat.reshape(len(yhat))
results_df = pd.DataFrame(
    {"actual": actual_values, "predicted": predicted_values},
    index=y_test.index,
)
# Undo the normalization applied during preparation.
results_df = scaler.denormalize_results(results_df)

# Put the results on a regular 5-minute grid spanning the test period.
# Timestamps with no result stay NaN.
new_index = pd.date_range(
    start=results_df.index.min(), end=results_df.index.max(), freq="5min"
)
display_df = pd.DataFrame(index=new_index).join(results_df, how="left")

# Error metrics are computed on the actual result rows only, not the padded grid.
actual_col = results_df["actual"]
predicted_col = results_df["predicted"]
mabs_error = mean_absolute_error(actual_col, predicted_col)
rmse = np.sqrt(mean_squared_error(actual_col, predicted_col))
print("Mean Absolute Error: %.3f" % mabs_error)
print("RMSE: %.3f" % rmse)

fig = px.line(display_df, x=display_df.index, y=["actual", "predicted"])
fig.update_layout(
    title=f"{building_name} Tower {tower_number} LSTM Model Results",
    xaxis_title="time",
    yaxis_title=target,
)
fig.show()


Making predictions...

Results:...
Mean Absolute Error: 1.535
RMSE: 1.951


# Use ESB Tower 2 Summer Model on Kissam Tower 2

In [8]:
# Run configuration: Kissam tower 2, summer season only.
# These assignments rebind the names used by the ESB cells above (df, features,
# target, building_name, ...), so those cells must not be re-run after this one
# without first re-running the ESB preparation cell.
df = kissam_df
season = "summer"
building_name = "Kissam"
tower_number = 2
features = kissam_features
target = kissam_target
step_back = 6  # passed as n_in to create_timesteps and again to remove_irrelevant_data
save_timestepped = False  # not read anywhere in this cell

# Shared prefix of the tower-specific column names used below.
tower_prefix = f"{building_name}_Tower_{tower_number}"

# Restrict the frame to the configured season.
df = model_prep.choose_season(
    df,
    season=season,
    season_col_name=f"{tower_prefix} season",
)

# Set the fanStatus column aside before the frame is narrowed to features and
# target; it is handed to remove_irrelevant_data further down (per the original
# author it flags whether the cooling tower is on).
on_condition = df[f"{tower_prefix} fanStatus"]

# Narrow the frame to the feature columns plus the target column.
target_column = df[target]
df = df[features].join(target_column, on=df.index)

# Normalize the data.
# NOTE(review): a new NormalizationHandler is created here instead of reusing
# the one from the ESB run, and it replaces `scaler`. Confirm that scaling
# Kissam with its own handler is intended when the inputs are fed to a model
# trained on ESB-normalized data.
scaler = model_prep.NormalizationHandler()
df = scaler.normalize(dtframe=df, target_col=target)

# Add the lagged timesteps the LSTM consumes.
lstm_df = model_prep.create_timesteps(df, n_in=step_back, n_out=1, target_name=target)

# Drop rows that should not be modelled. The author's stated intent is to remove
# initial timesteps where spring data would leak into summer data.
lstm_df = model_prep.remove_irrelevant_data(lstm_df, on_condition, step_back)


There are 25771 rows of data for the summer season.
Mininimum Kissam_Tower_2 leavingWaterTemp = 59.37008285522461
Maximum Kissam_Tower_2 leavingWaterTemp = 94.11427307128906
Number of samples in summer data before removing off times: 25771
 Number of samples in summer data after removing off times: 25706


In [9]:
# 2. Convert tower data into a model-compatible shape, i.e. get timestepped data as a 3D vector
#
# Every Kissam row is used for evaluation: nothing is fitted on this tower, so
# no train/test split is made here (the unused TimeSeriesSplit was removed).

X = lstm_df.drop(f"{target}(t)", axis=1)  # inputs: everything except the target at time t
y = lstm_df[f"{target}(t)"]  # the target at time t

# One column per feature plus one more; presumably the extra column is the
# lagged target that create_timesteps keeps among the inputs (TODO confirm).
vec_X_test = model_prep.df_to_3d(
    lstm_dtframe=X, num_columns=len(features) + 1, step_back=step_back
)
vec_y_test = y.values
print(vec_X_test.shape, vec_y_test.shape)


(25706, 6, 7) (25706,)


In [11]:
# 3. Predict on Kissam with the ESB model and display results
#
# No model is loaded from disk here: `model` is the in-memory network fitted on
# ESB tower 2 data by the training cell earlier in this notebook, applied to the
# Kissam inputs without any retraining.
yhat = model.predict(vec_X_test)

# Flatten both arrays to one value per sample and line them up on the
# timestamps of the Kissam targets.
results_df = pd.DataFrame(
    {
        "actual": vec_y_test.reshape((vec_y_test.shape[0])),
        "predicted": yhat.reshape((yhat.shape[0])),
    },
    index=y.index,
)
# Undo the normalization with the handler from the Kissam preparation cell.
results_df = scaler.denormalize_results(results_df)

# Put the results on a regular 5-minute grid spanning the evaluated period.
# Timestamps with no result stay NaN.
new_index = pd.date_range(
    start=results_df.index.min(), end=results_df.index.max(), freq="5min"
)
display_df = pd.DataFrame(index=new_index)
display_df = display_df.merge(
    results_df, how="left", left_index=True, right_index=True
)

# Error metrics are computed on the actual result rows only, not the padded grid.
mabs_error = mean_absolute_error(results_df["actual"], results_df["predicted"])
rmse = np.sqrt(mean_squared_error(results_df["actual"], results_df["predicted"]))
print("Mean Absolute Error: %.3f" % mabs_error)
print("RMSE: %.3f" % rmse)

fig = px.line(display_df, x=display_df.index, y=["actual", "predicted"])
fig.update_layout(
    title=f"ESB Tower 2 model used on Kissam Tower 2 ({season}) (zero retraining) LSTM Model Results",
    xaxis_title="time",
    yaxis_title=target,
)
fig.show()
# The season in the file name follows the configured season, like the title,
# so a run for another season does not overwrite the summer plot.
fig.write_html(f"../plots/esb2_to_kissam2_{season}_lstm.html")

Mean Absolute Error: 1.668
RMSE: 2.090
