In [1]:
# Load Packages
import pandas as pd
import numpy as np
import math

import plotly.express as px

import warnings
warnings.filterwarnings('ignore')


from sklearn.metrics import mean_squared_error, mean_absolute_error

from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import LSTM
from keras import metrics, optimizers, losses

import model_generalized
import model_prep

2023-06-20 16:52:37.009490: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --- Transfer configuration ---------------------------------------------------
# Source tower: the LSTM saved for this tower/season is the starting point.
from_building_name = "ESB"
from_tower_number = 1

# Target tower: its data is used for fine-tuning and for evaluation.
to_building_name = "Kissam"
to_tower_number = 1

# Input columns and the column to predict, as named in the target tower's CSV.
to_features = [
    "Kissam_Tower_1 enteringWaterTemp",
    "outdoorAirDryBulb",
    "outdoorAirWetBulb",
    "Kissam_Tower_1 vfdPercent",
    "Kissam_Tower_1a vfdPower",
    "Kissam_Tower_1b vfdPower",
]
to_target = "Kissam_Tower_1 leavingWaterTemp"

season = "summer"
# Fraction of the samples (taken from the start of the series) used for fine-tuning.
finetuning_percentage = 0.25
# Number of past timesteps given to the LSTM for each sample.
step_back = 6

In [3]:
# Read the target tower's preprocessed CSV and index it by timestamp.
building_slug = to_building_name.lower()
to_df = pd.read_csv(
    f"../data/{building_slug}/{building_slug}_tower_{to_tower_number}_preprocessed.csv",
    index_col="time",
)
to_df.index = pd.to_datetime(to_df.index)

# Column names for this tower share the "<Building>_Tower_<n>" prefix.
tower_prefix = f"{to_building_name}_Tower_{to_tower_number}"

# Keep only the rows belonging to the configured season.
to_df = model_prep.choose_season(
    to_df, season=season, season_col_name=f"{tower_prefix} season"
)

# Set the fan-status column aside before it is dropped below; it is handed to
# remove_irrelevant_data at the end of this cell.
on_condition = to_df[f"{tower_prefix} fanStatus"]

# Reduce the frame to the selected features plus the target column.
# NOTE(review): `on=to_df.index` joins on the frame's own index; presumably this
# is equivalent to to_df[to_features + [to_target]] - confirm before simplifying.
to_df = to_df[to_features].join(to_df[to_target], on=to_df.index)

# Normalize; the handler is kept so predictions can be de-normalized later.
scaler = model_prep.NormalizationHandler()
to_df = scaler.normalize(dtframe=to_df, target_col=to_target)

# Add the lagged timestep columns the LSTM needs (step_back inputs, 1 output).
lstm_to_df = model_prep.create_timesteps(
    to_df,
    n_in=step_back,
    n_out=1,
    target_name=to_target,
)

# Drop rows that should not be used for modelling: initial timesteps whose lags
# would leak in from the previous season and, judging by the printed output,
# times when the tower is off.
lstm_to_df = model_prep.remove_irrelevant_data(lstm_to_df, on_condition, step_back)


There are 25771 rows of data for the summer season.
Mininimum Kissam_Tower_1 leavingWaterTemp = 59.37008285522461
Maximum Kissam_Tower_1 leavingWaterTemp = 94.11427307128906
Number of samples in summer data before removing off times: 25771
 Number of samples in summer data after removing off times: 25693


In [4]:
"""
2. Create finetuning dataset based on finetuning_percentage
"""

print("\nSplitting training and testing sets...")

# Separate the inputs from the column holding the target at time t.
target_column = f"{to_target}(t)"
X = lstm_to_df.drop(columns=target_column)
y = lstm_to_df[target_column]

# Chronological split: the first `finetuning_percentage` of the rows is used for
# fine-tuning, everything after it for testing.
train_split = math.ceil(finetuning_percentage * len(X))
X_train, X_test = X.iloc[:train_split, :], X.iloc[train_split:, :]
y_train, y_test = y.iloc[:train_split], y.iloc[train_split:]

# 3. Get timestepped data as a 3D vector
print("\nCreating 3D vector...")
# One column per feature plus one more - presumably the lagged target; confirm
# against model_prep.create_timesteps.
n_columns = len(to_features) + 1
vec_X_train = model_prep.df_to_3d(
    lstm_dtframe=X_train,
    num_columns=n_columns,
    step_back=step_back,
)
vec_X_test = model_prep.df_to_3d(
    lstm_dtframe=X_test,
    num_columns=n_columns,
    step_back=step_back,
)

vec_y_train, vec_y_test = y_train.values, y_test.values

print(vec_X_train.shape, vec_X_test.shape, vec_y_train.shape, vec_y_test.shape)


Splitting training and testing sets...

Creating 3D vector...
(6424, 6, 7) (19269, 6, 7) (6424,) (19269,)


In [5]:
"""
4. Load model and fine tune
"""

# Load the LSTM that was saved for the source tower and season; this is the
# model that gets fine-tuned on the target tower's data.
model_dir = f"../models_saved/{from_building_name.lower()}{from_tower_number}_{season}_lstm/"
model = load_model(model_dir)

2023-06-20 16:52:41.522651: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Make every layer of the loaded model trainable so fine-tuning updates the
# whole network rather than only part of it.
model.trainable = True

# Recompile so the trainable setting and the new optimizer are used by fit().
model.compile(
    # Very low learning rate: fine-tuning should only nudge the loaded weights.
    optimizer=optimizers.Adam(1e-5),
    loss="mae",
    # This is a regression on a continuous target, so track RMSE (the same
    # quantity reported at evaluation time). A classification metric such as
    # BinaryAccuracy carries no meaning here.
    metrics=[metrics.RootMeanSquaredError()],
)

In [7]:
# Fine-tune on the target tower's training slice. Samples are fed in their
# original order (shuffle=False) and per-epoch logging is silenced (verbose=0).
history = model.fit(
    vec_X_train,
    vec_y_train,
    epochs=10,
    shuffle=False,
    verbose=0,
)

In [8]:
"""
5. Predict and display results
"""
# Predict on the held-out part of the target tower's data with the fine-tuned model.
yhat = model.predict(vec_X_test)

print("\nResults:...")
# Put actual and predicted values side by side as flat 1-D columns, indexed by
# the test timestamps, and convert them back out of normalized units.
results_df = scaler.denormalize_results(
    pd.DataFrame(
        {
            "actual": vec_y_test.reshape((vec_y_test.shape[0])),
            "predicted": yhat.reshape((yhat.shape[0])),
        },
        index=y_test.index,
    )
)

# Error metrics are computed on the de-normalized results only.
mabs_error = mean_absolute_error(results_df["actual"], results_df["predicted"])
rmse = np.sqrt(mean_squared_error(results_df["actual"], results_df["predicted"]))

# For plotting, left-merge the results onto a regular 5-minute grid spanning the
# test period; grid timestamps without a result row end up as NaN.
new_index = pd.date_range(
    start=results_df.index.min(),
    end=results_df.index.max(),
    freq="5min",
)
display_df = pd.DataFrame(index=new_index).merge(
    results_df,
    how="left",
    left_index=True,
    right_index=True,
)

print("Mean Absolute Error: %.3f" % mabs_error)
print("RMSE: %.3f" % rmse)

# Actual vs. predicted over time.
fig = px.line(display_df, x=display_df.index, y=["actual", "predicted"])
fig.update_layout(
    title=f"{from_building_name} Tower {from_tower_number} model used on {to_building_name} Tower {to_tower_number} ({season}) ({finetuning_percentage*100}% fine-tuning) LSTM Model Results",
    xaxis_title="time",
    yaxis_title=to_target,
)
fig.show()

# Optional export of the interactive figure (left disabled):
# fig.write_html(f"../plots/interbuilding_transfers/{from_building_name.lower()}{from_tower_number}_to_{to_building_name.lower()}{to_tower_number}_{season}_lstm.html")


Results:...
Mean Absolute Error: 0.635
RMSE: 0.867


In [9]:
# Run the same ESB 1 -> Kissam 1 summer transfer through the packaged helper in
# model_generalized; its printed metrics can be compared with the step-by-step
# cells above.
kissam1_features = [
    "Kissam_Tower_1 enteringWaterTemp",
    "outdoorAirDryBulb",
    "outdoorAirWetBulb",
    "Kissam_Tower_1 vfdPercent",
    "Kissam_Tower_1a vfdPower",
    "Kissam_Tower_1b vfdPower",
]
kissam1_target = "Kissam_Tower_1 leavingWaterTemp"

model_generalized.intra_season_transfer(
    from_building_name="ESB",
    from_tower_number=1,
    to_building_name="Kissam",
    to_tower_number=1,
    to_features=kissam1_features,
    to_target=kissam1_target,
    season="summer",
    finetuning_percentage=0.25,
)

There are 25771 rows of data for the summer season.
Mininimum Kissam_Tower_1 leavingWaterTemp = 59.37008285522461
Maximum Kissam_Tower_1 leavingWaterTemp = 94.11427307128906
Number of samples in summer data before removing off times: 25771
 Number of samples in summer data after removing off times: 25693
(6424, 6, 7) (19269, 6, 7) (6424,) (19269,)
Mean Absolute Error: 0.635
RMSE: 0.867
