In [1]:
# Load Packages
# Data handling
import pandas as pd
import numpy as np

# Interactive plotting of actual vs. predicted values
import plotly.express as px

# NOTE: this silences every Python warning for the rest of the kernel session,
# including deprecation and convergence warnings from the libraries below.
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Used to load the previously saved model from disk
import keras.models

# Project-local helpers used below: season selection, normalization,
# timestep creation and reshaping to a 3-D model input
import model_prep

# Root directory of the CSV inputs/outputs, relative to the notebook's working directory
datapath = "../data"

2023-06-14 12:17:03.801360: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Number of past timesteps in each input window; passed as `n_in` to
# model_prep.create_timesteps and as `step_back` to model_prep.df_to_3d below.
step_back = 6

<h2 style="color:#27469c">1. Preprocess Kissam Tower 2 data</h2>

In [3]:
# Load the preprocessed Kissam Tower 2 readings and index them by timestamp.
df = pd.read_csv(f'{datapath}/kissam/kissam_tower_2_preprocessed.csv', index_col='time')
df.index = pd.to_datetime(df.index)

# Only take data for one season.
df = model_prep.choose_season(df, season='summer', season_col_name='Kissam_Tower_2 season')

# Save a boolean series that specifies whether the cooling tower is on.
# It must be captured here because the column selection below drops it.
on_condition = df['Kissam_Tower_2 fanStatus']

# Select features and targets; the final dataframe holds only these columns,
# features first and targets last.
features = [
    'Kissam_Tower_2 enteringWaterTemp',
    'outdoorAirDryBulb',
    'outdoorAirWetBulb',
    'Kissam_Tower_2 vfdPercent',
    'Kissam_Tower_2 vfdPower',
    'Kissam_Tower_2 CH_PowChi',
    'Kissam_Tower_2 perFreqConP',
    'Kissam_Tower_2 powConP',
    'Kissam_Tower_2 CH_Tonnage',
]
targets = ['Kissam_Tower_2 leavingWaterTemp']
# Plain column selection instead of self-joining the frame on its own index:
# same columns in the same order, without going through the merge machinery
# (which can duplicate rows if a timestamp repeats).
df = df[features + targets]

# Normalize data.
# NOTE(review): the scaler is created from this Tower 2 data; if the saved model
# was trained with a different scaling, predictions may be off — confirm.
scaler = model_prep.NormalizationHandler()
df = scaler.normalize(dtframe=df, target_col=targets[0])

# Prepare dataframe for the LSTM by adding timesteps.
lstm_df = model_prep.create_timesteps(df, n_in=step_back, n_out=1, target_names=targets)

# Remove cases where spring data would leak into summer data (i.e. initial timesteps).
lstm_df = model_prep.remove_irrelevant_data(lstm_df, on_condition, step_back)

# Save the timestepped frame for reuse.
lstm_df.to_csv(f"{datapath}/kissam/kissam2_summer_timestepped.csv")

There are 25771 rows of data for the summer season.
Min = 59.37008285522461
Max = 94.11427307128906
Number of samples in summer data before removing off times: 25771
 Number of samples in summer data after removing off times: 25706


<h2 style="color:#27469c">2. Convert Kissam Tower 2 data into a model-compatible shape</h2>

In [6]:
# NOTE(review): `tss` is not used by any cell visible in this notebook; it is
# kept in case a cell outside this view relies on it.
tss = TimeSeriesSplit(n_splits=3)

# Columns holding the target value at the prediction time step.
target_cols = [f'{target}(t)' for target in targets]

X = lstm_df.drop(columns=target_cols)  # everything except the target columns
y = lstm_df[target_cols]               # only the target columns

# Each timestep is assumed to carry every feature plus every target column
# (the recorded output shape (n, 6, 10) matches 9 features + 1 target), so
# derive the width from both lists rather than hard-coding a single target.
vec_X_test = model_prep.df_to_3d(
    lstm_dtframe=X,
    num_columns=len(features) + len(targets),
    step_back=step_back,
)

vec_y_test = y.values

print(vec_X_test.shape, vec_y_test.shape)

(25706, 6, 10) (25706, 1)


<h2 style="color:#27469c">3. Load model and predict</h2>

In [7]:
# Load the previously saved Keras model from disk.
# NOTE(review): the path names a "kissam1" model while the data prepared above is
# Kissam Tower 2 — presumably a deliberate cross-tower evaluation; confirm.
model = keras.models.load_model('../models_saved/kissam1_summer_lstm')

2023-06-14 12:22:34.868296: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# Predict on the whole Tower 2 input tensor; the outputs are denormalized in the next cell.
yhat = model.predict(vec_X_test)



In [9]:
# Collect actual and predicted values side by side, indexed like the targets,
# then map both columns back to the original (denormalized) scale.
results_df = pd.DataFrame(
    {'actual': vec_y_test.ravel(), 'predicted': yhat.ravel()},
    index=y.index,
)
results_df = scaler.denormalize_results(results_df)

# Error metrics on the denormalized values.
mabs_error = mean_absolute_error(results_df['actual'], results_df['predicted'])
rmse = np.sqrt(mean_squared_error(results_df['actual'], results_df['predicted']))
print('Mean Absolute Error: %.3f' % mabs_error)
print('RMSE: %.3f' % rmse)

# Title and labels make the figure readable on its own when the notebook is skimmed.
fig = px.line(
    results_df,
    x=results_df.index,
    y=["actual", "predicted"],
    title=f"{targets[0]}: actual vs. predicted",
    labels={"value": targets[0], "variable": "series"},
)
fig.show()

Mean Absolute Error: 1.000
RMSE: 1.191
