<h1 style="color:#27469c">Preprocessing of Kissam Cooling Towers 1 and 2 data</h1>

In [None]:
import pandas as pd
import matplotlib.pyplot as pyplot
import preprocessor

import warnings
# Silence every warning category for the rest of the notebook session.
# NOTE(review): this also hides pandas deprecation / chained-assignment warnings.
warnings.simplefilter('ignore')

# Base directory from which the data paths used in the cells below are built.
rootpath = ".."

In [None]:
# load cooling towers data, indexed by the "time" column of the CSV
df = pd.read_csv(f'{rootpath}/data/kissam/kissam_cooling_towers.csv', index_col="time")

# Parse the index as timezone-aware UTC timestamps.
# `utc` is a boolean flag: pass True rather than the string 'True' (any
# non-empty string, including 'False', would be truthy and silently enable it).
df.index = pd.to_datetime(df.index, utc=True)

# Row count of the raw data; handed to preprocessor.remove_missing_data in the
# per-tower cells below.
initial_data_size = df.shape[0]

<h2 style="color:#27469c">Rename columns and separate the 2 cooling towers' data</h2>

In [None]:
# Tower numbers present in the raw export.
tower_ids = (1, 2)

# Raw tag suffix -> project column name for the per-tower "CT_<i>." tags.
ct_fields = {
    "TempCondIn": "enteringWaterTemp",
    "TempCondOut": "leavingWaterTemp",
    "PerFreqFan": "vfdPercent",
    "PowFan": "vfdPower",
    "StatusFan": "fanStatus",
    "FlowCond": "flowCond",
    "PerFreqConP": "perFreqConP",
    "PowConP": "powConP",
    "PressDiffCond": "pressDiffCond",
}
# Raw tag suffix -> project column name for the per-tower "CH_<i>." tags.
ch_fields = {
    "PowChi": "CH_PowChi",
    "Tonnage": "CH_Tonnage",
}

# Columns shared by both towers are renamed without a tower prefix here; the
# prefix is added per tower when the frames are split below.
renaming = {
    "TempWetBulb": "outdoorAirWetBulb",
    "TempAmbient": "outdoorAirDryBulb",
    "Setpoint_Python": "setpointPython",
    "Setpoint_Existing": "setpointExisting",
}
for i in tower_ids:
    renaming.update({f"CT_{i}.{raw}": f"Kissam_Tower_{i} {new}" for raw, new in ct_fields.items()})
    renaming.update({f"CH_{i}.{raw}": f"Kissam_Tower_{i} {new}" for raw, new in ch_fields.items()})

# Rebind instead of inplace=True so the cell reads as a plain transformation.
df = df.rename(columns=renaming)

# remove generated columns
df = df[[col for col in df.columns if "generated" not in col]]


def _tower_frame(frame, tower, all_towers=tower_ids):
    """Return an independent copy of ``frame`` holding one tower's columns.

    A column is treated as belonging to another tower when its name starts
    with that tower's renamed prefix ("Kissam_Tower_<n> ") or with one of its
    raw tag prefixes ("CT_<n>." / "CH_<n>."). Every other column is considered
    shared and is kept. Matching on these prefixes, rather than on a bare
    digit anywhere in the name, keeps shared columns whose names happen to
    contain a "1" or "2".

    Parameters
    ----------
    frame : pandas.DataFrame
        Renamed data containing the columns of all towers.
    tower : int
        Number of the tower to extract.
    all_towers : iterable of int
        All tower numbers present in ``frame``.

    Returns
    -------
    pandas.DataFrame
        Copy of the selected columns, each prefixed with "Kissam_Tower_<tower>".
    """
    other_prefixes = tuple(
        prefix
        for other in all_towers
        if other != tower
        for prefix in (f"Kissam_Tower_{other} ", f"CT_{other}.", f"CH_{other}.")
    )
    own_prefix = f"Kissam_Tower_{tower}"

    kept = [col for col in frame.columns if not col.startswith(other_prefixes)]
    # .copy() so that later column assignments never write to a slice of `df`.
    subset = frame[kept].copy()

    # renaming columns to be identically prefixed for uniformity
    subset.columns = [
        col if col.startswith(own_prefix) else f"{own_prefix} {col}"
        for col in subset.columns
    ]
    return subset


# split dataframes
df1 = _tower_frame(df, 1)  # cooling tower 1 data
df2 = _tower_frame(df, 2)  # cooling tower 2 data

<h2 style="color:#27469c">Cooling Tower 1</h2>

In [None]:
# --- cleaning ---
# Missing-data handling is delegated to the preprocessor module, which is also
# given the raw row count recorded when the CSV was loaded.
df1 = preprocessor.remove_missing_data(
    datadf=df1,
    initial_data_size=initial_data_size,
)

# Outlier removal. The mask marks rows whose fan status equals True and is
# passed as the "on" condition (threshold=5 is presumably a cutoff in standard
# deviations - confirm in preprocessor.remove_outliers_std).
df1_on_condition = df1["Kissam_Tower_1 fanStatus"] == True
df1 = preprocessor.remove_outliers_std(
    df1,
    has_off_data=True,
    on_condition=df1_on_condition,
    threshold=5,
)

# --- derived columns: efficiency, season, dayOfWeek, hourOfDay ---
# The two preprocessor helpers are called for their effect on df1; their return
# values are not used here.
preprocessor.create_efficiency_col(
    datadf=df1,
    enteringWaterTemp="Kissam_Tower_1 enteringWaterTemp",
    leavingWaterTemp='Kissam_Tower_1 leavingWaterTemp',
    outdoorAirWetBulb='Kissam_Tower_1 outdoorAirWetBulb',
    efficiency_col_name='Kissam_Tower_1 efficiency',
)
preprocessor.create_season_col(
    datadf=df1,
    season_col_name="Kissam_Tower_1 season",
)
df1["Kissam_Tower_1 dayOfWeek"] = df1.index.dayofweek
df1['Kissam_Tower_1 hourOfDay'] = df1.index.hour

# --- align the fan power layout with ESB ---
# ESB has two vfdPower columns and Kissam has one, so the single Kissam reading
# is copied into both fanA and fanB and the original column is dropped.
df1['Kissam_Tower_1 fanA_vfdPower'] = df1['Kissam_Tower_1 vfdPower']
df1['Kissam_Tower_1 fanB_vfdPower'] = df1['Kissam_Tower_1 vfdPower']
df1 = df1.drop(columns='Kissam_Tower_1 vfdPower')

# --- save tower 1 preprocessed data (columns in sorted order) ---
df1.sort_index(axis="columns").to_csv(
    f'{rootpath}/data/kissam/kissam_tower_1_preprocessed.csv'
)

In [None]:
# Post-condition check for the Cooling Tower 1 cell directly above.
#
# This cell used to be a verbatim copy of that cell. On a fresh
# "Restart & Run All" the copy filtered the already cleaned frame a second time
# and then raised a KeyError, because 'Kissam_Tower_1 vfdPower' had already been
# dropped by the first run. The preprocessing (and the CSV export) therefore
# happens exactly once, in the cell above; this cell only verifies its result.
tower_1_expected_cols = [
    'Kissam_Tower_1 dayOfWeek',
    'Kissam_Tower_1 hourOfDay',
    'Kissam_Tower_1 fanA_vfdPower',
    'Kissam_Tower_1 fanB_vfdPower',
]
tower_1_missing_cols = [col for col in tower_1_expected_cols if col not in df1.columns]

assert not tower_1_missing_cols, (
    f"Tower 1 preprocessing has not run (missing columns: {tower_1_missing_cols}); "
    "run the Cooling Tower 1 cell above first."
)
assert 'Kissam_Tower_1 vfdPower' not in df1.columns, (
    "'Kissam_Tower_1 vfdPower' should have been replaced by the fanA/fanB columns."
)
# Both fan columns are copies of the single Kissam fan power reading.
assert df1['Kissam_Tower_1 fanA_vfdPower'].equals(df1['Kissam_Tower_1 fanB_vfdPower'])

<h2 style="color:#27469c">Replicate for Cooling Tower 2</h2>

In [None]:
# --- cleaning ---
# Missing-data handling is delegated to the preprocessor module, which is also
# given the raw row count recorded when the CSV was loaded.
df2 = preprocessor.remove_missing_data(
    datadf=df2,
    initial_data_size=initial_data_size,
)

# Outlier removal. The mask marks rows whose fan status equals True and is
# passed as the "on" condition (threshold=5 is presumably a cutoff in standard
# deviations - confirm in preprocessor.remove_outliers_std).
df2_on_condition = df2["Kissam_Tower_2 fanStatus"] == True
df2 = preprocessor.remove_outliers_std(
    df2,
    has_off_data=True,
    on_condition=df2_on_condition,
    threshold=5,
)

# --- derived columns: efficiency, season, dayOfWeek, hourOfDay ---
# The two preprocessor helpers are called for their effect on df2; their return
# values are not used here.
preprocessor.create_efficiency_col(
    datadf=df2,
    enteringWaterTemp="Kissam_Tower_2 enteringWaterTemp",
    leavingWaterTemp='Kissam_Tower_2 leavingWaterTemp',
    outdoorAirWetBulb='Kissam_Tower_2 outdoorAirWetBulb',
    efficiency_col_name='Kissam_Tower_2 efficiency',
)
preprocessor.create_season_col(
    datadf=df2,
    season_col_name="Kissam_Tower_2 season",
)
df2["Kissam_Tower_2 dayOfWeek"] = df2.index.dayofweek
df2['Kissam_Tower_2 hourOfDay'] = df2.index.hour

# --- align the fan power layout with ESB ---
# ESB has two vfdPower columns and Kissam has one, so the single Kissam reading
# is copied into both fanA and fanB and the original column is dropped.
df2['Kissam_Tower_2 fanA_vfdPower'] = df2['Kissam_Tower_2 vfdPower']
df2['Kissam_Tower_2 fanB_vfdPower'] = df2['Kissam_Tower_2 vfdPower']
df2 = df2.drop(columns='Kissam_Tower_2 vfdPower')

# --- save tower 2 preprocessed data (columns in sorted order) ---
df2.sort_index(axis="columns").to_csv(
    f'{rootpath}/data/kissam/kissam_tower_2_preprocessed.csv'
)