<h1 style="color:#27469c"> Preprocessing of ESB Cooling Towers 1 and 2 data </h1>


<h1 style="color:#27469c">ESB Cooling Tower 1</h1>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import preprocessor
import matplotlib.pyplot as plt
import numpy as np

import warnings
# Silence every warning for the remainder of the notebook session.
warnings.filterwarnings(action="ignore")

# Root directory that all data paths in this notebook are resolved against.
rootpath = ".."

In [None]:
# Load the raw cooling tower 1 export, indexed by its 'time' column.
df = pd.read_csv(f'{rootpath}/data/esb/2422_1.csv', index_col='time')

# utc must be the boolean True: the previous string 'True' only worked because any
# non-empty string is truthy (the string 'False' would have enabled UTC as well).
df.index = pd.to_datetime(df.index, utc=True)

# Drop the two setpoint columns and every column whose name contains 'generated'.
df = df.drop(columns=['Setpoint_Python', 'Setpoint_Existing'])
df = df.drop(columns=df.filter(like='generated', axis=1).columns)

# Raw row count, kept so the cleaning step can report how many rows survive.
initial_data_size = df.shape[0]
df.columns

<h3 style="color:#27469c">Missing data removal</h3>

In [None]:
# Drop columns that contain no data at all (FlowCond).
df = df.dropna(axis=1, how="all")

# An exact 0.0 in an environment column is treated as a missing reading.
env_cols = ["TempWetBulb", "TempCondIn", "TempEvapOut", "TempEvapIn", "TempAmbient", "PerHumidity"]
env_readings = df[env_cols]
df[env_cols] = env_readings.replace(0.0, np.nan)

# Fraction of missing values per column, printed before incomplete rows are removed.
n_rows_before = df.shape[0]
missing_fraction = df.isna().sum() / n_rows_before
print(missing_fraction)

# Keep only the rows that have a value in every remaining column.
df = df.dropna()
print(f"After missing data removal, we are left with {df.shape[0]} rows out of {initial_data_size}.")

# One figure per column, each plotting that column against the time index.
# NOTE(review): the [1:] slice skips the first column — confirm this is intended.
for column in df.columns[1:]:
    fig, ax = plt.subplots()
    ax.plot(df.index, df[column])
    ax.set_title(f'Trendline for {column}')
    ax.set_xlabel('Year')
    ax.set_ylabel(column)

plt.show()

<h3 style="color:#27469c">Add efficiency and season columns + additional features</h3>

In [None]:
# Add the "Season" column through the project helper. Its return value is not used
# here, so it presumably writes the column into df in place — TODO confirm.
preprocessor.create_season_col(datadf=df, season_col_name="Season")

# Calendar features read from the timestamp index.
timestamps = df.index
df["DayOfWeek"] = timestamps.weekday
df["HourOfDay"] = timestamps.hour

# Columns modified/dropped to align with kissam:
# the two fan frequency columns are replaced by their average ...
half_fan_a = df["PerFreqFanA"] / 2.0
half_fan_b = df["PerFreqFanB"] / 2.0
df["PerFreqFan"] = half_fan_a + half_fan_b

# ... and the columns listed below are removed.
dropped_cols = [
    'PerFreqFanA', 'PerFreqFanB',
    'PressDiffCond', 'PressDiffEvap',
    'RunChi', 'RunFanB', 'RunFanA',
    'PerFreqChiP', 'RunConP',
]
df = df.drop(columns=dropped_cols)

<h3 style="color:#27469c">Target</h3>

Since the five power variables (`PowFanA`, `PowFanB`, `PowConP`, `PowChi`, `PowChiP`) are in the same range, we conclude that they use the same units and sum them into a single energy consumption variable.

In [None]:
# Power columns that are summed, left to right, into the single target column.
power_cols = ["PowFanA", "PowFanB", "PowConP", "PowChi", "PowChiP"]

total_power = df[power_cols[0]]
for power_col in power_cols[1:]:
    total_power = total_power + df[power_col]
df['EnergyConsumption'] = total_power

# The individual power columns are removed once the target exists.
df = df.drop(columns=power_cols)

In [None]:
# Display the column labels of df at this point in the notebook.
df.columns

In [None]:
# Plot the EnergyConsumption column against the time index in its own figure.
column = "EnergyConsumption"
fig, ax = plt.subplots()
ax.plot(df.index, df[column])
ax.set(title=f'Trendline for {column}', xlabel='Year', ylabel=column)

plt.show()

In [None]:
# Write the preprocessed tower 1 frame to CSV with its columns in sorted order.
tower1_out_path = f'{rootpath}/data/esb/esb1_preprocessed.csv'
tower1_sorted = df.sort_index(axis=1)
tower1_sorted.to_csv(tower1_out_path)

<h1 style="color:#27469c">ESB Cooling Tower 2</h1>

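Replicate the same preprocessing steps as for cooling tower 1. One additional cleaning step is applied: negative `FlowEvap` readings are treated as missing values.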

In [None]:
# Load the raw cooling tower 2 export, indexed by its 'time' column.
df = pd.read_csv(f'{rootpath}/data/esb/2841_2.csv', index_col='time')

# utc must be the boolean True: the previous string 'True' only worked because any
# non-empty string is truthy (the string 'False' would have enabled UTC as well).
df.index = pd.to_datetime(df.index, utc=True)

# Drop every column whose name contains 'generated'.
# NOTE(review): unlike the tower 1 load cell, no Setpoint_* columns are dropped
# here — confirm the tower 2 export does not contain them.
df = df.drop(columns=df.filter(like='generated', axis=1).columns)

# Raw row count, kept so the cleaning step can report how many rows survive.
initial_data_size = df.shape[0]

In [None]:
# Missing data removal.
# Drop columns that contain no data at all.
df = df.dropna(axis=1, how="all")

# An exact 0.0 in an environment column is treated as a missing reading.
env_cols = ["TempWetBulb", "TempCondIn", "TempEvapOut", "TempEvapIn", "TempAmbient", "PerHumidity"]
df[env_cols] = df[env_cols].replace(0.0, np.nan)

# Negative FlowEvap readings are treated as missing. Series.mask replaces the
# element-wise DataFrame.applymap call, which is deprecated in recent pandas
# releases; values that are already NaN compare False and are left untouched.
df["FlowEvap"] = df["FlowEvap"].mask(df["FlowEvap"] < 0.0)

# Fraction of missing values per column, printed before incomplete rows are removed.
print(df.isna().sum() / df.shape[0])

# Keep only the rows that have a value in every remaining column.
df = df.dropna()
print(f"After missing data removal, we are left with {df.shape[0]} rows out of {initial_data_size}.")

In [None]:
# Add the "Season" column through the project helper. Its return value is not used
# here, so it presumably writes the column into df in place — TODO confirm.
preprocessor.create_season_col(datadf=df, season_col_name="Season")

# Calendar features read from the timestamp index.
timestamps = df.index
df["DayOfWeek"] = timestamps.weekday
df["HourOfDay"] = timestamps.hour

# Columns modified/dropped to align with kissam:
# the two fan frequency columns are replaced by their average ...
half_fan_a = df["PerFreqFanA"] / 2.0
half_fan_b = df["PerFreqFanB"] / 2.0
df["PerFreqFan"] = half_fan_a + half_fan_b

# ... and the columns listed below are removed.
dropped_cols = [
    'PerFreqFanA', 'PerFreqFanB',
    'PressDiffCond', 'PressDiffEvap',
    'RunChi', 'RunFanB', 'RunFanA',
    'PerFreqChiP', 'RunConP',
]
df = df.drop(columns=dropped_cols)

In [None]:
# Target: the power columns below are summed, left to right, into one column.
power_cols = ["PowFanA", "PowFanB", "PowConP", "PowChi", "PowChiP"]

total_power = df[power_cols[0]]
for power_col in power_cols[1:]:
    total_power = total_power + df[power_col]
df['EnergyConsumption'] = total_power

# The individual power columns are removed once the target exists.
df = df.drop(columns=power_cols)

In [None]:
# Plot the EnergyConsumption column against the time index in its own figure.
column = "EnergyConsumption"
fig, ax = plt.subplots()
ax.plot(df.index, df[column])
ax.set(title=f'Trendline for {column}', xlabel='Year', ylabel=column)

plt.show()

In [None]:
# Write the preprocessed tower 2 frame to CSV with its columns in sorted order.
tower2_out_path = f'{rootpath}/data/esb/esb2_preprocessed.csv'
tower2_sorted = df.sort_index(axis=1)
tower2_sorted.to_csv(tower2_out_path)

In [None]:
# Display the column labels of df at this point in the notebook.
df.columns