In [1]:
# Import the required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psychrolib


# 1. Data Pre-Processing

In [3]:
# Combine All Fault and Fault-free Case and Labeling

def process_df(file_name, fault_value, interval=60):
    df = pd.read_csv(file_name)
    df = df.iloc[::interval, :]  # Take data from every 'interval' rows
    df.insert(1, 'Fault', fault_value)
    return df

# Normal
df_unfaulted = process_df('ChillerPlant.csv', "N", interval=60)

# WCC Sensors Bias - 2 ways
# Chilled water leaving temperature sensor bias of chiller 1 
df_CH_bias1_faulted = process_df('ChillerPlant_chiller_bias_2.csv', "CHS", interval=240)
df_CH_bias2_faulted = process_df('ChillerPlant_chiller_bias_-2.csv', "CHS", interval=240)
df_CH_bias3_faulted = process_df('ChillerPlant_chiller_bias_1.csv', "CHS", interval=240)
df_CH_bias4_faulted = process_df('ChillerPlant_chiller_bias_-1.csv', "CHS", interval=240)

# CT Sensors Bias - 2 ways
# Condenser water leaving temperature sensor bias of cooling tower 1 
df_CT_bias1_faulted = process_df('ChillerPlant_coolingtower_bias_2.csv', "CTS", interval=240)
df_CT_bias2_faulted = process_df('ChillerPlant_coolingtower_bias_-2.csv', "CTS", interval=240)
df_CT_bias3_faulted = process_df('ChillerPlant_coolingtower_bias_1.csv', "CTS", interval=240)
df_CT_bias4_faulted = process_df('ChillerPlant_coolingtower_bias_-1.csv', "CTS", interval=240)

# CT Water Scale - 2 ways
# Fouling of cooling tower heat exchanger
df_CT_foul1_faulted = process_df('ChillerPlant_coolingtower_fouling_065.csv', "CTF", interval=180)
df_CT_foul2_faulted = process_df('ChillerPlant_coolingtower_fouling_080.csv', "CTF", interval=180)
df_CT_foul3_faulted = process_df('ChillerPlant_coolingtower_fouling_095.csv', "CTF", interval=180)

# Pump Pressure Bias - 2 ways
# Differential pressure sensor bias in the secondary chilled water loop
df_CHWP_pressure1_faulted = process_df('ChillerPlant_secondary_chilled_water_pressure_bias_020.csv', "CPP", interval=240)
df_CHWP_pressure2_faulted = process_df('ChillerPlant_secondary_chilled_water_pressure_bias_-020.csv', "CPP", interval=240)
df_CHWP_pressure3_faulted = process_df('ChillerPlant_secondary_chilled_water_pressure_bias_010.csv', "CPP", interval=240)
df_CHWP_pressure4_faulted = process_df('ChillerPlant_secondary_chilled_water_pressure_bias_-010.csv', "CPP", interval=240)

# Valve Bypass Leakage - 1 way
# Leakage of the condenser water leaving the three-way valve
df_Bypass_leakage1_faulted = process_df('ChillerPlant_bypass_leakage_025.csv', "BPL", interval=180)
df_Bypass_leakage2_faulted = process_df('ChillerPlant_bypass_leakage_050.csv', "BPL", interval=180)
df_Bypass_leakage3_faulted = process_df('ChillerPlant_bypass_leakage_075.csv', "BPL", interval=180)

# Valve Bypass Stuck - 1 way
# Stuck of the condenser water leaving the three-way valve
df_Bypass_stuck1_faulted = process_df('ChillerPlant_bypass_stuck_050.csv', "BPS", interval=120)
df_Bypass_stuck2_faulted = process_df('ChillerPlant_bypass_stuck_075.csv', "BPS", interval=120)

# CT Control Faulut - 1 way
# PID control for condenser water supply temperature
df_CT_PID_faulted = process_df('ChillerPlant_coolingtower_PI.csv', "CTPID", interval=60)

# Concat All Cases
DFs = pd.concat([
    df_unfaulted,
    df_CH_bias1_faulted, df_CH_bias2_faulted, df_CH_bias3_faulted, df_CH_bias4_faulted,
    df_CT_bias1_faulted, df_CT_bias2_faulted, df_CT_bias3_faulted, df_CT_bias4_faulted,
    df_CT_foul1_faulted, df_CT_foul2_faulted, df_CT_foul3_faulted,
    df_CHWP_pressure1_faulted, df_CHWP_pressure2_faulted, df_CHWP_pressure3_faulted, df_CHWP_pressure4_faulted,
    df_Bypass_leakage1_faulted, df_Bypass_leakage2_faulted, df_Bypass_leakage3_faulted,
    df_Bypass_stuck1_faulted, df_Bypass_stuck2_faulted,
    df_CT_PID_faulted
])

DFs.head()

#DF.tail()
#print(len(DF))


Unnamed: 0,Datetime,Fault,CDWL_CW_FLOW,CDWL_PM_POW_1,CDWL_PM_POW_2,CDWL_PM_POW_3,CDWL_RW_TEMP,CDWL_SW_TEMP,CHL_CD_FLOW_1,CHL_CD_FLOW_2,...,CWL_SEC_PM_POW_2,CWL_SEC_PM_SPD_1,CWL_SEC_PM_SPD_2,CWL_SEC_PM_STA_1,CWL_SEC_PM_STA_2,CWL_SEC_RW_TEMP,CWL_SEC_SW_TEMP,OA_TEMP,OA_TEMP_WB,TWV_CTRL
0,2018-01-01 01:00:00,N,1126.6614,46.084146,0.0,0.0,69.188995,61.984875,1126.6584,0.001502,...,0.0,1.0,0.0,1.0,0.0,56.816963,57.150177,9.070051,10.355011,0.0
60,2018-01-01 02:00:00,N,1126.6614,28.01234,4.73731e-24,3.340467e-32,63.610245,60.103905,1126.6584,0.001502,...,51.716137,1.0,1.0,1.0,1.0,56.06066,56.10933,9.612884,10.925037,0.000902
120,2018-01-01 03:00:00,N,1126.6614,8.838832,0.0,0.0,60.94123,59.98465,1126.6584,0.001502,...,51.716137,1.0,1.0,1.0,1.0,54.33828,54.349762,10.632746,12.002028,0.0313
180,2018-01-01 04:00:00,N,1126.6614,7.700014,1.897874e-22,0.0,59.942516,59.670662,1126.6584,0.001502,...,48.771226,0.982353,0.982353,1.0,1.0,53.98227,53.984024,10.649775,12.019989,0.394997
240,2018-01-01 05:00:00,N,1126.6614,7.622777,1.744471e-28,0.0,59.875774,59.754925,1126.6584,0.001502,...,11.441535,0.61952,0.61952,1.0,1.0,53.952988,53.952496,11.487482,12.904993,0.610889


In [4]:
DF_main = DFs # Make a COPY
#print(DF_main)

# Faults Statistics
fault_counts = DF_main['Fault'].value_counts()
print(fault_counts)

Fault
CHS      8760
CTS      8760
CTF      8760
CPP      8760
BPL      8760
BPS      8760
N        8759
CTPID    8759
Name: count, dtype: int64


In [5]:
# Units Convert

# Convert all "TEMP" from F to C
def fahrenheit_to_celsius(f):
    return (f - 32) * 5.0/9.0

temp_columns = DF_main.filter(regex='TEMP')
DF_main[temp_columns.columns] = DF_main[temp_columns.columns].map(fahrenheit_to_celsius)

# Convert all "FLOW" from GPM to L/s
def gpm_to_ls(gpm):
    return gpm * 0.0631

flow_columns = DF_main.filter(regex='FLOW')
DF_main[flow_columns.columns] = DF_main[flow_columns.columns].map(gpm_to_ls)

# Convert CWL_SEC_LOAD from W to kW
DF_main['CWL_SEC_LOAD'] = DF_main['CWL_SEC_LOAD'] / 1000

# Swap Dry Bulb and Wet Bulb Temperatures => Original Data Incorrect!
DF_main.rename(columns={'OA_TEMP': 'TEMP_TEMP', 'OA_TEMP_WB': 'OA_TEMP'}, inplace=True)
DF_main.rename(columns={'TEMP_TEMP': 'OA_TEMP_WB'}, inplace=True)

#print(DF_main)

# 2. Modification: Constant and Noise Data (Decimal)

In [None]:
# Round up to 2 decimal
SPD_columns = DF_main.filter(regex=r'SPD') # Key: "SPD"
SPT_columns = DF_main.filter(regex=r'SPT')
decimal3 = list(SPD_columns.columns) + list(SPT_columns.columns)
DF_main[decimal3] = DF_main[decimal3].astype(float).round(3)
#print(DF_main[decimal3])

# Round up to 3 decimal
categorical_columns = ['Datetime', 'Fault']
decimal2 = DF_main.columns.difference(decimal3 + categorical_columns) # Not in decimal3 or categorical_columns
DF_main[decimal2] = DF_main[decimal2].astype(float).round(2)
#print(DF_main[decimal2])


# Filter the Noise Data (Power Meter)
noise = ['CDWL_PM_POW_1', 'CDWL_PM_POW_2', 'CDWL_PM_POW_3', 'CWL_PRI_PM_POW_1', 'CWL_PRI_PM_POW_2', 'CWL_PRI_PM_POW_3', 'CWL_SEC_PM_POW_1', 'CWL_SEC_PM_POW_2', 'CT_FLOW_1', 'CT_FLOW_2', 'CT_FLOW_3']
DF_main[noise] = DF_main[noise].mask(DF_main[noise] < 1, 0)

DF_main.head()

['CHL_COMP_SPD_CTRL_1', 'CHL_COMP_SPD_CTRL_2', 'CHL_COMP_SPD_CTRL_3', 'CT_FAN_SPD_1', 'CT_FAN_SPD_2', 'CT_FAN_SPD_3', 'CT_FAN_SPD_CTRL_1', 'CT_FAN_SPD_CTRL_2', 'CT_FAN_SPD_CTRL_3', 'CWL_SEC_PM_SPD_1', 'CWL_SEC_PM_SPD_2', 'CT_SW_TEMPSPT', 'CWL_PRI_SW_TEMPSPT', 'CWL_SEC_DPSPT']


"\nDF_main[decimal3] = DF_main[decimal3].astype(float).round(3)\n#print(DF_main[decimal3])\n\n# Round up to 3 decimal\ncategorical_columns = ['Datetime', 'Fault']\ndecimal2 = DF_main.columns.difference(decimal3 + categorical_columns) # Not in decimal3 or categorical_columns\nDF_main[decimal2] = DF_main[decimal2].astype(float).round(2)\n#print(DF_main[decimal2])\n\n\n# Filter the Noise Data (Power Meter)\nnoise = ['CDWL_PM_POW_1', 'CDWL_PM_POW_2', 'CDWL_PM_POW_3', 'CWL_PRI_PM_POW_1', 'CWL_PRI_PM_POW_2', 'CWL_PRI_PM_POW_3', 'CWL_SEC_PM_POW_1', 'CWL_SEC_PM_POW_2', 'CT_FLOW_1', 'CT_FLOW_2', 'CT_FLOW_3']\nDF_main[noise] = DF_main[noise].mask(DF_main[noise] < 1, 0)\n\nDF_main.head()\n"

# 3. Cleaning: Base Data (Turned-off)

In [10]:
# Split into sub-data set
CHL_columns = [col for col in DF_main.columns if "CHL_" in col]
df_CHL = DF_main[CHL_columns]

CT_columns = [col for col in DF_main.columns if "CT_" in col]
df_CT = DF_main[CT_columns]

CDWL_columns = [col for col in DF_main.columns if "CDWL_" in col]
df_CDWL = DF_main[CDWL_columns]

CWL_PRI_columns = [col for col in DF_main.columns if "CWL_PRI_" in col]
df_CWL_PRI = DF_main[CWL_PRI_columns]

CWL_SEC_columns = [col for col in DF_main.columns if "CWL_SEC_" in col]
df_CWL_SEC = DF_main[CWL_SEC_columns]

Other_columns = CHL_columns + CT_columns + CDWL_columns + CWL_PRI_columns + CWL_SEC_columns
df_Other = DF_main.drop(columns=Other_columns)

In [None]:
# Remove noise data: Power Meter and Flow Meter and TEMP Sensor
def clean_CHL_data(df, suffix):
    # If STATUS == 0; TEMP == 0, FLOW == 0, POW == 0
    CHL_X = df.filter(regex=fr'^CHL_.*{suffix}$')
    sta_columns = CHL_X.filter(regex=r'^CHL_STA') # Key
    pow_columns = CHL_X.filter(regex=r'^CHL_POW')
    flow_columns = CHL_X.filter(regex=r'^CHL_.*_FLOW_.*')
    temp_columns = CHL_X.filter(regex=r'^CHL_.*_TEMP_.*')
    columns_to_modify = list(pow_columns.columns) + list(flow_columns.columns) + list(temp_columns.columns)
    #print(columns_to_modify)
    CHL_X.loc[CHL_X[sta_columns.columns].eq(0).any(axis=1), columns_to_modify] = 0
    return CHL_X

def clean_CT_data(df, suffix):
    # If STATUS == 0; TEMP == 0, FLOW == 0, POW == 0
    CT_X = df.filter(regex=fr'^CT_.*{suffix}$')
    sta_columns = CT_X.filter(regex=r'^CT_STA') # Key
    pow_columns = CT_X.filter(regex=r'^CT_POW')
    temp_columns = CT_X.filter(regex=r'^CT_.*_TEMP_.*')
    flow_columns = CT_X.filter(regex=r'^CT_.*_FLOW_.*')
    columns_to_modify = list(pow_columns.columns) + list(temp_columns.columns) + list(flow_columns.columns)
    #print(columns_to_modify)
    CT_X.loc[CT_X[sta_columns.columns].eq(0).any(axis=1), columns_to_modify] = 0
    return CT_X

# Update back to df_CHL
df_CHL.update(clean_CHL_data(df_CHL, '_1'))
df_CHL.update(clean_CHL_data(df_CHL, '_2'))
df_CHL.update(clean_CHL_data(df_CHL, '_3'))

# Update back to df_CT
df_CT.update(clean_CT_data(df_CT, '_1'))
df_CT.update(clean_CT_data(df_CT, '_2'))
df_CT.update(clean_CT_data(df_CT, '_3'))

# Update df_CHL and df_CT back to DF_main
DF_main.update(df_CHL)
DF_main.update(df_CT)

DF_main.head()

        CHL_STA_1
0             1.0
60            1.0
120           1.0
180           1.0
240           1.0
...           ...
525240        1.0
525300        1.0
525360        1.0
525420        1.0
525480        1.0

[70078 rows x 1 columns]


In [None]:
# For output only

constants = ['CHL_CD_FLOW_1', 'CHL_CD_FLOW_2', 'CHL_CD_FLOW_3', 'CHL_CW_FLOW_1', 'CHL_CW_FLOW_2', 'CHL_CW_FLOW_3', 'CWL_SEC_DPSPT']
DF_main_clean = DF_main.drop(columns=constants) # After adding after adding the new features, before output.

DF_main_clean.head()

# One Sheet
with pd.ExcelWriter('df_faults_woF.xlsx', engine='xlsxwriter') as writer:
    DF_main_clean.to_excel(writer, sheet_name='main')


In [None]:
# Output By_Components
CHL_columns = [col for col in DF_main_clean.columns if "CHL_" in col]
df_CHL = DF_main_clean[CHL_columns]

CT_columns = [col for col in DF_main_clean.columns if "CT_" in col]
df_CT = DF_main_clean[CT_columns]

CDWL_columns = [col for col in DF_main_clean.columns if "CDWL_" in col]
df_CDWL = DF_main_clean[CDWL_columns]

CWL_PRI_columns = [col for col in DF_main_clean.columns if "CWL_PRI_" in col]
df_CWL_PRI = DF_main_clean[CWL_PRI_columns]

CWL_SEC_columns = [col for col in DF_main_clean.columns if "CWL_SEC_" in col]
df_CWL_SEC = DF_main_clean[CWL_SEC_columns]

Other_columns = CHL_columns + CT_columns + CDWL_columns + CWL_PRI_columns + CWL_SEC_columns
df_Other = DF_main_clean.drop(columns=Other_columns)

# For Temporary Review
with pd.ExcelWriter('Components_woF.xlsx', engine='xlsxwriter') as writer:
    df_CHL.to_excel(writer, sheet_name='CHL_Data')
    df_CT.to_excel(writer, sheet_name='CT_Data')
    df_CDWL.to_excel(writer, sheet_name='CDWL_Data')
    df_CWL_PRI.to_excel(writer, sheet_name='CWL_PRI_Data')
    df_CWL_SEC.to_excel(writer, sheet_name='CWL_SEC_Data')
    df_Other.to_excel(writer, sheet_name='Other_Data')

# Adding New Features

In [11]:
# Adding Features

# WCC
def add_WCC(df):
    # WCC1
    df['CHL_DeltaTEMP_1'] = df.apply(
        lambda row: (row['CHL_RW_TEMP_1'] - row['CHL_SW_TEMP_1'])
                     if row['CHL_POW_1'] > 5 else 0,
        axis=1)
    df['CHL_Cooling_Load_1'] = df.apply(
        lambda row: (row['CHL_DeltaTEMP_1']) * (row['CHL_CW_FLOW_1']) * 4.18 
                     if row['CHL_POW_1'] > 5 else 0,
        axis=1)
    df['CHL_COP_1'] = df.apply(
        lambda row: (row['CHL_Cooling_Load_1'] / row['CHL_POW_1'])
                    if row['CHL_POW_1'] > 5 else 0,
        axis=1)
    df['PLANT_POW_1'] = df['CHL_POW_1'] + df['CT_POW_1'] + df['CDWL_PM_POW_1'] + df['CWL_PRI_PM_POW_1']

    # WCC2
    df['CHL_DeltaTEMP_2'] = df.apply(
        lambda row: (row['CHL_RW_TEMP_2'] - row['CHL_SW_TEMP_2'])
                     if row['CHL_POW_2'] > 5 else 0,
        axis=1)
    df['CHL_Cooling_Load_2'] = df.apply(
        lambda row: (row['CHL_DeltaTEMP_2']) * (row['CHL_CW_FLOW_2']) * 4.18 
                     if row['CHL_POW_2'] > 5 else 0,
        axis=1)
    df['CHL_COP_2'] = df.apply(
        lambda row: (row['CHL_Cooling_Load_2'] / row['CHL_POW_2'])
                    if row['CHL_POW_2'] > 5 else 0,
        axis=1)
    df['PLANT_POW_2'] = df['CHL_POW_2'] + df['CT_POW_2'] + df['CDWL_PM_POW_2'] + df['CWL_PRI_PM_POW_2']

    #WCC3
    df['CHL_DeltaTEMP_3'] = df.apply(
        lambda row: (row['CHL_RW_TEMP_3'] - row['CHL_SW_TEMP_3'])
                     if row['CHL_POW_3'] > 5 else 0,
        axis=1)
    df['CHL_Cooling_Load_3'] = df.apply(
        lambda row: (row['CHL_DeltaTEMP_3']) * (row['CHL_CW_FLOW_3']) * 4.18 
                     if row['CHL_POW_3'] > 5 else 0,
        axis=1)
    df['CHL_COP_3'] = df.apply(
        lambda row: (row['CHL_Cooling_Load_3'] / row['CHL_POW_3'])
                    if row['CHL_POW_3'] > 5 else 0,
        axis=1)
    df['PLANT_POW_3'] = df['CHL_POW_3'] + df['CT_POW_3'] + df['CDWL_PM_POW_3'] + df['CWL_PRI_PM_POW_3']

    df['CHL_CW_TotalFLOW'] = df['CHL_CW_FLOW_1'] + df['CHL_CW_FLOW_2'] + df['CHL_CW_FLOW_3']
    df['CHL_CD_TotalFLOW'] = df['CHL_CD_FLOW_1'] + df['CHL_CD_FLOW_2'] + df['CHL_CD_FLOW_3']
    return df

# Cooling Tower
def add_CT(df):
    # CT1
    df['CT_DeltaTEMP_1'] = df.apply(
        lambda row: (row['CT_RW_TEMP_1'] - row['CT_SW_TEMP_1'])
                     if row['CT_FAN_SPD_1'] != 0 else 0,
        axis=1)
    df['CT_Load_1'] = df.apply(
        lambda row: row['CT_DeltaTEMP_1'] * row['CT_FLOW_1'] * 4.18 
                     if row['CT_FAN_SPD_1'] != 0 else 0,
        axis=1)
    df['CT_Eff_1'] = df.apply(
        lambda row: (row['CT_Load_1'] / row['CT_POW_1']) 
                        if row['CT_FAN_SPD_1'] != 0 and row['CT_POW_1'] != 0 else 0,
        axis=1)
    df['CT_ApproachTEMP_1'] = df.apply(
        lambda row: (row['OA_TEMP_WB'] - row['CT_SW_TEMP_1'])
                     if row['CT_FAN_SPD_1'] != 0 else 0,
        axis=1)
    df['CT_Error_1'] = df.apply(
        lambda row: (row['CT_SW_TEMPSPT'] - row['CT_SW_TEMP_1'])
                     if row['CT_FAN_SPD_1'] != 0 else 0,
        axis=1)
    df['CT_Dist_1'] = df.apply(
        lambda row: row['CT_POW_1'] / row['PLANT_POW_1']
                     if row['CT_POW_1'] != 0 else 0,
        axis=1)
    
    # CT2
    df['CT_DeltaTEMP_2'] = df.apply(
        lambda row: (row['CT_RW_TEMP_2'] - row['CT_SW_TEMP_2'])
                        if row['CT_FAN_SPD_2'] != 0 else 0,
        axis=1)
    df['CT_Load_2'] = df.apply(
        lambda row: row['CT_DeltaTEMP_2'] * row['CT_FLOW_2'] * 4.18 
                     if row['CT_FAN_SPD_2'] != 0 else 0,
        axis=1)
    df['CT_Eff_2'] = df.apply(
        lambda row: (row['CT_Load_2'] / row['CT_POW_2']) 
                        if row['CT_FAN_SPD_2'] != 0 and row['CT_POW_2'] != 0 else 0,
        axis=1)
    df['CT_ApproachTEMP_2'] = df.apply(
        lambda row: (row['OA_TEMP_WB'] - row['CT_SW_TEMP_2'])
                     if row['CT_FAN_SPD_2'] != 0 else 0,
        axis=1)
    df['CT_Error_2'] = df.apply(
        lambda row: (row['CT_SW_TEMPSPT'] - row['CT_SW_TEMP_2'])
                     if row['CT_FAN_SPD_2'] != 0 else 0,
        axis=1)
    df['CT_Dist_2'] = df.apply(
        lambda row: row['CT_POW_2'] / row['PLANT_POW_2']
                     if row['CT_POW_2'] != 0 else 0,
        axis=1)
        
    # CT3
    df['CT_DeltaTEMP_3'] = df.apply(
        lambda row: (row['CT_RW_TEMP_3'] - row['CT_SW_TEMP_3'])
                        if row['CT_FAN_SPD_3'] != 0 else 0,
        axis=1)
    df['CT_Load_3'] = df.apply(
        lambda row: row['CT_DeltaTEMP_3'] * row['CT_FLOW_3'] * 4.18 
                     if row['CT_FAN_SPD_3'] != 0 else 0,
        axis=1)
    df['CT_Eff_3'] = df.apply(
        lambda row: (row['CT_Load_3'] / row['CT_POW_3']) 
                        if row['CT_FAN_SPD_3'] != 0 and row['CT_POW_3'] != 0 else 0,
        axis=1)
    df['CT_ApproachTEMP_3'] = df.apply(
        lambda row: (row['OA_TEMP_WB'] - row['CT_SW_TEMP_3'])
                     if row['CT_FAN_SPD_3'] != 0 else 0,
        axis=1)
    df['CT_Error_3'] = df.apply(
        lambda row: (row['CT_SW_TEMPSPT'] - row['CT_SW_TEMP_3'])
                     if row['CT_FAN_SPD_3'] != 0 else 0,
        axis=1)
    df['CT_Dist_3'] = df.apply(
        lambda row: row['CT_POW_3'] / row['PLANT_POW_3']
                     if row['CT_POW_3'] != 0 else 0,
        axis=1)
    
    df['CT_TotalFLOW'] = df['CT_FLOW_1'] + df['CT_FLOW_2'] + df['CT_FLOW_3']
    return df

# Condensation Side
def add_CDWL(df):    
    df['CDWL_POW'] = df.apply(
        lambda row: row['CDWL_PM_POW_1'] + row['CDWL_PM_POW_2'] + row['CDWL_PM_POW_3'],
        axis=1)
    df['CDWL_DeltaTEMP'] = df.apply(
        lambda row: (row['CDWL_RW_TEMP'] - row['CDWL_SW_TEMP'])
                     if row['CDWL_POW'] != 0 else 0,
        axis=1)
    df['CDWL_Load'] = df.apply(
        lambda row: row['CDWL_DeltaTEMP'] * row['CDWL_CW_FLOW'] * 4.18
                     if row['CDWL_POW'] != 0 else 0,
        axis=1)
    df['CDWL_Error'] = df['CT_SW_TEMPSPT'] - df['CDWL_SW_TEMP']
    
    df['CDWL_DeltaFLOW'] = df['CHL_CD_TotalFLOW'] - df['CT_TotalFLOW'] # Bypass How Many FLow
    return df

# Chilled Side
def add_CWL_PRI(df):
    df['CWL_PRI_POW'] = df.apply(
        lambda row: row['CWL_PRI_PM_POW_1'] + row['CWL_PRI_PM_POW_2'] + row['CWL_PRI_PM_POW_3'],
        axis=1)
    df['CWL_PRI_DeltaTEMP'] = df.apply(\
        lambda row: (row['CWL_PRI_RW_TEMP'] - row['CWL_PRI_SW_TEMP'])
                     if row['CWL_PRI_POW'] != 0 else 0,
        axis=1)
    df['CWL_PRI_Load'] = df.apply(
        lambda row: row['CWL_PRI_DeltaTEMP'] * row['CWL_PRI_CW_FLOW'] * 4.18
                     if row['CWL_PRI_POW'] != 0 else 0,
        axis=1)
    df['CWL_SEC_POW'] = df.apply(
        lambda row: row['CWL_SEC_PM_POW_1'] + row['CWL_SEC_PM_POW_2'],
        axis=1)
    return df

# Other
def add_RH(df):
    psychrolib.SetUnitSystem(psychrolib.SI)
    def RH(outdoor_temp, wet_bulb_temp):
        return psychrolib.GetRelHumFromTWetBulb(outdoor_temp, wet_bulb_temp, 101325)
    df['Relative_Humidity'] = df.apply(lambda row: RH(row['OA_TEMP'], row['OA_TEMP_WB']), axis=1)
    df['Relative_Humidity'] = df['Relative_Humidity'].round(3)
    return df

# Apply New Features by orginial DF_main
DF_main = add_WCC(DF_main)
DF_main = add_CT(DF_main)
DF_main = add_CDWL(DF_main)
DF_main = add_CWL_PRI(DF_main)
DF_main = add_RH(DF_main)

constants = ['CHL_CD_FLOW_1', 'CHL_CD_FLOW_2', 'CHL_CD_FLOW_3', 'CHL_CW_FLOW_1', 'CHL_CW_FLOW_2', 'CHL_CW_FLOW_3', 'CWL_SEC_DPSPT']
DF_main = DF_main.drop(columns=constants) # After adding after adding the new features, before output.

  df['CT_Load_2'] = df.apply(
  df['CT_Eff_2'] = df.apply(
  df['CT_ApproachTEMP_2'] = df.apply(
  df['CT_Error_2'] = df.apply(
  df['CT_Dist_2'] = df.apply(
  df['CT_DeltaTEMP_3'] = df.apply(
  df['CT_Load_3'] = df.apply(
  df['CT_Eff_3'] = df.apply(
  df['CT_ApproachTEMP_3'] = df.apply(
  df['CT_Error_3'] = df.apply(
  df['CT_Dist_3'] = df.apply(
  df['CT_TotalFLOW'] = df['CT_FLOW_1'] + df['CT_FLOW_2'] + df['CT_FLOW_3']
  df['CDWL_POW'] = df.apply(
  df['CDWL_DeltaTEMP'] = df.apply(
  df['CDWL_Load'] = df.apply(
  df['CDWL_Error'] = df['CT_SW_TEMPSPT'] - df['CDWL_SW_TEMP']
  df['CDWL_DeltaFLOW'] = df['CHL_CD_TotalFLOW'] - df['CT_TotalFLOW'] # Bypass How Many FLow
  df['CWL_PRI_POW'] = df.apply(
  df['CWL_PRI_DeltaTEMP'] = df.apply(\
  df['CWL_PRI_Load'] = df.apply(
  df['CWL_SEC_POW'] = df.apply(
  df['Relative_Humidity'] = df.apply(lambda row: RH(row['OA_TEMP'], row['OA_TEMP_WB']), axis=1)


In [12]:
# Output data files (with new features)
with pd.ExcelWriter('df_faults_even.xlsx', engine='xlsxwriter') as writer:
    DF_main.to_excel(writer, sheet_name='main')

In [13]:
# Split into sub-data set

CHL_columns = [col for col in DF_main.columns if "CHL_" in col]
df_CHL = DF_main[CHL_columns]

CT_columns = [col for col in DF_main.columns if "CT_" in col]
df_CT = DF_main[CT_columns]

CDWL_columns = [col for col in DF_main.columns if "CDWL_" in col]
df_CDWL = DF_main[CDWL_columns]

CWL_PRI_columns = [col for col in DF_main.columns if "CWL_PRI_" in col]
df_CWL_PRI = DF_main[CWL_PRI_columns]

CWL_SEC_columns = [col for col in DF_main.columns if "CWL_SEC_" in col]
df_CWL_SEC = DF_main[CWL_SEC_columns]

Other_columns = CHL_columns + CT_columns + CDWL_columns + CWL_PRI_columns + CWL_SEC_columns
df_Other = DF_main.drop(columns=Other_columns)

In [14]:
# Output By_Components

with pd.ExcelWriter('Components_faults_even.xlsx', engine='xlsxwriter') as writer:
    df_CHL.to_excel(writer, sheet_name='CHL_Data')
    df_CT.to_excel(writer, sheet_name='CT_Data')
    df_CDWL.to_excel(writer, sheet_name='CDWL_Data')
    df_CWL_PRI.to_excel(writer, sheet_name='CWL_PRI_Data')
    df_CWL_SEC.to_excel(writer, sheet_name='CWL_SEC_Data')
    df_Other.to_excel(writer, sheet_name='Other_Data')


### 'df_faults_clean.xlsx' => cleaned the noise without new features

### 'df_faults.xlsx' with new features