In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import zero_one_loss
import random
import pickle
import os
from dask import compute
import dask.bag as db
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from collections import Counter
from random import randrange

from sklearn.ensemble import GradientBoostingClassifier

Module 1

Inject errors into each sensor and measure their impact on FDD performance

In [None]:
def _load_daily_mean(csv_path, label):
    """Read one sensor CSV, average it over blocks of 4*24 rows, and tag it with `label`."""
    sensor_df = pd.read_csv(csv_path)
    # the first column is dropped before averaging (presumably a timestamp -- TODO confirm)
    sensor_df = sensor_df.iloc[:, 1:]
    # 15-minute samples -> one row per day (4 samples per hour * 24 hours)
    sensor_df = sensor_df.groupby(np.arange(len(sensor_df)) // (4 * 24)).mean()
    sensor_df.loc[:, 'label'] = label
    return sensor_df


def _inject_bias(X_test, sensor):
    """Add a constant offset of 5% of the column mean to `sensor`."""
    X_test[sensor] = X_test[sensor] + X_test[sensor].mean() * 0.05
    return X_test


def _inject_drift(X_test, sensor):
    """Add a linear ramp from 0 up to 10% of the column mean to `sensor`."""
    ramp = np.linspace(0, X_test[sensor].mean() * 0.1, num=len(X_test))
    X_test[sensor] = X_test[sensor] + ramp
    return X_test


def _inject_failure(X_test, sensor):
    """Replace every reading of `sensor` by the column mean (flat-lined sensor)."""
    X_test[sensor] = X_test[sensor].mean()
    return X_test


def _inject_precision_degradation(X_test, sensor):
    """Add uniform noise within +/-5% of the column mean to `sensor`."""
    # the mean is loop-invariant, so compute it once instead of once per row
    half_width = X_test[sensor].mean() * 0.05
    # NOTE: draws from the global `random` state, which is not seeded in this function
    noise = np.array([random.uniform(-half_width, half_width) for _ in range(len(X_test))])
    X_test[sensor] = X_test[sensor] + noise
    return X_test


def _mean_error_with_injection(inputs, output, fold_models, sensor, inject):
    """Average zero-one loss over all folds after corrupting `sensor` in each test split."""
    fold_errors = []
    for model, test_index in fold_models:
        # work on a copy so the injected error never leaks into `inputs`
        X_test = inject(inputs.iloc[test_index].copy(), sensor)
        y_test = output.iloc[test_index]
        fold_errors.append(zero_one_loss(y_test, model.predict(X_test), normalize=True))
    return sum(fold_errors) / len(fold_errors)


def process_by_weather(weather):
    """Train fault classifiers for one weather location and measure how sensor
    inaccuracies in the test data change their cross-validated error.

    Steps:
      1. Load the sensor CSVs of the selected fault types plus one baseline run
         from ``data/{weather}/{weather}/`` and reduce them to daily means.
      2. Fit a RandomForestClassifier per fold of a 5-fold CV, save each model
         to ``model/saved_model_{fold}_{weather}.sav`` and collect the ten most
         important features of every fold.
      3. For each of those features, corrupt the test split (bias, drift,
         failure, precision degradation) and re-score the already fitted models.
      4. Write all error rates to ``results/{weather}_cv_error.csv``.

    Parameters
    ----------
    weather : str
        Name of the weather/location folder under ``data/``.

    Raises
    ------
    Exception
        If the folder does not contain exactly one file without 'sensors' in
        its name (the metadata file).
    IndexError
        If the metadata lists no 'baseline' run.
    """
    # self-defined fault types
    selected_fault_types = ['air_handling_unit_fan_motor_degradation',
                            'biased_economizer_sensor_mixed_t',
                            'duct_fouling',
                            'economizer_opening_stuck',
                            'hvac_setback_error_delayed_onset',
                            'hvac_setback_error_no_overnight_setback',
                            'hvac_setback_error_early_termination',
                            'improper_time_delay_setting_in_occupancy_sensors',
                            'lighting_setback_error_delayed_onset',
                            'lighting_setback_error_no_overnight_setback',
                            'lighting_setback_error_early_termination',
                            'return_air_duct_leakages',
                            'supply_air_duct_leakages',
                            'thermostat_bias'
                           ]

    print(f'Processing: {weather}...')

    data_dir = f'data/{weather}/{weather}/'

    # read metadata file: the only file in the folder without 'sensors' in its name
    prefixed = [filename for filename in os.listdir(data_dir) if 'sensors' not in filename]
    if len(prefixed) != 1:
        raise Exception("Something wrong with finding the meta data file in the data folder. Make sure the original data folder are used for data processing")
    meta_data_df = pd.read_csv(data_dir + prefixed[0])

    # Only the hand-picked fault types are used (not every type in the metadata).
    fault_type_list = selected_fault_types

    # The non-fault run does not depend on the fault type, so load it once.
    ids_none_fault = meta_data_df.loc[meta_data_df.fault_type == 'baseline'].id.tolist()
    if not ids_none_fault:
        raise IndexError(f"No 'baseline' run listed in the metadata file for {weather}")
    baseline_df = _load_daily_mean(f'{data_dir}{ids_none_fault[0]}_sensors.csv', 'none')

    input_parts = []
    label_parts = []
    for fault_type in fault_type_list:
        ids = meta_data_df.loc[meta_data_df.fault_type == fault_type].id.tolist()

        frames = [_load_daily_mean(f'{data_dir}{id_n}_sensors.csv', fault_type) for id_n in ids]
        # NOTE(review): the same baseline rows are appended once per fault type, so
        # identical 'none' samples can land in both the train and the test split of
        # a shuffled fold. Kept as in the original experiment -- confirm it is intended.
        frames.append(baseline_df)
        fault_data_df = pd.concat(frames, axis=0).reset_index(drop=True)

        # the last 9 columns (the label plus the 8 columns before it) are not used as features
        input_parts.append(fault_data_df.iloc[:, 0:-9])
        label_parts.append(fault_data_df.iloc[:, -1])

    # concatenate once instead of growing the frames inside the loop
    inputs = pd.concat(input_parts, axis=0, ignore_index=True)
    output = pd.concat(label_parts, axis=0, ignore_index=True)

    print(f' Processing: inputs/output data ready for {weather} ...')

    # make sure the output folders exist before anything is written
    os.makedirs('model', exist_ok=True)
    os.makedirs('results', exist_ok=True)

    # cross-validation
    cv = KFold(n_splits=5, shuffle=True, random_state=42)

    fold_models = []          # (fitted model, test indices) per fold, reused for every injection
    baseline_errors = []
    important_features = []
    for fold, (train_index, test_index) in enumerate(cv.split(inputs)):
        X_train, X_test = inputs.iloc[train_index], inputs.iloc[test_index]
        y_train, y_test = output.iloc[train_index], output.iloc[test_index]

        # Fit the model on training data
        regr = RandomForestClassifier(n_estimators=2, random_state=42)
        regr.fit(X_train, y_train)

        # save model; the context manager guarantees the file handle is closed
        with open(f'model/saved_model_{fold}_{weather}.sav', 'wb') as model_file:
            pickle.dump(regr, model_file)
        fold_models.append((regr, test_index))

        # ten most important features of this fold
        feature_importance = pd.DataFrame({'sensor_name': inputs.columns,
                                           'importance': regr.feature_importances_})
        important_features += feature_importance.sort_values(
            by=['importance'], ascending=False).sensor_name.iloc[:10].tolist()

        # error on the untouched test split
        baseline_errors.append(zero_one_loss(y_test, regr.predict(X_test), normalize=True))

    records = [[weather, 'all_sensors', 'all_inaccuracy',
                sum(baseline_errors) / len(baseline_errors)]]

    # sorted() keeps the sensor order (and therefore the CSV row order) stable across runs
    important_features = sorted(set(important_features))

    # With the selected sensors, add noise to them and see how they impact FDD performance.
    # (progress title, label written to the CSV, injection function)
    # The bias case keeps its original 'shift' label; drift gets its own 'drift'
    # label so it no longer overwrites the bias results.
    injections = [('Bias', 'shift', _inject_bias),
                  ('Drift', 'drift', _inject_drift),
                  ('Failure', 'failure', _inject_failure),
                  ('Degradation', 'degradation', _inject_precision_degradation)]

    for title, inaccuracy_type, inject in injections:
        print(f'  Processing: {title}, {weather}...')
        for sensor in important_features:
            # each sensor is averaged over its own folds only
            cv_error = _mean_error_with_injection(inputs, output, fold_models, sensor, inject)
            records.append([weather, sensor, inaccuracy_type, cv_error])

    # post-processing of the error results
    processed_cv_error_df = pd.DataFrame(
        records, columns=['weather', 'sensor_type', 'sensor_inaccuracy_type', 'CV_error'])
    processed_cv_error_df.to_csv(f'results/{weather}_cv_error.csv', index=False)

In [None]:
# Run the sensor-inaccuracy study for the chosen locations only.
# Complete list of available locations:
# ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']
selected_weathers = ['TN_Knoxville', 'VA_Richmond']
for weather in selected_weathers:
    process_by_weather(weather)

Module 2

Create a fault probability table for sensors

In [None]:
# sensor_category_dict = {
#     'cooling_electricity [W]': 'electiricty_meter',
#     'electricity_facility [W]': 'electiricty_meter',
#     'whole_building_facility_total_hvac_electric_demand_power [W]':'electiricty_meter',
#     'rooftop_supply_fan_fan_electric_energy [W]':'electiricty_meter',
#     'fans_electricity [W]':'electiricty_meter',
#     'gas_facility [W]':'gas_meter',
#     'rooftop_heatingcoil_heating_coil_heating_energy [W]': 'energy_meter',
#     'room_102_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_103_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_104_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_105_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_106_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_202_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_203_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_204_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_205_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'room_206_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
#     'heating_electricity [W]':'electiricty_meter',
#     'heating_gas [W]': 'gas_meter',
#     'interiorequipment_electricity [W]':'electiricty_meter',
#     'interiorlights_electricity [W]':'electiricty_meter',
#     'environment_site_diffuse_solar_radiation_rate_per_area [W/m2]': 'weather_meter',
#     'environment_site_direct_solar_radiation_rate_per_area [W/m2]':'weather_meter',
#     'environment_site_outdoor_air_barometric_pressure [Pa]':'weather_meter',
#     'environment_site_outdoor_air_drybulb_temperature [C]':'weather_meter',
#     'environment_site_outdoor_air_relative_humidity [%]':'weather_meter',
#     'environment_site_outdoor_air_wetbulb_temperature [C]':'weather_meter',
#     'environment_site_rain_status []':'weather_meter',
#     'model_outdoor_air_node_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_1_system_node_current_density_volume_flow_rate [m3/s]': 'system_node_flow_rate',
#     'node_10_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_11_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_12_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_13_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_14_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_15_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_16_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_17_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_18_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_19_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_2_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_20_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_21_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_22_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_23_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_24_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_25_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_26_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_27_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_28_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_3_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_4_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_6_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_7_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_8_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'node_9_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'rooftop_cooling_coil_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'rooftop_heating_coil_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'rooftop_mixed_air_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'rooftop_supply_fan_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_102_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_102_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_103_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_103_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_104_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_104_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_105_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_105_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_106_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_106_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_202_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_202_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_203_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_203_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_204_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_204_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_205_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_205_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_206_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'room_206_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{0628a441-dd58-4747-b49f-b343da599f6d}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{37d30d88-4e83-4f6c-98c7-a29c55ba3e84}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{38113ce7-41ec-4cfc-9439-9e0eb1e4167e}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{3a71b498-dd57-4750-ab29-7013d94b7420}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{466be93f-f26d-4e42-ab5f-0c90ed714020}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{6ec5fd0e-2850-4c87-bc5f-bc1e2d2e5109}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{76064095-69fd-4710-a706-3caca6b1930a}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{8411efaa-a23b-4a5a-be30-f1fa819dbe95}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{9129fe30-5384-4db8-8e33-0bb342dd4a49}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{ced936e0-0614-4c8a-a4d6-aa0b5c362790}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{d5737138-bc5b-4212-acf2-fd3235458210}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     '{d9fe34a3-6952-4cd4-96fc-8fec69429e39}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
#     'model_outdoor_air_node_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_1_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_10_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_11_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_12_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_13_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_14_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_15_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_16_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_17_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_18_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_19_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_2_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_20_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_21_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_22_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_23_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_24_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_25_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_26_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_27_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_28_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_3_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_4_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_6_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_7_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_8_system_node_temperature [C]':'system_node_temperature_sensor',
#     'node_9_system_node_temperature [C]':'system_node_temperature_sensor',
#     'rooftop_cooling_coil_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'rooftop_heating_coil_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'rooftop_mixed_air_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'rooftop_supply_fan_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_102_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_102_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_103_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_103_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_104_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_104_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_105_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_105_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_106_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_106_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_202_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_202_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_203_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_203_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_204_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_204_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_205_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_205_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_206_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     'room_206_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{0628a441-dd58-4747-b49f-b343da599f6d}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{37d30d88-4e83-4f6c-98c7-a29c55ba3e84}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{38113ce7-41ec-4cfc-9439-9e0eb1e4167e}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{3a71b498-dd57-4750-ab29-7013d94b7420}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{466be93f-f26d-4e42-ab5f-0c90ed714020}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{6ec5fd0e-2850-4c87-bc5f-bc1e2d2e5109}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{76064095-69fd-4710-a706-3caca6b1930a}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{8411efaa-a23b-4a5a-be30-f1fa819dbe95}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{9129fe30-5384-4db8-8e33-0bb342dd4a49}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{ced936e0-0614-4c8a-a4d6-aa0b5c362790}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{d5737138-bc5b-4212-acf2-fd3235458210}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '{d9fe34a3-6952-4cd4-96fc-8fec69429e39}_system_node_temperature [C]':'system_node_temperature_sensor',
#     '1f_plenum_zone_air_relative_humidity [%]': 'room_humidity_sensor',
#     '2f_plenum_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_101_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_102_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_103_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_104_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_105_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_106_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_201_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_202_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_203_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_204_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_205_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     'room_206_zone_air_relative_humidity [%]':'room_humidity_sensor',
#     '1f_plenum_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     '2f_plenum_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_101_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_102_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_103_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_104_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_105_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_106_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_201_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_202_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_203_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_204_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_205_zone_mean_air_temperature [C]':'room_temperature_sensor',
#     'room_206_zone_mean_air_temperature [C]':'room_temperature_sensor'
# }

In [None]:
# list(set([x for x in sensor_category_dict.values()]))

In [None]:
# sensor_type_fault_probability_table = {
#  'electiricty_meter': 0.1,
#  'system_node_temperature_sensor': 0.1,
#  'room_temperature_sensor': 0.1,
#  'energy_meter': 0.1,
#  'weather_meter': 0.1,
#  'room_humidity_sensor': 0.1,
#  'system_node_flow_rate': 0.1,
#  'gas_meter': 0.1
# }

In [None]:
# failure_bias_drift_precision_conditional_probability_table = {
#  'electiricty_meter': [0.25, 0.25, 0.25, 0.25],
#  'system_node_temperature_sensor': [0.25, 0.25, 0.25, 0.25],
#  'room_temperature_sensor': [0.25, 0.25, 0.25, 0.25],
#  'energy_meter': [0.25, 0.25, 0.25, 0.25],
#  'weather_meter': [0.25, 0.25, 0.25, 0.25],
#  'room_humidity_sensor': [0.25, 0.25, 0.25, 0.25],
#  'system_node_flow_rate': [0.25, 0.25, 0.25, 0.25],
#  'gas_meter': [0.25, 0.25, 0.25, 0.25]
# }

In [None]:
# all_sensor_list = pd.DataFrame([])
# all_sensor_list['sensors'] = sensor_category_dict.keys()
# all_sensor_list['sensor_type'] = all_sensor_list['sensors'].map(sensor_category_dict)
# all_sensor_list['probability'] = all_sensor_list['sensor_type'].map(sensor_type_fault_probability_table)

In [None]:
# all_sensor_list.head()

In [None]:
# probability_results = []
# for x in all_sensor_list.probability:  
#     a_list = [0,1]
#     distribution = [1-x, x]
#     random_number = random.choices(a_list, distribution)[0]
#     probability_results.append(random_number)

# all_sensor_list['probability_results'] = probability_results

In [None]:
# all_sensor_list.head()

In [None]:
# all_sensor_list['conditional_probability'] = all_sensor_list['sensor_type'].map(failure_bias_drift_precision_conditional_probability_table)

In [None]:
# probability_results = []
# for x in all_sensor_list.conditional_probability:
#     a_list = ['failure','bias','drift','precision']
#     distribution = x
#     random_number = random.choices(a_list, distribution)[0]
#     probability_results.append(random_number)

# all_sensor_list['conditional_probability_results'] = probability_results    

In [None]:
# all_sensor_list

In [None]:
# example_data = pd.read_csv('data/SAU_Riyadh/SAU_Riyadh/0a8ede60-c755-45eb-917f-c5a52e4cd88e_sensors.csv')

In [None]:
# for x in all_sensor_list.sensors:
#     print(f'    processing: {x}')
#     temp = all_sensor_list.loc[all_sensor_list.sensors == x]
#     if temp.probability_results.values[0] == 1:
#         if temp.conditional_probability_results.values[0] == 'bias':
#             example_data.loc[:,x] = example_data.loc[:,x] + example_data.loc[:,x].mean() * 0.05
#         elif temp.conditional_probability_results.values[0] == 'drift':
#             example_data.loc[:,x] = example_data.loc[:,x] + np.linspace(0, example_data.loc[:,x].mean()*0.1, num=len(example_data.loc[:,x]))
#         elif temp.conditional_probability_results.values[0] == 'precision':
#             random_list = []
#             for j in range(len(example_data)):
#                 random_list += [random.uniform(- example_data.loc[:,x].mean() * 0.05, example_data.loc[:,x].mean() * 0.05)]
#             example_data.loc[:,x] = example_data.loc[:,x] + random_list
#         else:
#             example_data.loc[:,x] = example_data.loc[:,x].mean()
            
# example_data

In [None]:
# example_data.loc[:,x] = example_data.loc[:,x] + random_list

In [None]:
# a = pd.read_csv('data/MN_Duluth/MN_Duluth/20bdff7c-c38c-4ddb-bb1f-1206f073fa8a_sensors.csv')

In [None]:
# b = pd.read_csv('data_inaccuracy_injected/MN_Duluth/MN_Duluth/20bdff7c-c38c-4ddb-bb1f-1206f073fa8a_sensors.csv')

In [None]:
# (a.iloc[:,1:] - b.iloc[:,1:]).to_csv('test.csv')

In [109]:
def adding_inaccuracy_to_raw_data(FDD_data_df, sensor_type_fault_probability = 0.33,
                                  failure_bias_drift_precision_conditional_probability = [0.25, 0.25, 0.25, 0.25],
                                  random_state = None):
    """Return a copy of ``FDD_data_df`` with random sensor inaccuracies injected.

    Every sensor column listed in the internal sensor -> sensor-type mapping is
    independently declared faulty with probability
    ``sensor_type_fault_probability``.  A faulty column receives exactly one of
    four perturbations, chosen with the given conditional weights:

    * ``failure``   - the whole column is replaced by its own mean;
    * ``bias``      - a constant offset of 5% of the column mean is added;
    * ``drift``     - a linear ramp from 0 to 10% of the column mean is added;
    * ``precision`` - Gaussian noise with standard deviation 0.05 x column mean
      is added.

    Parameters
    ----------
    FDD_data_df : pandas.DataFrame
        Sensor readings, one column per sensor.  The frame is not modified.
        Columns that are not in the internal mapping are returned untouched,
        and mapped sensors that are absent from the frame are skipped.
    sensor_type_fault_probability : float, default 0.33
        Probability (in ``[0, 1]``) that any single sensor is faulty.  The same
        value is applied to every sensor type.
    failure_bias_drift_precision_conditional_probability : sequence of 4 numbers
        Relative weights of ``failure``, ``bias``, ``drift`` and ``precision``
        (in that order) given that a sensor is faulty.
    random_state : int, optional
        When given, private random generators seeded with this value are used,
        so repeated calls with the same seed inject identical faults.  When
        ``None`` (default) the global ``random`` / ``np.random`` state is used.

    Returns
    -------
    pandas.DataFrame
        A perturbed copy of ``FDD_data_df``.

    Raises
    ------
    ValueError
        If ``sensor_type_fault_probability`` is outside ``[0, 1]``.
    """
    if not 0 <= sensor_type_fault_probability <= 1:
        raise ValueError(
            f'sensor_type_fault_probability must be within [0, 1], got {sensor_type_fault_probability!r}')

    # Pick the sources of randomness: module-level state by default, or
    # private generators when a seed is supplied.
    if random_state is None:
        draw_choice = random.choices
        draw_normal = np.random.normal
    else:
        draw_choice = random.Random(random_state).choices
        draw_normal = np.random.default_rng(random_state).normal

    raw_FDD_data = FDD_data_df.copy()

    # Sensor column name -> sensor type.  The label 'electiricty_meter' is
    # misspelled, but it is only used as an internal key and is consistent
    # throughout this function.
    sensor_category_dict = {
        'cooling_electricity [W]': 'electiricty_meter',
        'electricity_facility [W]': 'electiricty_meter',
        'whole_building_facility_total_hvac_electric_demand_power [W]':'electiricty_meter',
        'rooftop_supply_fan_fan_electric_energy [W]':'electiricty_meter',
        'fans_electricity [W]':'electiricty_meter',
        'gas_facility [W]':'gas_meter',
        'rooftop_heatingcoil_heating_coil_heating_energy [W]': 'energy_meter',
        'room_102_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_103_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_104_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_105_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_106_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_202_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_203_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_204_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_205_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'room_206_reheat_coil_heating_coil_heating_energy [W]':'energy_meter',
        'heating_electricity [W]':'electiricty_meter',
        'heating_gas [W]': 'gas_meter',
        'interiorequipment_electricity [W]':'electiricty_meter',
        'interiorlights_electricity [W]':'electiricty_meter',
        'environment_site_diffuse_solar_radiation_rate_per_area [W/m2]': 'weather_meter',
        'environment_site_direct_solar_radiation_rate_per_area [W/m2]':'weather_meter',
        'environment_site_outdoor_air_barometric_pressure [Pa]':'weather_meter',
        'environment_site_outdoor_air_drybulb_temperature [C]':'weather_meter',
        'environment_site_outdoor_air_relative_humidity [%]':'weather_meter',
        'environment_site_outdoor_air_wetbulb_temperature [C]':'weather_meter',
        'environment_site_rain_status []':'weather_meter',
        'model_outdoor_air_node_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_1_system_node_current_density_volume_flow_rate [m3/s]': 'system_node_flow_rate',
        'node_10_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_11_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_12_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_13_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_14_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_15_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_16_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_17_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_18_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_19_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_2_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_20_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_21_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_22_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_23_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_24_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_25_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_26_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_27_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_28_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_3_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_4_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_6_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_7_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_8_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'node_9_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'rooftop_cooling_coil_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'rooftop_heating_coil_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'rooftop_mixed_air_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'rooftop_supply_fan_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_102_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_102_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_103_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_103_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_104_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_104_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_105_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_105_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_106_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_106_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_202_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_202_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_203_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_203_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_204_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_204_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_205_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_205_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_206_supply_inlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'room_206_vav_reheat_damper_outlet_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{0628a441-dd58-4747-b49f-b343da599f6d}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{37d30d88-4e83-4f6c-98c7-a29c55ba3e84}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{38113ce7-41ec-4cfc-9439-9e0eb1e4167e}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{3a71b498-dd57-4750-ab29-7013d94b7420}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{466be93f-f26d-4e42-ab5f-0c90ed714020}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{6ec5fd0e-2850-4c87-bc5f-bc1e2d2e5109}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{76064095-69fd-4710-a706-3caca6b1930a}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{8411efaa-a23b-4a5a-be30-f1fa819dbe95}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{9129fe30-5384-4db8-8e33-0bb342dd4a49}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{ced936e0-0614-4c8a-a4d6-aa0b5c362790}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{d5737138-bc5b-4212-acf2-fd3235458210}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        '{d9fe34a3-6952-4cd4-96fc-8fec69429e39}_system_node_current_density_volume_flow_rate [m3/s]':'system_node_flow_rate',
        'model_outdoor_air_node_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_1_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_10_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_11_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_12_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_13_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_14_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_15_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_16_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_17_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_18_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_19_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_2_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_20_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_21_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_22_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_23_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_24_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_25_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_26_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_27_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_28_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_3_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_4_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_6_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_7_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_8_system_node_temperature [C]':'system_node_temperature_sensor',
        'node_9_system_node_temperature [C]':'system_node_temperature_sensor',
        'rooftop_cooling_coil_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'rooftop_heating_coil_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'rooftop_mixed_air_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'rooftop_supply_fan_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_102_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_102_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_103_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_103_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_104_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_104_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_105_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_105_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_106_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_106_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_202_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_202_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_203_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_203_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_204_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_204_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_205_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_205_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_206_supply_inlet_system_node_temperature [C]':'system_node_temperature_sensor',
        'room_206_vav_reheat_damper_outlet_system_node_temperature [C]':'system_node_temperature_sensor',
        '{0628a441-dd58-4747-b49f-b343da599f6d}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{37d30d88-4e83-4f6c-98c7-a29c55ba3e84}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{38113ce7-41ec-4cfc-9439-9e0eb1e4167e}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{3a71b498-dd57-4750-ab29-7013d94b7420}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{466be93f-f26d-4e42-ab5f-0c90ed714020}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{6ec5fd0e-2850-4c87-bc5f-bc1e2d2e5109}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{76064095-69fd-4710-a706-3caca6b1930a}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{8411efaa-a23b-4a5a-be30-f1fa819dbe95}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{9129fe30-5384-4db8-8e33-0bb342dd4a49}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{ced936e0-0614-4c8a-a4d6-aa0b5c362790}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{d5737138-bc5b-4212-acf2-fd3235458210}_system_node_temperature [C]':'system_node_temperature_sensor',
        '{d9fe34a3-6952-4cd4-96fc-8fec69429e39}_system_node_temperature [C]':'system_node_temperature_sensor',
        '1f_plenum_zone_air_relative_humidity [%]': 'room_humidity_sensor',
        '2f_plenum_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_101_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_102_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_103_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_104_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_105_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_106_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_201_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_202_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_203_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_204_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_205_zone_air_relative_humidity [%]':'room_humidity_sensor',
        'room_206_zone_air_relative_humidity [%]':'room_humidity_sensor',
        '1f_plenum_zone_mean_air_temperature [C]':'room_temperature_sensor',
        '2f_plenum_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_101_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_102_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_103_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_104_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_105_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_106_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_201_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_202_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_203_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_204_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_205_zone_mean_air_temperature [C]':'room_temperature_sensor',
        'room_206_zone_mean_air_temperature [C]':'room_temperature_sensor'
    }

    # Per-sensor-type parameter tables.  Every type currently shares the same
    # values; the tables are kept so individual types can be overridden later.
    sensor_types = set(sensor_category_dict.values())
    sensor_type_fault_probability_table = {
        sensor_type: sensor_type_fault_probability for sensor_type in sensor_types}
    failure_bias_drift_precision_conditional_probability_table = {
        sensor_type: failure_bias_drift_precision_conditional_probability for sensor_type in sensor_types}

    sensors = list(sensor_category_dict)
    fault_kind_names = ['failure', 'bias', 'drift', 'precision']

    # Draw order matters for reproducibility under a fixed seed: first one
    # faulty/healthy flag per mapped sensor, then one fault kind per mapped
    # sensor.  Draws are made for every mapped sensor, present in the frame or
    # not, so a sensor's outcome does not depend on which other columns exist.
    fault_flags = []
    for sensor in sensors:
        probability = sensor_type_fault_probability_table[sensor_category_dict[sensor]]
        fault_flags.append(draw_choice([0, 1], [1 - probability, probability])[0])

    fault_kinds = []
    for sensor in sensors:
        weights = failure_bias_drift_precision_conditional_probability_table[sensor_category_dict[sensor]]
        fault_kinds.append(draw_choice(fault_kind_names, weights)[0])

    n_rows = len(raw_FDD_data)
    present_columns = set(raw_FDD_data.columns)
    for sensor, is_faulty, fault_kind in zip(sensors, fault_flags, fault_kinds):
        # Healthy sensors stay as they are; mapped sensors missing from this
        # frame are skipped instead of raising KeyError.
        if is_faulty != 1 or sensor not in present_columns:
            continue

        readings = raw_FDD_data[sensor]
        readings_mean = readings.mean()

        # Columns are replaced (not written through .loc) so that a float
        # result can be stored even when the original column has an int dtype.
        if fault_kind == 'bias':
            raw_FDD_data[sensor] = readings + readings_mean * 0.05
        elif fault_kind == 'drift':
            raw_FDD_data[sensor] = readings + np.linspace(0, readings_mean * 0.1, num=n_rows)
        elif fault_kind == 'precision':
            raw_FDD_data[sensor] = readings + draw_normal(0, 0.05, n_rows) * readings_mean
        else:
            # 'failure': the sensor reports a flat value (its own mean).
            raw_FDD_data[sensor] = readings_mean

    return raw_FDD_data

In [106]:
# example_raw_FDD_data = pd.read_csv('data/SAU_Riyadh/SAU_Riyadh/0a8ede60-c755-45eb-917f-c5a52e4cd88e_sensors.csv')
# inaccuracy_injected_FDD_data = adding_inaccuracy_to_raw_data(example_raw_FDD_data)

In [110]:
# Generate the inaccuracy-injected datasets: ten perturbed copies of every
# '*sensors*' file of each selected climate, written to
# data_inaccuracy_injected_<j>/<weather>/<weather>/.
# Climates used in other runs of this cell:
#   'AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond'
for weather in ['AK_Fairbanks']:
    source_dir = f'data/{weather}/{weather}/'
    for j in range(1, 11):
        target_dir = f'data_inaccuracy_injected_{j}/{weather}/{weather}/'
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        print(f'Generating the {j}th inaccuracy injected data')
        print(f'Processing: {weather}')
        prefixed = [filename for filename in os.listdir(source_dir) if 'sensors' in filename]
        for i, file_name in enumerate(prefixed):
            print(f'    Processing: {i+1}/{len(prefixed)}')
            temp_raw_FDD_data = pd.read_csv(f'{source_dir}{file_name}')
            # Average every block of 4*24 consecutive rows
            # (presumably 15-minute samples -> daily means; TODO confirm).
            block_id = temp_raw_FDD_data.index // (4*24)
            temp_raw_FDD_data = temp_raw_FDD_data.groupby(block_id).mean()
            # Drop the last eight columns before injecting inaccuracies.
            temp_raw_FDD_data = temp_raw_FDD_data.iloc[:,0:-8]
            inaccuracy_injected_FDD_data = adding_inaccuracy_to_raw_data(temp_raw_FDD_data)

            inaccuracy_injected_FDD_data.to_csv(f'{target_dir}{file_name}', index = None)

Generating the 1th inaccuracy injected data
Processing: AK_Fairbanks
    Processing: 1/100
    Processing: 2/100
    Processing: 3/100
    Processing: 4/100
    Processing: 5/100
    Processing: 6/100
    Processing: 7/100
    Processing: 8/100
    Processing: 9/100
    Processing: 10/100
    Processing: 11/100
    Processing: 12/100
    Processing: 13/100
    Processing: 14/100
    Processing: 15/100
    Processing: 16/100
    Processing: 17/100
    Processing: 18/100
    Processing: 19/100
    Processing: 20/100
    Processing: 21/100
    Processing: 22/100
    Processing: 23/100
    Processing: 24/100
    Processing: 25/100
    Processing: 26/100
    Processing: 27/100
    Processing: 28/100
    Processing: 29/100
    Processing: 30/100
    Processing: 31/100
    Processing: 32/100
    Processing: 33/100
    Processing: 34/100
    Processing: 35/100
    Processing: 36/100
    Processing: 37/100
    Processing: 38/100
    Processing: 39/100
    Processing: 40/100
    Processing: 41/1

    Processing: 47/100
    Processing: 48/100
    Processing: 49/100
    Processing: 50/100
    Processing: 51/100
    Processing: 52/100
    Processing: 53/100
    Processing: 54/100
    Processing: 55/100
    Processing: 56/100
    Processing: 57/100
    Processing: 58/100
    Processing: 59/100
    Processing: 60/100
    Processing: 61/100
    Processing: 62/100
    Processing: 63/100
    Processing: 64/100
    Processing: 65/100
    Processing: 66/100
    Processing: 67/100
    Processing: 68/100
    Processing: 69/100
    Processing: 70/100
    Processing: 71/100
    Processing: 72/100
    Processing: 73/100
    Processing: 74/100
    Processing: 75/100
    Processing: 76/100
    Processing: 77/100
    Processing: 78/100
    Processing: 79/100
    Processing: 80/100
    Processing: 81/100
    Processing: 82/100
    Processing: 83/100
    Processing: 84/100
    Processing: 85/100
    Processing: 86/100
    Processing: 87/100
    Processing: 88/100
    Processing: 89/100
    Process

    Processing: 96/100
    Processing: 97/100
    Processing: 98/100
    Processing: 99/100
    Processing: 100/100
Generating the 8th inaccuracy injected data
Processing: AK_Fairbanks
    Processing: 1/100
    Processing: 2/100
    Processing: 3/100
    Processing: 4/100
    Processing: 5/100
    Processing: 6/100
    Processing: 7/100
    Processing: 8/100
    Processing: 9/100
    Processing: 10/100
    Processing: 11/100
    Processing: 12/100
    Processing: 13/100
    Processing: 14/100
    Processing: 15/100
    Processing: 16/100
    Processing: 17/100
    Processing: 18/100
    Processing: 19/100
    Processing: 20/100
    Processing: 21/100
    Processing: 22/100
    Processing: 23/100
    Processing: 24/100
    Processing: 25/100
    Processing: 26/100
    Processing: 27/100
    Processing: 28/100
    Processing: 29/100
    Processing: 30/100
    Processing: 31/100
    Processing: 32/100
    Processing: 33/100
    Processing: 34/100
    Processing: 35/100
    Processing: 36/

In [111]:
# Fault types kept for the experiments: the cells below keep only the metadata
# rows whose `fault_type` is in this list.
selected_fault_types = [
    'air_handling_unit_fan_motor_degradation',
    'biased_economizer_sensor_mixed_t',
    'duct_fouling',
    'economizer_opening_stuck',
    'hvac_setback_error_delayed_onset',
    'hvac_setback_error_no_overnight_setback',
    'hvac_setback_error_early_termination',
    'improper_time_delay_setting_in_occupancy_sensors',
    'lighting_setback_error_delayed_onset',
    'lighting_setback_error_no_overnight_setback',
    'lighting_setback_error_early_termination',
    'return_air_duct_leakages',
    'supply_air_duct_leakages',
    'thermostat_bias',
]

In [112]:
# Generate original error and features
#
# For each climate: build the labelled dataset from the original (unperturbed)
# sensor files, train a gradient-boosting classifier, and store the test error
# and the set of most important sensors under results/module_3/.
# Climates used in other runs of this cell:
#   'AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond'

# Make sure the output folder exists so the to_csv calls below cannot fail
# on a fresh checkout.
results_dir = 'results/module_3'
os.makedirs(results_dir, exist_ok = True)

for weather in ['AK_Fairbanks']:
    print(f'Processing: {weather}')
    # The metadata file is the first file in the folder whose name does not contain 'sensors'.
    meta_data_name = [filename for filename in os.listdir(f'data/{weather}/{weather}/') if 'sensors' not in filename][0]
    meta_data = pd.read_csv(f'data/{weather}/{weather}/{meta_data_name}')
    ids_temp = meta_data.loc[meta_data.fault_type.isin(selected_fault_types)][['id', 'fault_type']]

    # Collect the per-simulation frames and concatenate once; growing a
    # DataFrame with pd.concat inside the loop copies all previous rows each time.
    labelled_frames = []
    for id_n, fault_type in zip(ids_temp.id, ids_temp.fault_type):
        print(f'    Processing: {id_n}')
        temp_data = pd.read_csv(f'data/{weather}/{weather}/{id_n}_sensors.csv')
        # Average every block of 4*24 consecutive rows, then drop the last eight columns.
        temp_data = temp_data.groupby(temp_data.index // (4*24)).mean()
        temp_data = temp_data.iloc[:,0:-8].assign(label = fault_type)
        labelled_frames.append(temp_data)
    if labelled_frames:
        final_data_df = pd.concat(labelled_frames, axis = 0, ignore_index = True)
    else:
        final_data_df = pd.DataFrame([])
    final_data_df.to_csv(f'{results_dir}/{weather}_original.csv', index = None)

    # The label is the last column; everything before it is a feature.
    inputs = final_data_df.iloc[:,0:-1].copy()
    output = final_data_df.iloc[:,-1].copy()

    cv = KFold(n_splits=5, shuffle=True, random_state = 42)
    results = []
    important_features = []

    for train_index, test_index in cv.split(inputs):
        X_train, X_test = inputs.iloc[train_index].copy(), inputs.iloc[test_index].copy()
        y_train, y_test = output.iloc[train_index].copy(), output.iloc[test_index].copy()
        # Fit the model on training data
        # (alternative tried earlier: RandomForestClassifier(n_estimators = 25, random_state=42))
        regr = GradientBoostingClassifier(random_state = 42, n_estimators = 20)
        regr.fit(X_train, y_train)
        # feature importance: keep the 20 sensors with the highest importance
        feature_importance_temp = pd.DataFrame({
            'sensor_name': inputs.columns,
            'importance': regr.feature_importances_,
        })
        important_features += feature_importance_temp.sort_values(
            by=['importance'], ascending = False).sensor_name.iloc[:20].tolist()
        # Generate predictions on the test data and collect
        y_test_predicted = regr.predict(X_test)
        testing_error = zero_one_loss(y_test, y_test_predicted, normalize=True)
        results += [testing_error]
        # NOTE(review): this `break` stops after the first fold, so the stored
        # 'CV_Error' is the error of a single 80/20 split, not a 5-fold
        # average. Remove it to evaluate every fold.
        break

    CV_error = sum(results)/len(results)
    # Sorted so the saved file does not depend on string hash ordering.
    important_features = sorted(set(important_features))

    CV_error_df = pd.DataFrame([CV_error], columns = ['CV_Error'])

    important_features_df = pd.DataFrame({'important_features': important_features})

    CV_error_df.to_csv(f'{results_dir}/{weather}_original_CV_Error.csv', index = None)

    important_features_df.to_csv(f'{results_dir}/{weather}_original_important_features.csv', index = None)

Processing: AK_Fairbanks
    Processing: 06706a2f-80e5-4843-a043-27f6e134729b
    Processing: 1032de59-2978-4726-baf0-f1148687b5da
    Processing: 17973aa7-21b5-4a22-b1cc-004224085983
    Processing: 17ed2550-6787-46fc-8fcd-a3dbbdbeb61a
    Processing: 17faa49a-afd9-46cd-917a-1d30f62a3c5d
    Processing: 1f7f02f7-456c-41c6-bf5a-339b805a34d5
    Processing: 242f37a9-e3f7-445b-9fba-cab5fa82c6dd
    Processing: 2737ac00-8a30-4bed-bcff-8a691ad25b87
    Processing: 2b6c5592-8222-475f-ae41-2e49030d6fa0
    Processing: 30b722dc-f96f-4907-addd-df322edee961
    Processing: 37e6fe18-ac48-40df-8848-2bddd516588f
    Processing: 39d62b0d-49f9-4cf4-ac2e-5632b3b1cbf3
    Processing: 3e7cfa58-f198-41b5-9c68-ddb7c3ab85bd
    Processing: 463c5e37-24bf-499a-bb04-41385783eec7
    Processing: 4ff7d35b-0cdb-4ed4-adc6-d75d810d78ed
    Processing: 52a043e9-979a-4573-9c16-f431333f7d32
    Processing: 530b8c7d-b4e2-4835-b271-c63bba1f4ca1
    Processing: 53b242f0-61c1-4d1e-841f-7e9a90ebd93a
    Processing: 56ed4

In [113]:
# deal with fault injected data, calculate error and features
#
# For each of the 10 inaccuracy-injected copies of the data set (folders
# data_inaccuracy_injected_1 ... data_inaccuracy_injected_10) this cell
#   1. merges the sensor files of all simulations with a selected fault type,
#   2. trains a gradient boosting classifier on the first fold of a 5-fold split,
#   3. stores the merged data, the hold-out error and the top sensors under
#      results/module_3/.
#
# NOTE: `selected_fault_types` is not defined in this cell; it must already
# exist at notebook level (it is assigned in the Module 3 cell further down).

# make sure the output folder exists before the first result is written
os.makedirs('results/module_3', exist_ok=True)

for j in range(1, 11):
#     for weather in ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']:
    for weather in ['AK_Fairbanks']:
        print(f'Processing: {weather}')
        # the metadata file is the only file in the folder without 'sensors' in its name
        meta_data_name = [filename for filename in os.listdir(f'data/{weather}/{weather}/') if 'sensors' not in filename][0]
        meta_data = pd.read_csv(f'data/{weather}/{weather}/{meta_data_name}')
        ids_temp = meta_data.loc[meta_data.fault_type.isin(selected_fault_types)][['id', 'fault_type']]

        # collect the labelled frames in a list and concatenate once; growing a
        # DataFrame with pd.concat inside the loop copies all rows every time
        labelled_frames = []
        for id_n, fault_label in zip(ids_temp.id, ids_temp.fault_type):
            print(f'    Processing: {id_n}')
            temp_data = pd.read_csv(f'data_inaccuracy_injected_{j}/{weather}/{weather}/{id_n}_sensors.csv')
#             temp_data = temp_data.groupby(temp_data.index // (4*24)).mean()
#             temp_data = temp_data.iloc[:,0:-8]
            temp_data['label'] = fault_label
            labelled_frames.append(temp_data)
        if not labelled_frames:
            raise ValueError(f'No simulation with a selected fault type found for {weather}')
        final_data_df = pd.concat(labelled_frames, axis = 0, ignore_index = True)
        final_data_df.to_csv(f'results/module_3/{weather}_inaccuracy_injected_{j}.csv', index = None)

        # every column except the last one is a model input; the last one is the label
        inputs = final_data_df.iloc[:,0:-1].copy()
        output = final_data_df.iloc[:,-1].copy()

        cv = KFold(n_splits=5, shuffle=True, random_state = 42)
        results = []
        important_features = []

        for train_index, test_index in cv.split(inputs):
            X_train, X_test = inputs.iloc[train_index].copy(), inputs.iloc[test_index].copy()
            y_train, y_test = output.iloc[train_index].copy(), output.iloc[test_index].copy()
            # Fit the model on training data
#             regr = RandomForestClassifier(n_estimators = 25, random_state=42)
            regr_2 = GradientBoostingClassifier(random_state = 42, n_estimators = 20)
            regr_2.fit(X_train, y_train)
            # feature importance: remember the 20 sensors the model relies on most
            feature_importance_temp = pd.DataFrame({
                'sensor_name': inputs.columns,
                'importance': regr_2.feature_importances_,
            })
            important_features += feature_importance_temp.sort_values(
                by=['importance'], ascending = False).sensor_name.head(20).tolist()
            # Generate predictions on the test data and collect
            y_test_predicted = regr_2.predict(X_test)
            testing_error = zero_one_loss(y_test, y_test_predicted, normalize=True)
#             testing_error = accuracy_score(y_test, y_test_predicted)
            results += [testing_error]
            # Only the first fold is evaluated, so CV_error below is a single
            # hold-out error rather than a 5-fold average.
            break

        CV_error = sum(results)/len(results)
        important_features = list(set(important_features))

        CV_error_df = pd.DataFrame([CV_error], columns = ['CV_Error'])
        important_features_df = pd.DataFrame({'important_features': important_features})

        CV_error_df.to_csv(f'results/module_3/{weather}_inaccuracy_injected_{j}_CV_Error.csv', index = None)
        important_features_df.to_csv(f'results/module_3/{weather}_inaccuracy_injected_{j}_important_features.csv', index = None)

Processing: AK_Fairbanks
    Processing: 06706a2f-80e5-4843-a043-27f6e134729b
    Processing: 1032de59-2978-4726-baf0-f1148687b5da
    Processing: 17973aa7-21b5-4a22-b1cc-004224085983
    Processing: 17ed2550-6787-46fc-8fcd-a3dbbdbeb61a
    Processing: 17faa49a-afd9-46cd-917a-1d30f62a3c5d
    Processing: 1f7f02f7-456c-41c6-bf5a-339b805a34d5
    Processing: 242f37a9-e3f7-445b-9fba-cab5fa82c6dd
    Processing: 2737ac00-8a30-4bed-bcff-8a691ad25b87
    Processing: 2b6c5592-8222-475f-ae41-2e49030d6fa0
    Processing: 30b722dc-f96f-4907-addd-df322edee961
    Processing: 37e6fe18-ac48-40df-8848-2bddd516588f
    Processing: 39d62b0d-49f9-4cf4-ac2e-5632b3b1cbf3
    Processing: 3e7cfa58-f198-41b5-9c68-ddb7c3ab85bd
    Processing: 463c5e37-24bf-499a-bb04-41385783eec7
    Processing: 4ff7d35b-0cdb-4ed4-adc6-d75d810d78ed
    Processing: 52a043e9-979a-4573-9c16-f431333f7d32
    Processing: 530b8c7d-b4e2-4835-b271-c63bba1f4ca1
    Processing: 53b242f0-61c1-4d1e-841f-7e9a90ebd93a
    Processing: 56ed4

    Processing: 37e6fe18-ac48-40df-8848-2bddd516588f
    Processing: 39d62b0d-49f9-4cf4-ac2e-5632b3b1cbf3
    Processing: 3e7cfa58-f198-41b5-9c68-ddb7c3ab85bd
    Processing: 463c5e37-24bf-499a-bb04-41385783eec7
    Processing: 4ff7d35b-0cdb-4ed4-adc6-d75d810d78ed
    Processing: 52a043e9-979a-4573-9c16-f431333f7d32
    Processing: 530b8c7d-b4e2-4835-b271-c63bba1f4ca1
    Processing: 53b242f0-61c1-4d1e-841f-7e9a90ebd93a
    Processing: 56ed43ac-c736-410a-b220-404194ebfd9a
    Processing: 5953092a-d071-4791-a6df-347f6da1897c
    Processing: 5e7dc4f2-3890-4ec7-8097-d25b92c628df
    Processing: 6422a661-5e60-4881-9876-cc180e959842
    Processing: 65e2bf3f-5654-4b86-888d-687703181065
    Processing: 683ce803-7410-4969-a48a-1e633462da2a
    Processing: 685d7217-ffc9-450a-8cdf-81f46df1bde1
    Processing: 798c7140-e307-4d9f-8f99-2f423bb52bd7
    Processing: 7da400be-d6c0-41ab-9c7d-916304b3fd91
    Processing: 7f1d3e12-fee9-4b97-9fdc-b2ffa9792565
    Processing: 7f779486-2e7e-4966-af88-7b1ed9

    Processing: 5953092a-d071-4791-a6df-347f6da1897c
    Processing: 5e7dc4f2-3890-4ec7-8097-d25b92c628df
    Processing: 6422a661-5e60-4881-9876-cc180e959842
    Processing: 65e2bf3f-5654-4b86-888d-687703181065
    Processing: 683ce803-7410-4969-a48a-1e633462da2a
    Processing: 685d7217-ffc9-450a-8cdf-81f46df1bde1
    Processing: 798c7140-e307-4d9f-8f99-2f423bb52bd7
    Processing: 7da400be-d6c0-41ab-9c7d-916304b3fd91
    Processing: 7f1d3e12-fee9-4b97-9fdc-b2ffa9792565
    Processing: 7f779486-2e7e-4966-af88-7b1ed9a493cd
    Processing: 7fa9a148-16ab-48f9-bbef-9e367fe36d78
    Processing: 908f2cd7-db6d-4b2e-a80a-d9119f356ffb
    Processing: 993d38b9-08c2-4348-ac34-4a3f1a7b2200
    Processing: 9a4cef3c-59e1-48ab-93bc-78e3f993aa7a
    Processing: 9fb17a66-b512-41a5-aa4d-106530cd0ba8
    Processing: a2eaf39b-4c25-4bb9-891f-c244176e4678
    Processing: a56ba3f8-d96b-4f5b-a4f9-63f5b4b569a7
    Processing: aeff5be7-9b4c-40dd-b024-6a678e120017
    Processing: b4e186e8-45cb-4cc9-8451-e7a887

    Processing: 7f779486-2e7e-4966-af88-7b1ed9a493cd
    Processing: 7fa9a148-16ab-48f9-bbef-9e367fe36d78
    Processing: 908f2cd7-db6d-4b2e-a80a-d9119f356ffb
    Processing: 993d38b9-08c2-4348-ac34-4a3f1a7b2200
    Processing: 9a4cef3c-59e1-48ab-93bc-78e3f993aa7a
    Processing: 9fb17a66-b512-41a5-aa4d-106530cd0ba8
    Processing: a2eaf39b-4c25-4bb9-891f-c244176e4678
    Processing: a56ba3f8-d96b-4f5b-a4f9-63f5b4b569a7
    Processing: aeff5be7-9b4c-40dd-b024-6a678e120017
    Processing: b4e186e8-45cb-4cc9-8451-e7a887cb85a3
    Processing: bcb60284-61e7-4ad2-b6df-b6528e8b0e91
    Processing: c655126c-9d75-42fa-bde6-7b1235a62327
    Processing: c713d904-89e5-4fd4-af20-ff17a8fac039
    Processing: c91d84c7-0471-499b-8af4-b267df8a11ac
    Processing: d333e863-e915-4895-949d-f0053c972810
    Processing: d56b913a-43ca-4f9c-9f55-0d3d636216e5
    Processing: d6a5fed1-2d6e-4f2c-a4b3-cd9e8b2dae37
    Processing: d6ff7094-145e-4f97-abb7-5ad5927845ae
    Processing: d996f04f-0644-4c5d-bc4e-62abca

In [94]:
# # test why the original data perform so much worse than the fault-injected data
# original_data_inputs_output_raw_df = pd.read_csv('D:/jupyternotebook/FDD_sensor_impact_framework_applied_to_FRP/results/module_3/AK_Fairbanks_inaccuracy_injected_1.csv')

# original_data_inputs = original_data_inputs_output_raw_df.iloc[:, 0:-1]
# original_data_output = original_data_inputs_output_raw_df.iloc[:,-1]
# cv = KFold(n_splits=5, shuffle=True, random_state = 42)

# for train_index, test_index in cv.split(original_data_inputs):
#     X_train, X_test = original_data_inputs.iloc[train_index].copy(), original_data_inputs.iloc[test_index].copy()
#     y_train, y_test = original_data_output.iloc[train_index].copy(), original_data_output.iloc[test_index].copy()
#     regr_1 = RandomForestClassifier(random_state = 42, n_estimators = 10)
#     regr_1.fit(X_train, y_train)
#     y_test_predicted = regr_1.predict(X_test)
#     testing_error = zero_one_loss(y_test, y_test_predicted, normalize=True)
#     break

In [None]:
# plot the results
#
# For every weather file: read the 10 hold-out errors written by the
# inaccuracy-injection cell, convert them to accuracies and draw their kernel
# density estimate together with the accuracy of the model trained on the
# original (non-injected) data.

# for weather in ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']:
for weather in ['AK_Fairbanks']:
    # despite its name, error_total holds accuracies (1 - error), one per injected data set
    error_total = []
    for j in range(1,11):
        error_df = pd.read_csv(f'results/module_3/{weather}_inaccuracy_injected_{j}_CV_Error.csv')
        # read the error by column name so that a stray index column in the
        # CSV cannot be mistaken for the error value
        error_processed = 1 - error_df['CV_Error'].iloc[0]
        error_total.append(error_processed)
    s = pd.Series(error_total)
    ax = s.plot.kde()
    # original model performance
    original_performance = pd.read_csv(f'results/module_3/{weather}_original_CV_Error.csv')
    original_performance = 1- original_performance.iloc[0,0]
    plt.axvline(x=original_performance, label='none-fault performance', c='r')
    plt.ylabel('Density(Dimensionless)')
    #plt.xlim(0.7,1)
    plt.legend(['faulty sensor FDD performance distribution','none-fault sensor FDD performance'], loc = 'lower right')
    accuracy_mean = s.mean()
    accuracy_std = s.std()
    # Position the annotation in axes coordinates (fractions of the plot area).
    # A fixed data coordinate such as x=0.75 falls outside the axes whenever
    # the accuracies are concentrated elsewhere (e.g. close to 1.0).
    ax.text(0.05, 0.8, f'mean: {accuracy_mean:.3f}\nstd:{accuracy_std:.3f}', transform=ax.transAxes)
    plt.title(f'{weather}, Kernel Density Estimation (KDE) Plot\nFDD Accuracy under the Impact of Sensor Inaccuracy Injection')
    plt.xlabel('FDD Accuracy (Correct Classification Rate)')
    plt.show()

In [8]:
# Inspect the per-run accuracies (1 - error) collected by the plotting cell.
error_total

[1.0,
 0.9997313994090787,
 1.0,
 0.9997313994090787,
 1.0,
 0.9997313994090787,
 0.9997313994090787,
 0.9991941982272361,
 0.9994627988181574,
 0.9997313994090787]

In [6]:
# First value of the error table most recently read by the plotting loop.
error_df.values[0][0]

0.0002686005909212641

In [5]:
# Error table most recently read by the plotting loop (shows its column layout).
error_df

Unnamed: 0,CV_Error
0,0.000269


In [4]:
# Debug check of the accuracies gathered by the plotting loop.
error_total

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]

In [2]:
# Accuracy computed in the last iteration of the plotting loop; only defined
# after that cell has been run.
error_processed

NameError: name 'error_processed' is not defined

0.9019138547990333

In [3]:
# Debug check: first value of the most recently loaded error table.
error_df.values[0][0]

0.997313994090787

In [14]:
# Standard deviation of the accuracies, as computed by the plotting cell.
accuracy_std

0.025230246135699158

In [12]:
# Same check as the previous cell: spread of the accuracies from the plotting cell.
accuracy_std

0.025230246135699158

In [5]:
# Mean of the accuracies, as computed by the plotting cell.
accuracy_mean

0.8199216861809393

In [19]:
# For every weather file, count in how many of the 10 inaccuracy-injected runs
# each sensor was among the important features, turn the count into a score
# and save the ranking to results/module_3/final_sensor_importance_<weather>.csv.

# Dedicated, seeded generator so that the saved ranking is reproducible.
jitter_rng = random.Random(42)

for weather in ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']:
    total_selected_sensor_list = []
    for j in range(1,11):
        selected_features_df = pd.read_csv(f'results/module_3/{weather}_inaccuracy_injected_{j}_important_features.csv')
        total_selected_sensor_list += selected_features_df.values.flatten().tolist()

    # sensor name -> number of runs in which it was selected
    d = Counter(total_selected_sensor_list)
    # building the frame from (sensor, count) pairs also works when no sensor was found
    df_feature_importance = pd.DataFrame(list(d.items()), columns=['sensor', 'selected possibility'])

    # 10 runs, so count * 10 is the share of runs (in percent) that selected the sensor
    df_feature_importance['selected possibility'] = df_feature_importance['selected possibility'] * 10

    # NOTE(review): a random amount between 0 and 9 is subtracted from every
    # score. This jitter is not derived from the data; confirm it is intended
    # before the numbers are reported.
    df_feature_importance['selected possibility'] = [
        x - jitter_rng.randrange(10) for x in df_feature_importance['selected possibility']
    ]

    df_feature_importance = df_feature_importance.sort_values(by=['selected possibility'], ascending = False)
    df_feature_importance = df_feature_importance.reset_index(drop = True)

    df_feature_importance.to_csv(f'results/module_3/final_sensor_importance_{weather}.csv')

In [None]:
# final_data_df = pd.DataFrame([])
# for id_n in ids_temp.id:
#     print(f'Processing: {id_n}')
#     temp_data = pd.read_csv(f'data_inaccuracy_injected/MN_Duluth/MN_Duluth/{id_n}_sensors.csv')
#     temp_data = temp_data.groupby(temp_data.index // (4*24)).sum()
#     temp_data['label'] = ids_temp.loc[ids_temp.id == id_n].fault_type.values[0]
#     final_data_df = pd.concat([final_data_df, temp_data], axis = 0, ignore_index = True)

In [None]:
# final_data_df.to_csv('results/module_3/MN_Duluth_fault_injected_1.csv', index = None)

In [None]:
# Inspect the training labels left over from the most recently executed CV loop.
y_train

In [None]:
# NOTE(review): inaccuracy_injected_FDD_data is not defined in the visible part
# of the notebook; presumably a frame of injected sensor data - confirm.
inaccuracy_injected_FDD_data['room_205_zone_mean_air_temperature [C]']

In [None]:
# NOTE(review): example_raw_FDD_data is not defined in the visible part of the
# notebook; presumably the raw counterpart of the previous cell - confirm.
example_raw_FDD_data['room_205_zone_mean_air_temperature [C]']

Module 3

Incrementally increase the bias value to find the point at which a feature is no longer selected, and record that threshold for each important sensor.

In [None]:
# Get feature importance from initial model

In [None]:
# Module 3 experiment for a single weather file:
#   1. build the daily-averaged inputs/labels for all selected fault types,
#   2. train and pickle one random forest per CV fold and record the baseline error,
#   3. for every important sensor, add random noise to that sensor in the test
#      folds and record the error of the saved models.
weather = 'TN_Knoxville'

# initialize result df
cv_error_df = pd.DataFrame([])

# self-defined fault types
selected_fault_types = ['air_handling_unit_fan_motor_degradation',
                        'biased_economizer_sensor_mixed_t',
                        'duct_fouling',
                        'economizer_opening_stuck',
                        'hvac_setback_error_delayed_onset',
                        'hvac_setback_error_no_overnight_setback',
                        'hvac_setback_error_early_termination',
                        'improper_time_delay_setting_in_occupancy_sensors',
                        'lighting_setback_error_delayed_onset',
                        'lighting_setback_error_no_overnight_setback',
                        'lighting_setback_error_early_termination',
                        'return_air_duct_leakages',
                        'supply_air_duct_leakages',
                        'thermostat_bias'
                       ]

print(f'Processing: {weather}...')

# read metadata file (the only file in the folder without 'sensors' in its name)
data_dir = f'data/{weather}/{weather}/'
prefixed = [filename for filename in os.listdir(data_dir) if 'sensors' not in filename]
if len(prefixed) == 1:
    meta_data_df = pd.read_csv(data_dir + prefixed[0])
else:
    raise Exception("Something wrong with finding the meta data file in the data folder. Make sure the original data folder are used for data processing")

# define the fault_type_list. Baseline is removed from the fault type
#     fault_type_list = meta_data_df.fault_type.unique().tolist()
#     fault_type_list.remove('baseline')
fault_type_list = selected_fault_types


def _load_daily_means(sensor_csv_path):
    """Read one sensor file, drop its first column and average every 4*24 rows.

    With 15-minute data, 4*24 consecutive rows correspond to one day.
    """
    raw_df = pd.read_csv(sensor_csv_path)
    raw_df = raw_df.iloc[:,1:]
    return raw_df.groupby(np.arange(len(raw_df))//(4*24)).mean()


# non-fault data: read once instead of once per fault type
ids_none_fault = meta_data_df.loc[meta_data_df.fault_type == 'baseline'].id.tolist()
baseline_df = _load_daily_means(f'{data_dir}{ids_none_fault[0]}_sensors.csv')
baseline_df.loc[:,'label'] = 'none'

input_parts = []
label_parts = []

for fault_type in fault_type_list:

    #print(f' Processing: {fault_type}...')

    ids = meta_data_df.loc[meta_data_df.fault_type == fault_type].id.tolist()

    # load data and make it from 15minute interval to daily data
    fault_frames = []
    for id_n in ids:
        temp_df = _load_daily_means(f'{data_dir}{id_n}_sensors.csv')
        temp_df.loc[:,'label'] = fault_type
        fault_frames.append(temp_df)

    # NOTE(review): the same baseline rows are appended for every fault type,
    # so identical 'none' samples appear len(fault_type_list) times and can end
    # up in both the training and the test fold. Kept as in the original
    # experiment - confirm this is intended.
    fault_data_df = pd.concat(fault_frames + [baseline_df], axis = 0).reset_index(drop = True)

    # drop the last 9 columns (the label plus the 8 columns before it) from the inputs
    input_parts.append(fault_data_df.iloc[:,0:-9])
    label_parts.append(fault_data_df.iloc[:,-1])

inputs = pd.concat(input_parts, axis = 0, ignore_index = True)
# keep the labels as a one-column frame (column 0) like before
output = pd.concat(label_parts, axis = 0, ignore_index = True).to_frame(name = 0)

print(f' Processing: inputs/output data ready for {weather} ...')

# cross-validation
cv = KFold(n_splits=5, shuffle=True, random_state = 42)

# the fold models are stored on disk and reloaded in the degradation step
os.makedirs('model', exist_ok=True)

# Iterate through CV splits
results = []
important_features = []
for i, (train_index, test_index) in enumerate(cv.split(inputs)):
    X_train, X_test = inputs.iloc[train_index], inputs.iloc[test_index]
    y_train, y_test = output.iloc[train_index], output.iloc[test_index]
    # Fit the model on training data
    regr = RandomForestClassifier(n_estimators = 2, random_state=42)
    regr.fit(X_train, y_train.iloc[:,0])
    # save model; the context manager closes the file handle
    filename = f'model/saved_model_{i}_{weather}.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(regr, model_file)
    # feature importance: remember the 20 most important sensors of this fold
    feature_importance_temp = pd.DataFrame({
        'sensor_name': inputs.columns,
        'importance': regr.feature_importances_,
    })
    important_features += feature_importance_temp.sort_values(
        by=['importance'], ascending = False).sensor_name.head(20).tolist()
    # Generate predictions on the test data and collect
    y_test_predicted = regr.predict(X_test)
    testing_error = zero_one_loss(y_test, y_test_predicted, normalize=True)
    results += [testing_error]

CV_error = sum(results)/len(results)
cv_error_df.loc[:,weather + 'Baseline'] = [weather, 'all_sensors', 'all_inaccuracy', CV_error]
# sorted so that the sensors are processed in the same order on every run
important_features = sorted(set(important_features))

# Sensor precision degradation
important_features_degradation = []
# dedicated, seeded generator so that the injected noise is reproducible
noise_rng = random.Random(42)
print(f'  Processing: Degradation, {weather}...')
for sensor in important_features:
    # errors of the current sensor only; otherwise the average would mix in
    # the folds of all previously processed sensors
    results = []
    for i, (train_index, test_index) in enumerate(cv.split(inputs)):
        X_test = inputs.iloc[test_index].copy()
        y_test = output.iloc[test_index].copy()
        # degradation: uniform noise within +/- 5% of the sensor's mean over the test fold
        noise_bound = X_test.loc[:,sensor].mean() * 0.05
        random_list = [noise_rng.uniform(-noise_bound, noise_bound) for _ in range(len(X_test))]
        X_test.loc[:,sensor] = X_test.loc[:,sensor] + random_list
        # load the model saved for this fold (file written by this cell above;
        # never unpickle files from an untrusted source)
        filename = f'model/saved_model_{i}_{weather}.sav'
        with open(filename, 'rb') as model_file:
            regr = pickle.load(model_file)
        # Generate predictions on the test data and collect
        y_test_predicted = regr.predict(X_test)
        testing_error = zero_one_loss(y_test, y_test_predicted, normalize=True)
        results += [testing_error]
        feature_importance_temp = pd.DataFrame({
            'sensor_name': inputs.columns,
            'importance': regr.feature_importances_,
        })
        important_features_degradation += feature_importance_temp.sort_values(
            by=['importance'], ascending = False).sensor_name.head(20).tolist()

    # record the result before deciding whether to stop; previously this code
    # sat behind a continue/break pair and was never executed
    CV_error = sum(results)/len(results)
    cv_error_df.loc[:,weather + sensor + 'degradation'] = [weather, sensor, 'degradation', CV_error]

    important_features_degradation = list(set(important_features_degradation))

    # NOTE(review): the importances come from the reloaded, unchanged models, so
    # perturbing X_test cannot change them; a retrain on degraded data would be
    # needed for this stop condition to ever trigger.
    if sensor not in important_features_degradation:
        break


In [None]:
# Sensors that were among the 20 most important features in at least one CV fold.
important_features

In [None]:
# Model input table assembled by the Module 3 cell above.
inputs

In [None]:
# Select the top 20 inputs

In [None]:
# Inject fault gradually to a sensor

In [None]:
# Iterate over all sensors

In [None]:
# # Bias of 5%
# X_test.loc[:,sensor] = X_test.loc[:,sensor] + X_test.loc[:,sensor].mean() * 0.05

# # Drift
# X_test.loc[:,sensor] = X_test.loc[:,sensor] + np.linspace(0, X_test.loc[:,sensor].mean()*0.1, num=len(X_test.loc[:,sensor]))

# # degradation
# random_list = []
# for j in range(len(X_test)):
#     random_list += [random.uniform(- X_test.loc[:,sensor].mean() * 0.05, X_test.loc[:,sensor].mean() * 0.05)]
# X_test.loc[:,sensor] = X_test.loc[:,sensor] + random_list

# # Failure
# X_test.loc[:,sensor] = X_test.loc[:,sensor].mean()

In [None]:
# weather_list = ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']
# #weather_list = ['AK_Fairbanks']

# for weather in weather_list:
#     #b = db.from_sequence(range(len(weather_list)), npartitions=len(weather_list))
#     b = db.from_sequence()
#     c = b.map(lambda x: process_by_weather(weather))
#     c.compute()

In [None]:
# def process_by_weather(weather):
#     temp = pd.DataFrame([weather])
#     temp.to_csv(f'{weather}.csv')
# Run process_by_weather for every weather file through a dask bag with one
# partition per weather file.
# NOTE(review): 'SAU_Riyadh' is absent from this list although other cells
# include it - confirm whether that is intended.
weather_list = [
    'AK_Fairbanks',
    'FL_Miami',
    'KY_Louisville',
    'MN_Duluth',
    'TN_Knoxville',
    'VA_Richmond',
]
weather_bag = db.from_sequence(weather_list, npartitions=len(weather_list))
weather_bag.map(lambda weather_name: process_by_weather(weather_name)).compute()

In [None]:
# TODO: add a check so that the model is not trained again when its pickle file already exists.

In [None]:
# TODO: calculate the percentage decrease in error, not only the absolute values.

Module X: train FDD

Module X: Only consider sensor accuracy

Module X: Only consider sensor selection

In [None]:
#basic_sensor_set
#moderate_sensor_set
#advanced_sensor_set

In [None]:
# feature importance for each fault type

Module X: integration of sensor accuracy and sensor selection

In [None]:
# This module is frozen because it may be a part of next-year work.

Module X: Comprehensive Metrics merging energy, thermal comfort, maintenance, and sensor cost

In [None]:
# metrics
# false positive
# false negative

In [None]:
# calculate thermal comfort by the temperature unmet hours and building total energy from meta csv file
#
# NOTE: `weather_list` is not defined in this cell; it must already exist at
# notebook level when the cell is run.
metric_columns = ['weather_file','fault_type','fault_intensity','electricity_ip', 'natural_gas_ip',
                  'net_site_energy', 'unmet_hours_during_occupied_cooling','unmet_hours_during_occupied_heating']

# one frame per weather file, concatenated once after the loop
metrics_per_weather = []

for weather_file in weather_list:
    print(weather_file)
    data_dir = 'data/{}/{}/'.format(weather_file,weather_file)
    prefixed = [filename for filename in os.listdir(data_dir) if filename.startswith("summary_results")]
    if not prefixed:
        raise FileNotFoundError('No summary_results file found in ' + data_dir)
    meta_data_df = pd.read_csv(data_dir + prefixed[0])
    # work on an explicit copy so that adding columns neither touches
    # meta_data_df nor triggers pandas' SettingWithCopyWarning
    metrics = meta_data_df[metric_columns].copy()
    metrics['total_unmet_hours'] = metrics['unmet_hours_during_occupied_cooling'] + metrics['unmet_hours_during_occupied_heating']
    # NOTE(review): this concatenation requires fault_intensity to hold strings - confirm
    metrics['fault_type_with_intensity'] = metrics['fault_type'] + '_' + metrics['fault_intensity']

    metrics_per_weather.append(metrics)

# pd.concat rejects an empty list, so fall back to an empty frame
total_metrics = pd.concat(metrics_per_weather, axis = 0) if metrics_per_weather else pd.DataFrame([])

total_metrics.to_csv('results/energy_and_thermal_comfort.csv')

In [None]:
# calculate thermal discomfort and enduses from detailed sensor data

# Multiplier applied to the summed cubic temperature deviation in discomfort_penalty.
discomfort_coefficient = 1
# Reference value subtracted from the room temperature columns ('... [C]') in discomfort_penalty.
thermal_comfort_zone_center = 25

# Sensor columns that are averaged per simulation file.
enduse = ['cooling_electricity [W]', 'electricity_facility [W]',
          'whole_building_facility_total_hvac_electric_demand_power [W]', 'rooftop_supply_fan_fan_electric_energy [W]',
          'fans_electricity [W]', 'gas_facility [W]', 'rooftop_heatingcoil_heating_coil_heating_energy [W]',
          'heating_electricity [W]', 'heating_gas [W]', 'interiorequipment_electricity [W]',
          'interiorlights_electricity [W]'
         ]

# Room temperature columns for which a discomfort penalty is computed.
room_temperatures = ['room_101_zone_mean_air_temperature [C]',
'room_102_zone_mean_air_temperature [C]',
'room_103_zone_mean_air_temperature [C]',
'room_104_zone_mean_air_temperature [C]',
'room_105_zone_mean_air_temperature [C]',
'room_106_zone_mean_air_temperature [C]',
'room_201_zone_mean_air_temperature [C]',
'room_202_zone_mean_air_temperature [C]',
'room_203_zone_mean_air_temperature [C]',
'room_204_zone_mean_air_temperature [C]',
'room_205_zone_mean_air_temperature [C]',
'room_206_zone_mean_air_temperature [C]']

# Weather files to process; this assignment replaces any weather_list defined earlier.
weather_list = ['AK_Fairbanks', 'FL_Miami', 'KY_Louisville', 'MN_Duluth', 'SAU_Riyadh', 'TN_Knoxville', 'VA_Richmond']

# Penalty column names: the trailing ' [C]' (4 characters) is replaced by '_penalty'.
room_temperatures_penalty = [x[0:-4] + '_penalty' for x in room_temperatures]

def discomfort_penalty(x):
    """Return the scaled sum of cubed absolute deviations from the comfort centre.

    Each value of `x` is compared with the notebook-level
    `thermal_comfort_zone_center`; the absolute deviations are cubed, summed
    with `x`'s own `.sum()` (column-wise for a DataFrame) and multiplied by
    the notebook-level `discomfort_coefficient`.
    """
    deviation = abs(x - thermal_comfort_zone_center)
    cubed_total = (deviation ** 3).sum()
    return discomfort_coefficient * cubed_total

# For every simulation file of every weather file: average the end-use
# columns, compute the discomfort penalty per room and attach the weather
# file / fault type / fault intensity from the metadata file.
enduse_frames = []

for weather in weather_list:
    print('Processing: {}...'.format(weather))
    data_dir = 'data/{}/{}/'.format(weather, weather)
    file_names = os.listdir(data_dir)

    # os.listdir returns names in arbitrary order, so identify the metadata
    # file by name (the only file without 'sensors') instead of by position
    meta_files = [name for name in file_names if 'sensors' not in name]
    if len(meta_files) != 1:
        raise Exception('Expected exactly one metadata file in {}, found {}'.format(data_dir, len(meta_files)))
    meta_data_temp = pd.read_csv(data_dir + meta_files[0])

    sensor_suffix = '_sensors.csv'
    # sorted for a reproducible row order in the result
    for file in sorted(name for name in file_names if name.endswith(sensor_suffix)):
        temp = pd.read_csv(data_dir + file)
        room_temp_penalty = pd.DataFrame(discomfort_penalty(temp[room_temperatures])).T
        room_temp_penalty.columns = room_temperatures_penalty
        temp_enduse = pd.DataFrame(temp[enduse].mean()).T
        temp_enduse = pd.concat([temp_enduse, room_temp_penalty], axis = 1)
        # the simulation id is the file name without the '_sensors.csv' suffix
        meta_match = meta_data_temp.loc[meta_data_temp.id == file[:-len(sensor_suffix)]]
        for column in ['weather_file', 'fault_type', 'fault_intensity']:
            temp_enduse[column] = meta_match[column].values[0]
        enduse_frames.append(temp_enduse)

# pd.concat rejects an empty list, so fall back to an empty frame
enduse_all = pd.concat(enduse_frames, axis = 0) if enduse_frames else pd.DataFrame([])

enduse_all.to_csv('results/enduses_and_discomfort_penalty.csv')

In [None]:
# Combine the end-use/discomfort table with the energy/unmet-hours table; rows
# are matched on weather file, fault type and fault intensity.
merge_keys = ['weather_file', 'fault_type', 'fault_intensity']
all_metrics = pd.merge(enduse_all, total_metrics, on=merge_keys)
all_metrics.to_csv('results/all_metrics.csv')

In [None]:
# example = pd.read_csv('cv_error.csv')
# example = example.iloc[:,0:56].T.reset_index(drop = True)
# example.columns = ['fault_types','sensor','sensor_inaccuracy','model_accuracy']

# example['model_accuracy'] = example['model_accuracy'].astype('float')
# example['model_accuracy_decrease'] = (example['model_accuracy'] + (- 0.46328767123287673) )* (1/0.46328767123287673)

# example.model_accuracy[0]

# result_1 = example.groupby(['sensor_inaccuracy']).mean()[['model_accuracy_decrease']]

# result_1

# additional_row = pd.DataFrame([0.003784], index = ['drift'], columns = ['model_accuracy_decrease'])

# pd.concat([result_1, additional_row], axis = 0)

# example.groupby(['sensor']).mean().sort_values(['model_accuracy_decrease'], ascending = False)[['model_accuracy_decrease']]

In [None]:
# summarize all results:
# pandas is re-imported here so that the summary cells below can be run
# without executing the rest of the notebook first
import pandas as pd
# table summarised by the groupby cells that follow
results_all = pd.read_csv('results/cv_error_backup.csv')

In [None]:
# Mean of the numeric columns per fault type. numeric_only=True skips the
# remaining text columns, which make .mean() raise a TypeError on pandas >= 2.0.
results_all.groupby(['fault_type']).mean(numeric_only=True)

In [None]:
# Mean of the numeric columns per sensor type. numeric_only=True skips the
# remaining text columns, which make .mean() raise a TypeError on pandas >= 2.0.
results_all.groupby(['sensor_type']).mean(numeric_only=True)

In [None]:
# Mean of the numeric columns per sensor inaccuracy type. numeric_only=True
# skips the remaining text columns, which make .mean() raise a TypeError on
# pandas >= 2.0.
results_all.groupby(['sensor_inaccuracy_type']).mean(numeric_only=True)

In [None]:
# Preview the first rows of the summary table.
results_all.head()

In [None]:
# Reload the energy / thermal comfort table written by the metrics cell above.
# That file was saved with its index, so the index comes back as an extra
# unnamed first column. Note that `results` is also used as a list of errors
# in the cross-validation cells.
results = pd.read_csv('results/energy_and_thermal_comfort.csv')

In [None]:
# Mean of the numeric energy / comfort columns per fault type. numeric_only=True
# skips text columns such as weather_file, which make .mean() raise a TypeError
# on pandas >= 2.0.
results.groupby(['fault_type']).mean(numeric_only=True)