In [1]:
import pandas as pd
import glob
import os
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.validators.scatter.marker import SymbolValidator

# configuration setting

In [2]:
# Weather location; interpolated into the input/output file paths below.
weather = "Norfolk"

# Simulation output resolution, in datapoints per hour.
simulationtimestep = 6

# Datapoints in one AFDD reporting window: 24 hours' worth of timesteps.
aggregate_n_runs = 24 * simulationtimestep

# Reporting windows in a 365-day year: a year's datapoints divided by the window size.
reporting_datapoints = int((365 * 24 * simulationtimestep) / aggregate_n_runs)

# reading metadata

In [3]:
# Path to the summary-results CSV for the configured weather location
# (the location name appears in two directory levels and in the file name).
file1 = f"../run/data/{weather}/{weather}/summary_results_algorithm_{weather}_AMY.csv"
# One row per simulation run; later cells look rows up by `id` and read `fault_type`
# as the ground-truth label.
df_meta = pd.read_csv(file1)
df_meta

Unnamed: 0,id,weather_file,fault_type,fault_intensity,sensorfilename,electricity_ip,eui,natural_gas_ip,net_site_energy,unmet_hours_during_occupied_cooling,unmet_hours_during_occupied_heating
0,3f9c38dd-02c2-47bf-945c-ae401017ccae,Knoxville_TN_AMY,lighting_setback_error_early_termination,3.0,lighting_setback_error_early_termination_3.0,50667.91345,52.490301,12.326634,185212.9435,393.5,443.5
1,e0baeb44-227c-4374-9c14-6693f3f6c9b1,Knoxville_TN_AMY,excessive_infiltration,0.4,excessive_infiltration_0.4,51325.60982,52.968438,11.765413,186900.058,393.5,443.5
2,912040e6-d307-410e-8321-e71e49c0c615,Knoxville_TN_AMY,supply_air_duct_leakages,0.3,supply_air_duct_leakages_0.3,52098.51005,53.535218,11.128252,188899.9521,393.5,443.75
3,11ce6ed7-03e1-426d-adbb-04a8011d767f,Knoxville_TN_AMY,hvac_setback_error_delayed_onset,3.0,hvac_setback_error_delayed_onset_3.0,53019.81187,54.212131,10.377906,191288.4512,393.5,443.5
4,655d9c80-14e6-4f80-a6f1-711b2ff0f9e5,Knoxville_TN_AMY,thermostat_bias,3.0,thermostat_bias_3.0,49843.37448,51.893973,13.040346,183108.7895,393.5,443.5
5,a5566c06-690a-47dc-8a6e-9f695eae8ea0,Knoxville_TN_AMY,improperly_sized_equipment_wshp,1.5,improperly_sized_equipment_wshp_1.5,50442.30443,52.474184,13.040068,185156.0745,393.5,443.5
6,843c0271-ddb1-4cf3-972c-b9f0524d4fb8,Knoxville_TN_AMY,condenser_fouling_in_chiller,0.4,condenser_fouling_chiller_0.4,49842.69447,53.183331,17.585145,187658.3117,393.5,443.5
7,e1598de2-3062-4f40-b343-05defbd8c32c,Knoxville_TN_AMY,nonstandard_refrigerant_charging_in_chiller,1.4,nonstandard_charging_chiller_1.4,49842.57836,56.909038,30.738201,200804.5351,393.5,443.5
8,6961fa68-66ce-42fd-9161-0fcb37bfa0bd,Knoxville_TN_AMY,presence_of_noncondensable_in_chiller,0.04,presence_of_noncondensable_chiller_0.04,49737.46135,51.942324,13.567793,183279.3966,393.5,443.75


# setting simulation environment

In [4]:
# Number of trees used; selects which results CSV is read and is embedded in the
# exported figure's file name.
num_tree = 2
# Reporting-interval label; in the cells shown here it is only used in the exported
# figure's file name.
reporting = 'daily'

# reading split timestamp

In [None]:
# Timestamps for the reporting rows; the `reading_time` column is later attached
# to df_results as its index.
# Read relative to the notebook's folder instead of a user-specific absolute path,
# so the notebook runs on any checkout (the other inputs are already relative).
# NOTE(review): assumes timestamp.csv sits next to this notebook - confirm.
df_timestamp = pd.read_csv("./timestamp.csv")
df_timestamp

# reading results file
- Currently, the number of trees used is manually appended to the end of the file name after training/testing is completed.

In [None]:
# Results CSV for the configured weather location and tree count.
file2 = f"../run/results/{weather}_{num_tree}.csv"
df_results = pd.read_csv(file2)

# Column labels are recovered from the first header cell: keep the piece that
# follows the first "output_test", split it on "_sensors.csv", and drop the
# trailing fragment. Later cells treat these labels as building ids.
header_cell = df_results.columns[0]
new_columns = header_cell.split("output_test")[1].split("_sensors.csv")[:-1]

# Fold the values into chunks of `reporting_datapoints`, then transpose so each
# chunk becomes one column with one row per reporting window.
# NOTE(review): presumably the file stacks each building's predictions one after
# another - confirm against the code that writes it.
n_chunks = len(df_results.values) // reporting_datapoints
stacked = df_results.values.reshape(n_chunks, reporting_datapoints)
df_results = pd.DataFrame(stacked).T
df_results.columns = new_columns
df_results

In [None]:
# # temporary
# df_results = df_results.sample(frac=0.2)

In [None]:
# Collapse each building's column of predictions into a single diagnosis:
# the most frequent predicted label and the share of non-null predictions
# that carry it (ConfidenceRatio), alongside the ground-truth fault type.

# Build the id -> fault_type lookup once instead of re-filtering df_meta for
# every column; keep='first' mirrors the previous "first matching row" choice.
fault_type_by_id = (
    df_meta.drop_duplicates(subset='id', keep='first')
    .set_index('id')['fault_type']
)

diagnosis_records = []

for bldgid in df_results.columns:

    # Fail with context rather than a bare IndexError when results and
    # metadata come from different runs.
    if bldgid not in fault_type_by_id.index:
        raise KeyError(f"Result column {bldgid!r} has no matching id in df_meta")
    groundtruth = fault_type_by_id.loc[bldgid]

    # normalize=True yields count / total non-null predictions for each label.
    # sort_index() makes ties resolve deterministically to the first label in
    # sorted order, because idxmax returns the first maximum.
    label_share = df_results[bldgid].value_counts(normalize=True).sort_index()

    if label_share.empty:
        # Column holds no predictions at all: record it instead of crashing.
        diagnose_fault, diagnose_pcnt = np.nan, np.nan
    else:
        diagnose_fault = label_share.idxmax()
        diagnose_pcnt = label_share.max()

    diagnosis_records.append([bldgid, groundtruth, diagnose_fault, diagnose_pcnt])

df_diagnosis = pd.DataFrame(
    diagnosis_records,
    columns=['id', 'GroundTruth', 'DiagnosedFault', 'ConfidenceRatio'],
)
# Most confident diagnoses first.
df_diagnosis = df_diagnosis.sort_values(by='ConfidenceRatio', ascending=False)
df_diagnosis

In [None]:
# ################################################################
# # CREATING EMPTY DATAFRAME
# ################################################################
# datestart = '2019-01-01 00:00:00'
# dateend = '2019-12-31 23:00:00'
# df_combined = pd.DataFrame([])
# df_combined['reading_time'] = pd.date_range(datestart, dateend, freq='1D')
# df_combined = df_combined.set_index(['reading_time'])
# df_combined.sample(frac=0.2).to_csv("./timestamp.csv")

In [None]:
# Show the results table as it stands before the timestamp index is attached in the next cell.
df_results

In [None]:
# Label every reporting row with its timestamp so the results can be plotted
# against time; pandas raises if the two lengths differ.
df_results.index = df_timestamp["reading_time"]
df_results

In [None]:
# Time-series scatter of every building's predicted label, one trace per
# building, named after its ground-truth fault type. The figure is exported
# to ./figures as HTML and then shown inline.

# Plotly addresses marker symbols by number. Cycling within a fixed range keeps
# the symbol valid however many buildings are plotted; a raw counter eventually
# produces a number Plotly rejects.
# NOTE(review): 0-44 is assumed to be valid in every plotly release - confirm
# against the installed version if more distinct symbols are wanted.
N_BASE_SYMBOLS = 45

# Build the id -> fault_type lookup once instead of re-filtering df_meta per trace;
# keep='first' mirrors the previous "first matching row" choice.
fault_type_by_id = (
    df_meta.drop_duplicates(subset='id', keep='first')
    .set_index('id')['fault_type']
)

fig = go.Figure()

for i, bldgid in enumerate(df_results.columns):

    # Fail with context rather than a bare IndexError when results and
    # metadata come from different runs.
    if bldgid not in fault_type_by_id.index:
        raise KeyError(f"Result column {bldgid!r} has no matching id in df_meta")
    groundtruth = fault_type_by_id.loc[bldgid]

    fig.add_trace(go.Scatter(
        x=df_results.index,
        y=df_results[bldgid],
        name=groundtruth,
        mode='markers',
        marker=dict(
            size=10,
            symbol=str(i % N_BASE_SYMBOLS),
        )
    ))

fig.update_layout(
    width=1200,
    height=400,
    margin=dict(
        r=0,
        t=0,
        b=0
    ),
    plot_bgcolor='rgba(255,242,204,0.0)',
    xaxis=dict(
        # Quick zoom presets shown above the plot.
        rangeselector=dict(
            buttons=[
                dict(count=1,
                     label="1d",
                     step="day",
                     stepmode="backward"),
                dict(count=1,
                     label="1m",
                     step="month",
                     stepmode="backward"),
                dict(count=6,
                     label="6m",
                     step="month",
                     stepmode="backward"),
                dict(count=1,
                     label="YTD",
                     step="year",
                     stepmode="todate"),
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ]
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)

fig.update_xaxes(
    showline=True,
    linewidth=2,
    linecolor='black'
)

# exist_ok avoids the check-then-create race and is a no-op when the folder exists.
os.makedirs("figures", exist_ok=True)

fig.write_html(f"./figures/results_timeseries_{reporting}_{num_tree}.html")

fig.show()