In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import time
from zipfile import ZipFile
import warnings
import pickle
import torch
from torch.utils.data import Dataset
import tsfel
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import lightgbm as lgb
from hiclass import LocalClassifierPerNode, LocalClassifierPerParentNode, LocalClassifierPerLevel
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
warnings.filterwarnings('ignore')

In [2]:
# Known label column names, listed alphabetically. The post-processing loop
# further down uses this list to check that a derived replacement label
# actually exists before writing to it.
columnlist = [
    'Active_Power_Sensor', 'Air_Flow_Sensor',
    'Air_Flow_Setpoint', 'Air_Temperature_Sensor',
    'Air_Temperature_Setpoint', 'Alarm', 'Angle_Sensor',
    'Average_Zone_Air_Temperature_Sensor',
    'Chilled_Water_Differential_Temperature_Sensor',
    'Chilled_Water_Return_Temperature_Sensor',
    'Chilled_Water_Supply_Flow_Sensor',
    'Chilled_Water_Supply_Temperature_Sensor', 'Command',
    'Cooling_Demand_Sensor', 'Cooling_Demand_Setpoint',
    'Cooling_Supply_Air_Temperature_Deadband_Setpoint',
    'Cooling_Temperature_Setpoint', 'Current_Sensor',
    'Damper_Position_Sensor', 'Damper_Position_Setpoint', 'Demand_Sensor',
    'Dew_Point_Setpoint', 'Differential_Pressure_Sensor',
    'Differential_Pressure_Setpoint',
    'Differential_Supply_Return_Water_Temperature_Sensor',
    'Discharge_Air_Dewpoint_Sensor', 'Discharge_Air_Temperature_Sensor',
    'Discharge_Air_Temperature_Setpoint',
    'Discharge_Water_Temperature_Sensor', 'Duration_Sensor',
    'Electrical_Power_Sensor', 'Energy_Usage_Sensor',
    'Filter_Differential_Pressure_Sensor', 'Flow_Sensor', 'Flow_Setpoint',
    'Frequency_Sensor', 'Heating_Demand_Sensor', 'Heating_Demand_Setpoint',
    'Heating_Supply_Air_Temperature_Deadband_Setpoint',
    'Heating_Temperature_Setpoint', 'Hot_Water_Flow_Sensor',
    'Hot_Water_Return_Temperature_Sensor',
    'Hot_Water_Supply_Temperature_Sensor', 'Humidity_Setpoint',
    'Load_Current_Sensor', 'Low_Outside_Air_Temperature_Enable_Setpoint',
    'Max_Air_Temperature_Setpoint', 'Min_Air_Temperature_Setpoint',
    'Outside_Air_CO2_Sensor', 'Outside_Air_Enthalpy_Sensor',
    'Outside_Air_Humidity_Sensor',
    'Outside_Air_Lockout_Temperature_Setpoint',
    'Outside_Air_Temperature_Sensor', 'Outside_Air_Temperature_Setpoint',
    'Parameter', 'Peak_Power_Demand_Sensor', 'Position_Sensor',
    'Power_Sensor', 'Pressure_Sensor', 'Rain_Sensor',
    'Reactive_Power_Sensor', 'Reset_Setpoint',
    'Return_Air_Temperature_Sensor', 'Return_Water_Temperature_Sensor',
    'Room_Air_Temperature_Setpoint', 'Sensor', 'Setpoint',
    'Solar_Radiance_Sensor', 'Speed_Setpoint', 'Static_Pressure_Sensor',
    'Static_Pressure_Setpoint', 'Status', 'Supply_Air_Humidity_Sensor',
    'Supply_Air_Static_Pressure_Sensor',
    'Supply_Air_Static_Pressure_Setpoint', 'Supply_Air_Temperature_Sensor',
    'Supply_Air_Temperature_Setpoint', 'Temperature_Sensor',
    'Temperature_Setpoint', 'Thermal_Power_Sensor', 'Time_Setpoint',
    'Usage_Sensor', 'Valve_Position_Sensor', 'Voltage_Sensor',
    'Warmest_Zone_Air_Temperature_Sensor', 'Water_Flow_Sensor',
    'Water_Temperature_Sensor', 'Water_Temperature_Setpoint',
    'Wind_Direction_Sensor', 'Wind_Speed_Sensor',
    'Zone_Air_Dewpoint_Sensor', 'Zone_Air_Humidity_Sensor',
    'Zone_Air_Humidity_Setpoint', 'Zone_Air_Temperature_Sensor'
]

# Occurrence-based post-filtering

In [3]:
# Labels grouped into five levels; the inner list at index i is exposed as
# tier i + 1 by the `tiers` mapping built from this constant.
# NOTE(review): the levels presumably reflect depth in the label class
# hierarchy (generic -> specific) — confirm against the label taxonomy.
LEVEL_LABLES = [
    # Tier 1
    ['Alarm', 'Command', 'Parameter', 'Sensor', 'Setpoint', 'Status'],
    # Tier 2
    ['Reset_Setpoint',
    'Usage_Sensor',
    'Pressure_Sensor',
    'Flow_Setpoint',
    'Static_Pressure_Setpoint',
    'Angle_Sensor',
    'Humidity_Setpoint',
    'Temperature_Sensor',
    'Temperature_Setpoint',
    'Supply_Air_Humidity_Sensor',
    'Outside_Air_CO2_Sensor',
    'Differential_Pressure_Setpoint',
    'Damper_Position_Setpoint',
    'Heating_Demand_Setpoint',
    'Cooling_Demand_Setpoint',
    'Current_Sensor',
    'Wind_Speed_Sensor',
    'Flow_Sensor',
    'Dew_Point_Setpoint',
    'Zone_Air_Dewpoint_Sensor',
    'Power_Sensor',
    'Position_Sensor',
    'Solar_Radiance_Sensor',
    'Duration_Sensor',
    'Time_Setpoint',
    'Discharge_Air_Dewpoint_Sensor',
    'Wind_Direction_Sensor',
    'Voltage_Sensor',
    'Zone_Air_Humidity_Sensor',
    'Demand_Sensor',
    'Speed_Setpoint',
    'Rain_Sensor',
    'Frequency_Sensor',
    'Outside_Air_Humidity_Sensor',
    'Outside_Air_Enthalpy_Sensor'],
    # Tier 3
    ['Air_Flow_Sensor',
    'Water_Temperature_Setpoint',
    'Water_Flow_Sensor',
    'Electrical_Power_Sensor',
    'Zone_Air_Humidity_Setpoint',
    'Heating_Temperature_Setpoint',
    'Air_Flow_Setpoint',
    'Energy_Usage_Sensor',
    'Supply_Air_Static_Pressure_Setpoint',
    'Air_Temperature_Sensor',
    'Valve_Position_Sensor',
    'Cooling_Temperature_Setpoint',
    'Water_Temperature_Sensor',
    'Load_Current_Sensor',
    'Damper_Position_Sensor',
    'Static_Pressure_Sensor',
    'Air_Temperature_Setpoint',
    'Thermal_Power_Sensor',
    'Differential_Pressure_Sensor'],
    # Tier 4
    ['Supply_Air_Temperature_Sensor',
    'Discharge_Air_Temperature_Sensor',
    'Discharge_Water_Temperature_Sensor',
    'Zone_Air_Temperature_Sensor',
    'Supply_Air_Static_Pressure_Sensor',
    'Outside_Air_Temperature_Setpoint',
    'Supply_Air_Temperature_Setpoint',
    'Chilled_Water_Supply_Flow_Sensor',
    'Chilled_Water_Supply_Temperature_Sensor',
    'Peak_Power_Demand_Sensor',
    'Room_Air_Temperature_Setpoint',
    'Hot_Water_Supply_Temperature_Sensor',
    'Active_Power_Sensor',
    'Min_Air_Temperature_Setpoint',
    'Return_Air_Temperature_Sensor',
    'Hot_Water_Flow_Sensor',
    'Chilled_Water_Differential_Temperature_Sensor',
    'Filter_Differential_Pressure_Sensor',
    'Max_Air_Temperature_Setpoint',
    'Cooling_Supply_Air_Temperature_Deadband_Setpoint',
    'Outside_Air_Temperature_Sensor',
    'Heating_Supply_Air_Temperature_Deadband_Setpoint',
    'Discharge_Air_Temperature_Setpoint',
    'Return_Water_Temperature_Sensor',
    'Reactive_Power_Sensor'],
    # Tier 5
    ['Low_Outside_Air_Temperature_Enable_Setpoint',
    'Cooling_Demand_Sensor',
    'Chilled_Water_Return_Temperature_Sensor',
    'Average_Zone_Air_Temperature_Sensor',
    'Warmest_Zone_Air_Temperature_Sensor',
    'Heating_Demand_Sensor',
    'Differential_Supply_Return_Water_Temperature_Sensor',
    'Hot_Water_Return_Temperature_Sensor',
    'Outside_Air_Lockout_Temperature_Setpoint']
]

In [4]:
# 1-based tier number -> list of labels belonging to that tier.
tiers = dict(enumerate(LEVEL_LABLES, start=1))

def get_tier(label):
    """Return the tier number whose label list contains ``label``.

    Tiers are scanned in the iteration order of ``tiers`` and the first match
    wins. Returns ``None`` when the label is not listed in any tier.
    """
    matches = (number for number, members in tiers.items() if label in members)
    return next(matches, None)

In [5]:
# Training targets. The co-occurrence pass below skips the first column and
# tests every remaining column for equality with 1.
train_y = pd.read_csv("../downloads/train_y_v0.1.0.csv")

In [6]:
# Predictions to post-process, loaded from a previously written submission file.
# Alternative source (disabled): an in-memory `weighted_res` frame.
# stackedfinalresult = weighted_res.copy()
stackedfinalresult = pd.read_csv("../logs/submit/0129_15_aug_class_weight.csv")

In [7]:
# Count, for every ordered pair of distinct label columns, how many prediction
# rows have both scores at or above 0.5. The first column is skipped.
check = stackedfinalresult
pred_label_cols = list(check.columns)[1:]
occurence = []

for col1 in tqdm(pred_label_cols):
    col1_on = check[col1] >= 0.5
    for col2 in pred_label_cols:
        if col1 == col2:
            continue
        both_on = col1_on & (check[col2] >= 0.5)
        occurence.append([col1, col2, int(both_on.sum())])

100%|██████████| 94/94 [00:21<00:00,  4.32it/s]


In [8]:
# Keep only the label pairs that are predicted together at least once
# (column 2 holds the pair count).
tst_oc = pd.DataFrame(occurence).loc[lambda pairs: pairs[2] > 0]
tst_oc

Unnamed: 0,0,1,2
29,Active_Power_Sensor,Electrical_Power_Sensor,6395
56,Active_Power_Sensor,Power_Sensor,6395
64,Active_Power_Sensor,Sensor,6395
109,Air_Flow_Sensor,Current_Sensor,17
111,Air_Flow_Sensor,Damper_Position_Setpoint,1
...,...,...,...
8726,Zone_Air_Temperature_Sensor,Temperature_Sensor,10556
8727,Zone_Air_Temperature_Sensor,Temperature_Setpoint,29
8733,Zone_Air_Temperature_Sensor,Warmest_Zone_Air_Temperature_Sensor,73
8734,Zone_Air_Temperature_Sensor,Water_Flow_Sensor,1


In [9]:
# Same pairwise count on the training targets: for every ordered pair of
# distinct label columns, the number of rows where both equal 1.
trn_oc_check = train_y.copy()
trn_label_cols = list(trn_oc_check.columns)[1:]
trn_occurence = []

for col1 in tqdm(trn_label_cols):
    col1_set = trn_oc_check[col1] == 1
    for col2 in trn_label_cols:
        if col1 == col2:
            continue
        both_set = col1_set & (trn_oc_check[col2] == 1)
        trn_occurence.append([col1, col2, int(both_set.sum())])

100%|██████████| 94/94 [00:12<00:00,  7.64it/s]


In [10]:
# Label pairs that appear together at least once in the training targets
# (column 2 holds the pair count).
trn_oc = pd.DataFrame(trn_occurence).loc[lambda pairs: pairs[2] > 0]

In [11]:
# Map each label to the list of labels it co-occurs with in the training
# targets. Only the first two columns of `trn_oc` (the label pair) are used;
# the count column is ignored here.
oc_map = {}
for src, tgt in trn_oc.values[:, :2]:
    oc_map.setdefault(src, []).append(tgt)

In [58]:
# Sanity check: 'Sensor' is listed in the first tier.
get_tier("Sensor")

1

In [None]:
# Sanity check: 'Air_Flow_Sensor' is listed in the third tier.
get_tier("Air_Flow_Sensor")

In [12]:
# Collect predicted label pairs that never co-occur in the training targets.
# Each pair is stored with the label from the smaller tier number first; when
# that ordering does not hold strictly (e.g. equal tiers) the pair is stored
# reversed. Labels with no entry in `oc_map` are skipped.
rm_label = []
for row in tqdm(tst_oc.values, total=len(tst_oc)):
    first, second = row[0], row[1]
    seen_with_first = oc_map.get(first)
    if not seen_with_first or second in seen_with_first:
        continue
    ordered = (first, second) if get_tier(first) < get_tier(second) else (second, first)
    rm_label.append(ordered)

100%|██████████| 1038/1038 [00:00<00:00, 149559.86it/s]


In [48]:
# Post-process a copy of the predictions. For every unseen (src, tgt) pair
# where one label is a Sensor and the other a Setpoint, swap the last
# underscore-separated token of `tgt` to match `src`'s kind. If the resulting
# label exists in `columnlist`, set it to 1.0 and clear `tgt` on the affected
# rows. Pairs that yield no valid replacement label are left untouched.
# An earlier variant (removed here) simply zeroed `tgt`, optionally skipping
# pairs that would clear more than 1% of the rows carrying `tgt`.
filtered_res = stackedfinalresult.copy()
remove_record = []
for src, tgt in tqdm(rm_label):
    # NOTE(review): rows are matched with == 1 here, whereas the co-occurrence
    # pass over the predictions used >= 0.5 — confirm the scores are hard 0/1.
    both_set = (filtered_res[src] == 1) & (filtered_res[tgt] == 1)
    update_indicies = filtered_res[both_set].index

    tgt_stem = tgt.split('_')[:-1]
    new_tgt = None
    if 'Sensor' in src and 'Setpoint' in tgt:
        new_tgt = '_'.join(tgt_stem + ['Sensor'])
    if 'Setpoint' in src and 'Sensor' in tgt:
        new_tgt = '_'.join(tgt_stem + ['Setpoint'])

    # new_tgt stays None for same-kind pairs, and None is never in columnlist.
    if new_tgt not in columnlist:
        continue

    filtered_res.loc[update_indicies, new_tgt] = 1.0
    filtered_res.loc[update_indicies, tgt] = 0.0
    remove_record.append([src, tgt, new_tgt, len(update_indicies)])

# filtered_res = weighted_res.copy()
# remove_record = []
# for src, tgt in tqdm(rm_label):
#     remove_size = len(filtered_res[~(filtered_res[src] == filtered_res[tgt])])

#     # Count how many rows match the condition
#     remove_record.append((src, tgt, remove_size))

#     # Properly update the matching rows in the original DataFrame
#     filtered_res.loc[(filtered_res[src] > filtered_res[tgt]), tgt] = 0.0
#     filtered_res.loc[(filtered_res[src] < filtered_res[tgt]), src] = 0.0

100%|██████████| 638/638 [00:00<00:00, 641.03it/s]


In [49]:
# Tabulate the applied replacements. Columns 0-3 are: src, tgt, new_tgt and
# the number of rows selected for the update.
rm_check = pd.DataFrame(remove_record)

In [51]:
# Rows rewritten per replaced label (column 1 = tgt, column 3 = row count),
# largest first.
rm_check.groupby(1)[[3]].sum().sort_values(3, ascending=False)

Unnamed: 0_level_0,3
1,Unnamed: 1_level_1
Damper_Position_Sensor,1471
Temperature_Setpoint,1437
Air_Flow_Sensor,866
Air_Flow_Setpoint,772
Air_Temperature_Setpoint,488
Damper_Position_Setpoint,379
Flow_Sensor,255
Flow_Setpoint,210
Temperature_Sensor,174
Supply_Air_Temperature_Setpoint,141


In [52]:
# Total of the per-label row counts across all recorded replacements.
rm_check.groupby(1)[3].sum().sum()

6399

In [53]:
# Write the post-processed predictions as a new submission file, without the
# DataFrame index.
filtered_res.to_csv("../logs/submit/0130_post_processing_replace.csv", index=False)

In [54]:
def check_pred_num(_final_res, thr=0.4):
    """Count, per row, how many columns have a value of at least ``thr``.

    Args:
        _final_res: DataFrame of scores; a 'filename' column, if present, is
            left out of the count.
        thr: Inclusive threshold a value must reach to be counted.

    Returns:
        A Series aligned with the rows of ``_final_res`` holding the counts.
    """
    # errors='ignore' makes the drop a no-op when there is no 'filename' column.
    scores = _final_res.drop(columns=['filename'], errors='ignore')
    reaches_thr = scores.ge(thr)
    return reaches_thr.sum(axis=1)

In [56]:
# Distribution of per-row label counts (default thr=0.4) before post-processing.
check_pred_num(stackedfinalresult).value_counts()

1    119405
2     82247
3     71922
4     31935
5     10211
Name: count, dtype: int64

In [57]:
# Distribution of per-row label counts (default thr=0.4) after post-processing.
check_pred_num(filtered_res).value_counts()

1    119405
2     82987
3     71182
4     31935
5     10211
Name: count, dtype: int64