In [2]:
import polars as pl
import numpy as np
import pandas as pd
import os

# House 1

## Read all data

In [3]:
base_path = "/opt/nilm-shared-data/nilm_device_detection/other_dataset/RAE_dataset/raw_data"
house1_all_data = pl.concat([pl.read_csv(f"{base_path}/house1_subs_blk2.csv"), 
                             pl.read_csv(f"{base_path}/house1_subs_blk1.csv")])
len(house1_all_data)

145637040

In [4]:
transform_columns = ["unix_ts", "Irms", "pf", "P", "Q", "S", "label"]

In [5]:
select_columns = ["unix_ts", "sub", "V", "I", "P", "Q", "S"]
house1_all_data = house1_all_data.select(select_columns)

In [6]:
house1_all_data.head()

unix_ts,sub,V,I,P,Q,S
i64,i64,f64,f64,i64,i64,i64
1457251200,1,119.7,0.0,0,0,3
1457251200,2,119.1,0.0,1,0,3
1457251200,3,119.7,0.0,0,0,1
1457251200,4,119.1,0.0,1,2,3
1457251200,5,119.7,0.0,0,0,3


## Transform data

<b>House 1 labels in house1_labels.txt file</b>
    "kitchen_oven": [1,2],
    "kitchen_counter_plugs": [3,4],
    "clothes_dryer": [5,6],
    "upstairs_bedroom_AFCI_arc-fault_plugs": [7],
    "kitchen_fridge": [8],
    "clothes_washer": [9],
    "kitchen_dishwasher": [10],
    "furnace_and_hot_water_unit": [11],
    "basement_plugs_and_lights": [12],
    "heat_pump": [13, 14],
    "garage_sub_panel": [15,16],
    "upstairs_plug_and_lights": [17,18],
    "basement_blue_plugs": [19],
    "bathrooms": [20],
    "rental_suite_sub_panel": [21, 22],
    "misc_plugs": [23],
    "home_office": [24]\

In [7]:
meter_ids_dict = {
    # "kitchen_oven": [1,2],
    #"kitchen_counter_plugs": [3,4],
    # "clothes_dryer": [5,6],
    #"upstairs_bedroom_AFCI_arc-fault_plugs": [7],
    # "kitchen_fridge": [8],
    #"clothes_washer": [9],
    #"kitchen_dishwasher": [10],
    #"furnace_and_hot_water_unit": [11],
    #"basement_plugs_and_lights": [12],
    # "heat_pump": [13, 14],
    #"garage_sub_panel": [15,16],
    #"upstairs_plug_and_lights": [17,18],
    #"basement_blue_plugs": [19],
    #"bathrooms": [20],
    #"rental_suite_sub_panel": [21, 22],
    "misc_plugs": [23],
    "home_office": [24]
}

In [8]:
save_path = f"/opt/nilm-shared-data/nilm_device_detection/other_dataset/RAE_dataset/transform_data/raw"
for key in meter_ids_dict:
    meter_ids = meter_ids_dict[key]
    print(f"Working on key: {key}")
    if len(meter_ids) == 2:
        load1_df = house1_all_data.filter(pl.col("sub") == meter_ids[0])
        load2_df = house1_all_data.filter(pl.col("sub") == meter_ids[1])
        combined_np = load1_df[:, 3:].to_numpy() + load2_df[:, 3:].to_numpy()
        combined_df = pl.DataFrame(combined_np, schema=["Irms", "P", "Q", "S"])
        combined_df = combined_df.with_columns(label=pl.lit(key))
        combined_df = combined_df.with_columns(unix_ts=pl.lit(load1_df["unix_ts"]))
        combined_df = combined_df.with_columns(pf=pl.col("P")/pl.col("S"))
        combined_df = combined_df.with_columns(
            pl.when(combined_df["S"] != 0)
            .then(combined_df["P"]/combined_df["S"])
            .otherwise(0)
            .alias("pf")
        )
        combined_df = combined_df.select(transform_columns)
        print(combined_df.head())
        combined_df.to_pandas().to_csv(os.path.join(save_path, f"house1_{key}.csv"))
    elif len(meter_ids) == 1:
        load_df = house1_all_data.filter(pl.col("sub") == meter_ids[0])
        load_df = load_df.rename({"I": "Irms"})
        load_df = load_df.with_columns(label=pl.lit(key))  
        load_df = load_df.with_columns(
            pl.when(load_df["S"] != 0)
            .then(load_df["P"]/load_df["S"])
            .otherwise(0)
            .alias("pf")
        )
        load_df = load_df.select(transform_columns)
        print(load_df.head())
        load_df.to_pandas().to_csv(os.path.join(save_path, f"house1_{key}.csv"))

Working on key: kitchen_counter_plugs
shape: (5, 7)
┌────────────┬──────┬──────┬─────┬─────┬─────┬───────────────────────┐
│ unix_ts    ┆ Irms ┆ pf   ┆ P   ┆ Q   ┆ S   ┆ label                 │
│ ---        ┆ ---  ┆ ---  ┆ --- ┆ --- ┆ --- ┆ ---                   │
│ i64        ┆ f64  ┆ f64  ┆ f64 ┆ f64 ┆ f64 ┆ str                   │
╞════════════╪══════╪══════╪═════╪═════╪═════╪═══════════════════════╡
│ 1457251200 ┆ 0.0  ┆ 0.25 ┆ 1.0 ┆ 2.0 ┆ 4.0 ┆ kitchen_counter_plugs │
│ 1457251201 ┆ 0.0  ┆ 0.5  ┆ 2.0 ┆ 2.0 ┆ 4.0 ┆ kitchen_counter_plugs │
│ 1457251202 ┆ 0.0  ┆ 0.5  ┆ 2.0 ┆ 2.0 ┆ 4.0 ┆ kitchen_counter_plugs │
│ 1457251203 ┆ 0.0  ┆ 0.5  ┆ 2.0 ┆ 2.0 ┆ 4.0 ┆ kitchen_counter_plugs │
│ 1457251204 ┆ 0.0  ┆ 0.5  ┆ 2.0 ┆ 2.0 ┆ 4.0 ┆ kitchen_counter_plugs │
└────────────┴──────┴──────┴─────┴─────┴─────┴───────────────────────┘
Working on key: upstairs_bedroom_AFCI_arc-fault_plugs
shape: (5, 7)
┌────────────┬──────┬──────────┬─────┬─────┬─────┬───────────────────────────────────┐
│ unix_ts   

# House 2

In [3]:
house2_data = pl.read_csv("../../data/RAE_dataset/raw_data/house2_subs_blk1.csv")
len(house2_data)

104588169

In [4]:
transform_columns = ["unix_ts", "Irms", "pf", "P", "Q", "S", "label"]
select_columns = ["unix_ts", "sub", "V", "I", "P", "Q", "S"]
save_path = "../../data/RAE_dataset/transform_data"

In [5]:
house2_data = house2_data.select(select_columns)
house2_data.head()

unix_ts,sub,V,I,P,Q,S
i64,i64,f64,f64,i64,i64,i64
1505286000,1,120.9,0.9,71,57,111
1505286000,2,121.1,0.3,32,14,40
1505286000,3,121.1,0.0,0,0,0
1505286000,4,120.9,0.0,0,0,0
1505286000,5,121.1,0.0,1,0,3


## Transform data

<b>House 2 label</b>
-  1 House Sub-Panel L1
-  2 House Sub-Panel L2
-  3 Lights & Plugs (general label)
-  4 Clothes Dryer L1
-  5 Clothes Dryer L2
-  6 Bedroom Plugs
-  7 Built-in Vacuum
-  8 Boiler (for hot water and radiant heating)
-  9 Lights & Plugs (general label)
- 10 Clothes Washer
- 11 Kitchen Fridge
- 12 Lights & Plugs (general label, incl. Internet modem and network equipment)
- 13 Bedrooms AFCI Arc-Fault Plugs
- 14 Kitchen Counter Plugs
- 15 Kitchen Counter Plugs
- 16 Lights & Plugs (general label)
- 17 Lights & Plugs (general label)
- 18 Outside Plugs
- 19 Dishwasher
- 20 Lights & Plugs (general label)
- 21 Mobile Phone Changers (garburator & microwave not installed)

In [1]:
house2_meters = {
    "lights_and_plugs_3": [3],
    "lights_and_plugs_16": [16],
    "lights_and_plugs_20": [20]
}

for key in house2_meters:
    meter_ids = house2_meters[key]
    print(f"Working on key: {key}")
    if len(meter_ids) == 2:
        load1_df = house2_data.filter(pl.col("sub") == meter_ids[0])
        load2_df = house2_data.filter(pl.col("sub") == meter_ids[1])
        combined_np = load1_df[:, 3:].to_numpy() + load2_df[:, 3:].to_numpy()
        combined_df = pl.DataFrame(combined_np, schema=["Irms", "P", "Q", "S"])
        combined_df = combined_df.with_columns(label=pl.lit(key))
        combined_df = combined_df.with_columns(unix_ts=pl.lit(load1_df["unix_ts"]))
        combined_df = combined_df.with_columns(
            pl.when(combined_df["S"] != 0)
            .then(combined_df["P"]/combined_df["S"])
            .otherwise(0)
            .alias("pf")
        )
        combined_df = combined_df.select(transform_columns)
        print(combined_df.head())
        combined_df.to_pandas().to_csv(os.path.join(save_path, f"house2_{key}.csv"))
    elif len(meter_ids) == 1:
        load_df = house2_data.filter(pl.col("sub") == meter_ids[0])
        load_df = load_df.rename({"I": "Irms"})
        load_df = load_df.with_columns(label=pl.lit(key))  
        load_df = load_df.with_columns(
            pl.when(load_df["S"] != 0)
            .then(load_df["P"]/load_df["S"])
            .otherwise(0)
            .alias("pf")
        )
        load_df = load_df.select(transform_columns)
        print(load_df.head())
        load_df.to_pandas().to_csv(os.path.join(save_path, f"house2_{key}.csv"))

Working on key: lights_and_plugs_3


NameError: name 'house2_data' is not defined