In [1]:
from os.path import join
from pylab import rcParams
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from iawe_dao import IaweDAO
from datetime import datetime

# Utilities

In [2]:
def transform_nilmtk_df_to_my_df(input_df: pd.DataFrame) -> pd.DataFrame:
    """Flatten a meter frame with tuple column keys into single-level columns.

    The input index is copied into ``Time`` and converted element by element
    with ``datetime.timestamp`` into ``unix_ts``. Each known tuple column that
    is present in ``input_df`` is copied under its short name; absent columns
    are simply skipped. When both ``P`` and ``S`` end up in the result,
    ``AvgPowerFactor`` is added as ``P / S``.

    Args:
        input_df: Frame indexed by timestamps whose columns are looked up with
            keys such as ``('power', 'active')`` or ``('current', '')``.

    Returns:
        A new frame with a fresh positional index and the columns ``Time``,
        ``unix_ts`` and whichever of ``Irms``, ``Urms``, ``P``, ``Q``, ``S``,
        ``AvgPowerFactor`` could be derived, in that order.
    """
    # Source column key -> flat column name; dict order fixes the output order.
    column_map = {
        ('current', ''): "Irms",
        ('voltage', ''): "Urms",
        ('power', 'active'): "P",
        ('power', 'reactive'): "Q",
        ('power', 'apparent'): "S",
    }

    timestamps = input_df.index
    flat_df = pd.DataFrame()
    flat_df["Time"] = timestamps
    flat_df["unix_ts"] = timestamps.map(datetime.timestamp)

    for source_key, flat_name in column_map.items():
        if source_key in input_df.columns:
            # .values drops the timestamp index so rows align positionally.
            flat_df[flat_name] = input_df[source_key].values

    if {"P", "S"}.issubset(flat_df.columns):
        flat_df["AvgPowerFactor"] = flat_df["P"] / flat_df["S"]
    return flat_df
    
def extract_turn_on_sequence(sequence_idx, appliance_activated_data: pd.DataFrame) -> pd.DataFrame:
    """Return the rows spanned by the ``sequence_idx``-th start/end event pair.

    Rows whose ``Event`` column equals 1 are taken as sequence starts and rows
    whose ``Event`` equals 2 as sequence ends. The i-th start is paired with
    the i-th end, and the label-based slice between the two index labels
    (both ends inclusive) is returned.

    Args:
        sequence_idx: Zero-based position of the start/end pair to extract.
        appliance_activated_data: Frame containing an ``Event`` column.

    Returns:
        The slice of ``appliance_activated_data`` from the chosen start label
        to the chosen end label, or an empty ``DataFrame`` (after printing a
        notice) when the frame has no start events or no end events.

    Raises:
        ValueError: If ``sequence_idx`` is negative or does not address an
            existing start/end pair.
    """
    events = appliance_activated_data['Event']
    start_indices = events[events == 1].index
    end_indices = events[events == 2].index
    n_starts, n_ends = len(start_indices), len(end_indices)
    n_sequences = min(n_starts, n_ends)
    range_error = (
        f"Sequence idx should satisfy 0 <= idx < {n_sequences}, got {sequence_idx}"
    )

    # Clearly out-of-range requests are rejected before anything else.
    if sequence_idx < 0 or sequence_idx > n_starts:
        raise ValueError(range_error)

    if n_starts == 0 or n_ends == 0:
        print("No valid sequences found.")
        # Return an empty frame instance (not the DataFrame class itself) so
        # callers can treat the result like any other frame.
        return pd.DataFrame()

    # Also covers idx == n_starts and idx >= n_ends, which would otherwise
    # surface as an IndexError from the positional lookups below.
    if sequence_idx >= n_sequences:
        raise ValueError(range_error)

    # NOTE(review): pairing by position assumes start and end events strictly
    # alternate, beginning with a start — confirm against the event labelling.
    return appliance_activated_data.loc[start_indices[sequence_idx]:end_indices[sequence_idx]]
    
def plot_data(input_df, which_plot: dict, yticks=range(0, 2000, 50)):
    """Plot the selected electrical quantities of ``input_df`` against ``Time``.

    Args:
        input_df: Frame with a ``Time`` column and a column for every quantity
            that is switched on in ``which_plot``.
        which_plot: Flags keyed by ``"Irms"``, ``"Urms"``, ``"P"``, ``"Q"`` and
            ``"S"``; a truthy value draws that column. A missing flag is
            treated as off. The optional ``"name"`` entry is used as the
            figure title (empty title when absent).
        yticks: Tick positions for the y-axis. Defaults to 0, 50, ..., 1950.
            Pass ``None`` to keep matplotlib's automatic ticks.

    Raises:
        KeyError: If a quantity is switched on but its column (or ``Time``)
            is not present in ``input_df``.
    """
    # (flag key == column name, legend label), listed in drawing order.
    series_specs = (
        ('Irms', 'RMS Current (I)'),
        ('Urms', 'RMS Voltage (V)'),
        ('P', 'Power (P)'),
        ('Q', 'Reactive Power (Q)'),
        ('S', 'Apparent Power (S)'),
    )

    plt.figure(figsize=(10, 6))
    drew_any = False
    for column, label in series_specs:
        if which_plot.get(column):
            plt.plot(input_df['Time'], input_df[column], label=label)
            drew_any = True

    plt.xlabel('Time')
    if yticks is not None:
        plt.yticks(yticks)
    plt.title(f'{which_plot.get("name", "")}')
    # Only build a legend when at least one labelled line was drawn.
    if drew_any:
        plt.legend()
    plt.grid(True)
    plt.show()

# Load original dataset

## Inspect

In [3]:
# Open the iAWE HDF5 file through the project's IaweDAO wrapper.
iawe_dao = IaweDAO(h5_path="/opt/nilm-shared-data/nilm_device_detection/other_dataset/iawe/iawe.h5")
# Output directory referenced by the commented-out CSV exports in this notebook.
# NOTE(review): this path already ends with "/" and those exports build
# f"{save_path}/<file>.csv", so the resulting paths contain a double slash.
save_path = "/opt/nilm-shared-data/nilm_device_detection/other_dataset/iawe/transform_data/"

In [4]:
# Show the dataset's meters and their column keys (see this cell's printed output).
iawe_dao.get_device_and_mains_info()

[-] Main meters:
Main meter: ElecMeter(instance=1, building=1, dataset='iAWE', site_meter, appliances=[]), columns: [('power factor', None), ('frequency', 'apparent'), ('power', 'reactive'), ('power', 'apparent'), ('power', 'active'), ('current', None), ('voltage', 'apparent')]
Main meter: ElecMeter(instance=2, building=1, dataset='iAWE', site_meter, appliances=[]), columns: [('power factor', None), ('frequency', 'apparent'), ('power', 'reactive'), ('power', 'apparent'), ('power', 'active'), ('current', None), ('voltage', 'apparent')]

[-] Application meters:
Meter: ElecMeter(instance=1, building=1, dataset='iAWE', site_meter, appliances=[]), columns: [('power factor', None), ('frequency', 'apparent'), ('power', 'reactive'), ('power', 'apparent'), ('power', 'active'), ('current', None), ('voltage', 'apparent')]
Meter: ElecMeter(instance=2, building=1, dataset='iAWE', site_meter, appliances=[]), columns: [('power factor', None), ('frequency', 'apparent'), ('power', 'reactive'), ('power'

# Transform main

In [5]:
# Load the main-meter frame for meter_id=1 and flatten it to single-level columns.
main_df_dataframe1 = transform_nilmtk_df_to_my_df(input_df=iawe_dao.load_main_df(meter_id=1))
main_df_dataframe1.head()

Unnamed: 0,Time,unix_ts,Irms,P,Q,S,AvgPowerFactor
0,2013-07-01 00:00:00+05:30,1372617000.0,3.12894,677.111023,77.132896,695.848999,0.973072
1,2013-07-01 00:00:03+05:30,1372617000.0,3.20957,694.346008,81.619797,713.953979,0.972536
2,2013-07-01 00:00:06+05:30,1372617000.0,3.22031,693.801025,79.149803,716.361023,0.968508
3,2013-07-01 00:00:07+05:30,1372617000.0,3.18518,693.388,79.191101,708.450012,0.97874
4,2013-07-01 00:00:10+05:30,1372617000.0,2.70798,587.476013,34.7509,602.125,0.975671


In [9]:
#main_df_dataframe1.to_csv(f"{save_path}/mains_data_1.csv", index=False)

In [6]:
# Load the main-meter frame for meter_id=0, flatten it, and report its row count.
main_df_dataframe0 = transform_nilmtk_df_to_my_df(input_df=iawe_dao.load_main_df(meter_id=0))
print(len(main_df_dataframe0))
main_df_dataframe0.head()

2091769


Unnamed: 0,Time,unix_ts,Irms,P,Q,S,AvgPowerFactor
0,2013-07-01 00:00:00+05:30,1372617000.0,0.128665,15.7126,6.0000300000000004e-18,28.636999,0.548682
1,2013-07-01 00:00:03+05:30,1372617000.0,0.134082,16.362,6.0000300000000004e-18,29.849199,0.548155
2,2013-07-01 00:00:06+05:30,1372617000.0,0.134063,14.9257,6.0000300000000004e-18,29.8458,0.500094
3,2013-07-01 00:00:07+05:30,1372617000.0,0.13405,14.9314,6.0000300000000004e-18,29.8389,0.5004
4,2013-07-01 00:00:10+05:30,1372617000.0,0.134618,14.9448,6.0000300000000004e-18,29.9555,0.4989


In [11]:
# main_df_dataframe0.to_csv(f"{save_path}/mains_data_0.csv", index=False)

# Transforming data from devices

In [8]:
# Load the device frame for meter_id=2 (reported as the fridge in this cell's
# output) and flatten it to single-level columns.
fridge_df = iawe_dao.load_device_df(meter_id=2)
fridge_transform_df = transform_nilmtk_df_to_my_df(fridge_df)
#fridge_transform_df.to_csv(f"{save_path}/fridge_iawe.csv", index=False)

# Preview the raw (untransformed) frame, which still has the two-level column header.
fridge_df.head()

Meter information: ElecMeter(instance=3, building=1, dataset='iAWE', appliances=[Appliance(type='fridge', instance=1)])


physical_quantity,power,power,voltage,power,frequency,current
type,reactive,apparent,Unnamed: 3_level_1,active,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-07-01 00:00:00+05:30,49.313,110.252998,221.080002,98.610001,50.099998,0.499
2013-07-01 00:00:01+05:30,49.293999,110.167999,221.080002,98.524002,50.119999,0.498
2013-07-01 00:00:02+05:30,49.368999,110.288002,221.089996,98.622002,50.099998,0.499
2013-07-01 00:00:03+05:30,49.431999,110.299004,221.149994,98.601997,50.119999,0.499
2013-07-01 00:00:04+05:30,49.310001,110.193001,221.009995,98.544998,50.110001,0.499


In [9]:
# Load the device frame for meter_id=3 (reported as air conditioner 1 in this
# cell's output) and flatten it to single-level columns.
air_conditioner1_df = iawe_dao.load_device_df(meter_id=3)
air_conditioner1_transform_df = transform_nilmtk_df_to_my_df(air_conditioner1_df)
#air_conditioner1_transform_df.to_csv(f"{save_path}/air_conditioner_1_iawe.csv", index=False)

air_conditioner1_transform_df.head()

Meter information: ElecMeter(instance=4, building=1, dataset='iAWE', appliances=[Appliance(type='air conditioner', instance=1)])


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-01 20:14:59+05:30,1372690000.0,8.634,201.570007,1717.727051,273.408997,1739.349976,0.987568
1,2013-07-01 20:15:00+05:30,1372690000.0,8.638,201.479996,1718.790039,272.576996,1740.269043,0.987658
2,2013-07-01 20:15:01+05:30,1372690000.0,8.642,201.460007,1718.781982,277.901001,1741.103027,0.98718
3,2013-07-01 20:15:02+05:30,1372690000.0,8.641,201.389999,1718.343994,274.575989,1740.142944,0.987473
4,2013-07-01 20:15:03+05:30,1372690000.0,8.616,201.380005,1715.541016,259.509003,1735.057983,0.988751


In [10]:
# Load the device frame for meter_id=4 (reported as air conditioner 2 in this
# cell's output) and flatten it to single-level columns.
air_conditioner2_df = iawe_dao.load_device_df(meter_id=4)
air_conditioner2_transform_df = transform_nilmtk_df_to_my_df(air_conditioner2_df)
#air_conditioner2_transform_df.to_csv(f"{save_path}/air_conditioner_2_iawe.csv", index=False)

air_conditioner2_transform_df.head()

Meter information: ElecMeter(instance=5, building=1, dataset='iAWE', appliances=[Appliance(type='air conditioner', instance=2)])


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-01 00:00:00+05:30,1372617000.0,0.801,221.149994,174.455002,30.167,177.044006,0.985376
1,2013-07-01 00:00:01+05:30,1372617000.0,0.801,221.130005,174.576996,30.183001,177.167007,0.985381
2,2013-07-01 00:00:02+05:30,1372617000.0,0.802,221.199997,174.822998,30.150999,177.404007,0.985451
3,2013-07-01 00:00:03+05:30,1372617000.0,0.802,221.199997,174.710007,30.156,177.294006,0.985425
4,2013-07-01 00:00:04+05:30,1372617000.0,0.8,221.210007,174.274994,30.193001,176.871002,0.985323


In [12]:
# Five sample timestamps (as strings) used to compare readings across meters.
# NOTE(review): the name `time` would shadow the stdlib module if it were ever imported here.
time = ["2013-07-01 20:15:07+05:30", "2013-07-01 20:15:08+05:30", "2013-07-01 20:15:10+05:30", "2013-07-01 20:15:11+05:30", "2013-07-01 20:15:12+05:30"]
# Rows of air conditioner 1 whose Time matches one of the sample timestamps.
air_conditioner1_transform_df[air_conditioner1_transform_df["Time"].isin(time)]

Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
7,2013-07-01 20:15:07+05:30,1372690000.0,1.974,224.059998,401.403992,185.697998,442.277008,0.907585
8,2013-07-01 20:15:08+05:30,1372690000.0,1.973,223.759995,401.063995,184.800003,441.59201,0.908223
10,2013-07-01 20:15:10+05:30,1372690000.0,1.945,223.440002,393.890991,183.602005,434.579987,0.906372
11,2013-07-01 20:15:11+05:30,1372690000.0,1.943,223.289993,393.433014,182.970001,433.89801,0.906741
12,2013-07-01 20:15:12+05:30,1372690000.0,1.945,223.160004,393.829987,182.625,434.113007,0.907206


In [13]:
# Fridge rows whose Time matches one of the sample timestamps in `time`.
fridge_transform_df[fridge_transform_df["Time"].isin(time)]

Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
67188,2013-07-01 20:15:07+05:30,1372690000.0,0.453,225.229996,87.287003,53.034,102.135002,0.854624
67189,2013-07-01 20:15:08+05:30,1372690000.0,0.453,225.059998,87.193001,52.960999,102.016998,0.854691
67191,2013-07-01 20:15:10+05:30,1372690000.0,0.453,224.639999,87.150002,52.671001,101.830002,0.855838
67192,2013-07-01 20:15:11+05:30,1372690000.0,0.453,224.440002,87.042,52.5,101.649002,0.8563
67193,2013-07-01 20:15:12+05:30,1372690000.0,0.453,224.320007,86.995003,52.648998,101.685997,0.855526


In [14]:
# Main-meter (meter_id=1) rows whose Time matches one of the sample timestamps in `time`.
main_df_dataframe1[main_df_dataframe1["Time"].isin(time)]

Unnamed: 0,Time,unix_ts,Irms,P,Q,S,AvgPowerFactor
53059,2013-07-01 20:15:07+05:30,1372690000.0,11.8241,2622.719971,150.768997,2661.120117,0.98557
53060,2013-07-01 20:15:08+05:30,1372690000.0,11.7389,2622.600098,150.856995,2641.830078,0.992721
53061,2013-07-01 20:15:10+05:30,1372690000.0,11.7976,2624.189941,148.505005,2655.149902,0.98834
53062,2013-07-01 20:15:11+05:30,1372690000.0,11.7986,2625.139893,150.490997,2655.290039,0.988645
53063,2013-07-01 20:15:12+05:30,1372690000.0,11.8014,2626.47998,140.354004,2655.800049,0.98896


In [21]:
# Main-meter (meter_id=0) rows whose Time matches one of the sample timestamps in `time`.
main_df_dataframe0[main_df_dataframe0["Time"].isin(time)]

Unnamed: 0,Time,unix_ts,Irms,P,Q,S,AvgPowerFactor
53059,2013-07-01 20:15:07+05:30,1372690000.0,9.15218,2025.219971,291.026001,2063.320068,0.981535
53060,2013-07-01 20:15:08+05:30,1372690000.0,9.15362,2024.920044,286.013,2063.550049,0.98128
53061,2013-07-01 20:15:10+05:30,1372690000.0,9.20202,2025.040039,294.916992,2074.560059,0.97613
53062,2013-07-01 20:15:11+05:30,1372690000.0,9.20078,2024.890015,290.330994,2074.26001,0.976199
53063,2013-07-01 20:15:12+05:30,1372690000.0,9.1989,2024.599976,280.247986,2073.790039,0.97628


In [19]:
# Reload the untransformed main-meter frame (meter_id=0) and select the sample
# timestamps directly on its index, to compare with the flattened frame.
main_original_df = iawe_dao.load_main_df(meter_id=0)
main_original_df[main_original_df.index.isin(time)]

physical_quantity,power,power,power,current
type,reactive,apparent,active,Unnamed: 4_level_1
2013-07-01 20:15:07+05:30,291.026001,2063.320068,2025.219971,9.15218
2013-07-01 20:15:08+05:30,286.013,2063.550049,2024.920044,9.15362
2013-07-01 20:15:10+05:30,294.916992,2074.560059,2025.040039,9.20202
2013-07-01 20:15:11+05:30,290.330994,2074.26001,2024.890015,9.20078
2013-07-01 20:15:12+05:30,280.247986,2073.790039,2024.599976,9.1989


In [17]:
# Preview the first rows of the untransformed main-meter frame.
main_original_df.head()

physical_quantity,power,power,power,current
type,reactive,apparent,active,Unnamed: 4_level_1
2013-07-01 00:00:00+05:30,6.0000300000000004e-18,28.636999,15.7126,0.128665
2013-07-01 00:00:03+05:30,6.0000300000000004e-18,29.849199,16.362,0.134082
2013-07-01 00:00:06+05:30,6.0000300000000004e-18,29.8458,14.9257,0.134063
2013-07-01 00:00:07+05:30,6.0000300000000004e-18,29.8389,14.9314,0.13405
2013-07-01 00:00:10+05:30,6.0000300000000004e-18,29.9555,14.9448,0.134618


In [16]:
# Load the device frame for meter_id=5 (reported as the washing machine in this
# cell's output), flatten it, and report its row count.
washin_machine_df = iawe_dao.load_device_df(meter_id=5)
washin_machine_transform_df = transform_nilmtk_df_to_my_df(washin_machine_df)
print(len(washin_machine_transform_df))
# washin_machine_transform_df.to_csv(f"{save_path}/washing_machine_iawe.csv", index=False)

washin_machine_transform_df.head()

Meter information: ElecMeter(instance=6, building=1, dataset='iAWE', appliances=[Appliance(type='washing machine', instance=1)])
15686


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-02 11:02:03+05:30,1372743000.0,0.918,243.880005,222.261993,27.674999,223.979004,0.992334
1,2013-07-02 11:02:04+05:30,1372743000.0,0.919,243.860001,222.485992,27.599001,224.190994,0.992395
2,2013-07-02 11:02:05+05:30,1372743000.0,0.923,244.75,224.222,28.174,225.985001,0.992199
3,2013-07-02 11:02:06+05:30,1372743000.0,0.927,245.929993,226.085007,29.358999,227.983994,0.991671
4,2013-07-02 11:02:10+05:30,1372743000.0,1.061,220.660004,231.934006,-31.910999,234.119003,0.990667


In [17]:
# Load the device frame for meter_id=6 (reported as the computer in this cell's
# output), flatten it, and report its row count.
# NOTE(review): the generic name `transform_df` is also assigned by other cells
# in this notebook, so its value depends on which cell ran last.
computer_df = iawe_dao.load_device_df(meter_id=6)
transform_df = transform_nilmtk_df_to_my_df(computer_df)
print(len(transform_df))
# transform_df.to_csv(f"{save_path}/computer_iawe.csv", index=False)

transform_df.head()

Meter information: ElecMeter(instance=7, building=1, dataset='iAWE', appliances=[Appliance(type='computer', instance=1)])
1428128


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-01 07:41:55+05:30,1372645000.0,0.021,206.869995,0.281,-4.255,4.264,0.065901
1,2013-07-01 07:41:56+05:30,1372645000.0,0.02,206.839996,0.297,-4.172,4.182,0.071019
2,2013-07-01 07:41:57+05:30,1372645000.0,0.02,206.789993,0.261,-4.173,4.182,0.06241
3,2013-07-01 07:41:58+05:30,1372645000.0,0.021,206.850006,0.243,-4.257,4.264,0.056989
4,2013-07-01 07:42:02+05:30,1372645000.0,0.317,205.710007,58.176998,-29.41,65.189003,0.892436


In [18]:
# Load the device frame for meter_id=7 (reported as the clothes iron in this
# cell's output), flatten it, and report its row count.
clothes_iron_df = iawe_dao.load_device_df(meter_id=7)
transform_df = transform_nilmtk_df_to_my_df(clothes_iron_df)
print(len(transform_df))
# transform_df.to_csv(f"{save_path}/clothes_iron_iawe.csv", index=False)

transform_df.head()

Meter information: ElecMeter(instance=8, building=1, dataset='iAWE', appliances=[Appliance(type='clothes iron', instance=1)])
3643


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-08 07:32:11+05:30,1373249000.0,0.031,205.699997,0.257,-6.366,6.371,0.040339
1,2013-07-08 07:32:12+05:30,1373249000.0,0.031,205.639999,0.275,-6.402,6.408,0.042915
2,2013-07-08 07:32:13+05:30,1373249000.0,0.03,205.490005,0.297,-6.258,6.265,0.047406
3,2013-07-08 07:32:14+05:30,1373249000.0,0.031,204.869995,0.307,-6.303,6.31,0.048653
4,2013-07-08 07:32:15+05:30,1373249000.0,0.031,205.059998,0.206,-6.302,6.305,0.032672


In [19]:
# Load the device frame for meter_id=9 (reported as the television in this
# cell's output), flatten it, and report its row count.
television_df = iawe_dao.load_device_df(meter_id=9)
transform_df = transform_nilmtk_df_to_my_df(television_df)
print(len(transform_df))
# transform_df.to_csv(f"{save_path}/television_iawe.csv", index=False)

transform_df.head()

Meter information: ElecMeter(instance=10, building=1, dataset='iAWE', appliances=[Appliance(type='television', instance=1)])
283097


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-10 17:43:02+05:30,1373458000.0,0.338,231.410004,73.282997,-27.378,78.546997,0.932983
1,2013-07-10 17:43:03+05:30,1373458000.0,0.34,230.679993,73.278999,-27.136,78.251999,0.936449
2,2013-07-10 17:43:04+05:30,1373458000.0,0.339,230.050003,73.387001,-26.854,78.009003,0.94075
3,2013-07-10 17:43:05+05:30,1373458000.0,0.341,229.509995,73.453003,-27.361,78.383003,0.937104
4,2013-07-10 17:43:06+05:30,1373458000.0,0.34,230.529999,73.418999,-27.421,78.373001,0.936789


In [20]:
# Load the device frame for meter_id=10 (reported as the wet appliance in this
# cell's output), flatten it, and report its row count.
wa_df = iawe_dao.load_device_df(meter_id=10)
transform_df = transform_nilmtk_df_to_my_df(wa_df)
print(len(transform_df))
# transform_df.to_csv(f"{save_path}/wet_appliance_iawe.csv", index=False)

transform_df.head()

Meter information: ElecMeter(instance=11, building=1, dataset='iAWE', appliances=[Appliance(type='wet appliance', instance=1)])
1694622


Unnamed: 0,Time,unix_ts,Irms,Urms,P,Q,S,AvgPowerFactor
0,2013-07-12 15:08:11+05:30,1373622000.0,0.013,228.190002,0.276,2.984,2.997,0.092092
1,2013-07-12 15:08:12+05:30,1373622000.0,0.013,228.529999,0.329,3.066,3.084,0.10668
2,2013-07-12 15:08:13+05:30,1373622000.0,0.013,228.570007,0.422,3.028,3.057,0.138044
3,2013-07-12 15:08:14+05:30,1373622000.0,0.014,228.539993,0.448,3.078,3.111,0.144005
4,2013-07-12 15:08:15+05:30,1373622000.0,0.014,228.570007,0.42,3.11,3.138,0.133843


In [21]:
# Load the device frame for meter_id=11 (reported as the motor in this cell's
# output), flatten it, and report its row count. Per the displayed result, only
# the P column was available for this meter.
motor_df = iawe_dao.load_device_df(meter_id=11)
transform_df = transform_nilmtk_df_to_my_df(motor_df)
print(len(transform_df))
# transform_df.to_csv(f"{save_path}/motor_iawe.csv", index=False)

transform_df.head()

Meter information: ElecMeter(instance=12, building=1, dataset='iAWE', appliances=[Appliance(type='motor', instance=1)])
61849


Unnamed: 0,Time,unix_ts,P
0,2013-07-01 00:00:03+05:30,1372617000.0,0.0
1,2013-07-01 00:00:09+05:30,1372617000.0,0.0
2,2013-07-01 00:00:15+05:30,1372617000.0,0.0
3,2013-07-01 00:00:21+05:30,1372617000.0,0.0
4,2013-07-01 00:00:27+05:30,1372617000.0,0.0
