# Home Dataset Creation

This notebook is used to create the home dataset, by composing different appliances and their respective power consumption. The dataset used is GREEND. The power consumption and metadata are read from the dataset using the [nilmtk](https://github.com/nilmtk) library.

## Setup

In [None]:
%pip install git+https://github.com/nilmtk/nilmtk # Install from git because the conda package is out of date
%pip install git+https://github.com/nilmtk/nilm_metadata 

## Smart*

### Import Dataset

In [5]:
from nilmtk import DataSet
from nilmtk.utils import print_dict

smart_star = DataSet('data/smart_star.h5')

## GREEND

In [2]:
from nilmtk import DataSet
from nilmtk.utils import print_dict

greend = DataSet('data/greend.h5')

In [121]:
appliances = {
    1: ['radio', 'lamp', 'fridge', 'television'],
    3: ['microwave'],
}

def replace_with_average(group):
    group['power'] = group['power'].mean()
    return group.iloc[:1]

for building, appliance_list in appliances.items():
    print("> Building:", building)
    for appliance in appliance_list:
        print(f"  - Appliance: {appliance} ... ", end="")
        name_undercase = appliance.replace(' ', '_')

        df = next(greend.buildings[building].elec[appliance].load()).reset_index()
        df.columns = ["timestamp", "power"]
        df.drop(columns=["timestamp"], inplace=True)
        #df = df.groupby(df.index // 60).apply(replace_with_average)
        #df = df.reset_index(drop=True)
        df = df.rolling(60, min_periods=1).mean()[::60]

        df.to_csv(f"appliances/{name_undercase}.csv", index=False)
        print("OK")

> Building: 1
  - Appliance: dish washer ... OK
  - Appliance: washing machine ... OK
  - Appliance: radio ... OK
  - Appliance: lamp ... OK
  - Appliance: fridge ... OK
  - Appliance: television ... OK
> Building: 3
  - Appliance: microwave ... OK
> Building: 6
  - Appliance: electric oven ... OK


In [109]:
import numpy as np
from nilmtk.utils import timedelta64_to_secs

def get_activations(chunk, min_off_duration=0, min_on_duration=0,
                    border=1, on_power_threshold=5):
    """Returns runs of an appliance.

    Most appliances spend a lot of their time off.  This function finds
    periods when the appliance is on.

    Parameters
    ----------
    chunk : pd.Series
    min_off_duration : int
        If min_off_duration > 0 then ignore 'off' periods less than
        min_off_duration seconds of sub-threshold power consumption
        (e.g. a washing machine might draw no power for a short
        period while the clothes soak.)  Defaults to 0.
    min_on_duration : int
        Any activation lasting less seconds than min_on_duration will be
        ignored.  Defaults to 0.
    border : int
        Number of rows to include before and after the detected activation
    on_power_threshold : int or float
        Watts

    Returns
    -------
    list of pd.Series.  Each series contains one activation.
    """
    when_on = chunk >= on_power_threshold

    # Find state changes
    state_changes = when_on.astype(np.int).diff()
    del when_on
    switch_on_events = np.where(state_changes == 1)[0]
    switch_off_events = np.where(state_changes == -1)[0]
    del state_changes

    if len(switch_on_events) == 0 or len(switch_off_events) == 0:
        return []

    # Make sure events align
    if switch_off_events[0] < switch_on_events[0]:
        switch_off_events = switch_off_events[1:]
        if len(switch_off_events) == 0:
            return []
    if switch_on_events[-1] > switch_off_events[-1]:
        switch_on_events = switch_on_events[:-1]
        if len(switch_on_events) == 0:
            return []
    assert len(switch_on_events) == len(switch_off_events)

    # Smooth over off-durations less than min_off_duration
    if min_off_duration > 0:
        off_durations = (chunk.index[switch_on_events[1:]].values -
                         chunk.index[switch_off_events[:-1]].values)

        #off_durations = timedelta64_to_secs(off_durations)

        above_threshold_off_durations = np.where(
            off_durations >= min_off_duration)[0]

        # Now remove off_events and on_events
        switch_off_events = switch_off_events[
            np.concatenate([above_threshold_off_durations,
                            [len(switch_off_events)-1]])]
        switch_on_events = switch_on_events[
            np.concatenate([[0], above_threshold_off_durations+1])]
    assert len(switch_on_events) == len(switch_off_events)

    activations = []
    for on, off in zip(switch_on_events, switch_off_events):
        duration = (chunk.index[off] - chunk.index[on])
        if duration < min_on_duration:
            continue
        on -= 1 + border
        if on < 0:
            on = 0
        off += border
        activation = chunk.iloc[on:off]
        # throw away any activation with any NaN values
        if not activation.isnull().values.any():
            activations.append(activation)

    return activations

In [187]:
activations = greend.buildings[1].elec['television'].get_activations(min_off_duration=10, min_on_duration=10, on_power_threshold=10)

lens = [len(a) for a in activations]
lens_mean = np.mean(lens)
print(len(activations), lens_mean)

114 261.06140350877195


In [179]:
import os 

activations ={
    1: {'lamp': [10,10,5], 'radio': [10,10,20], 'television': [10,10,10]},
    3: {'microwave': [30,12,200]},
}

for building, appliance_list in activations.items():
    print("> Building:", building)
    for appliance, params in appliance_list.items():
        print(f"  - Appliance: {appliance} ... ", end="")
        name_undercase = appliance.replace(' ', '_')

        #app = pd.read_csv(f"appliances/{name_undercase}.csv"
        app = greend.buildings[building].elec[appliance].get_activations(min_off_duration=params[0], min_on_duration=params[1], on_power_threshold=params[2])

        os.mkdir(f"activations/{name_undercase}")

        for i, activ in enumerate(app):
            activ.reset_index(drop=True, inplace=True)
            activ.dropna(inplace=True)
        
            activ.to_csv(f"activations/{name_undercase}/{i}.csv", index=False)

        print("OK")

> Building: 1
  - Appliance: lamp ... 

  activ.to_csv(f"activations/{name_undercase}/{i}.csv", index=False)


OK
  - Appliance: radio ... OK
> Building: 3
  - Appliance: microwave ... OK


## DEDDIAG

### Load Dataset

In [78]:
from nilmtk import DataSet

deddiag = DataSet('data/deddiag.h5')
building = 8
elec = deddiag.buildings[building].elec

### Extract Appliances

In [107]:
dish_washer = elec['washing machine']

len(dish_washer.get_activations(min_on_duration=10*60, min_off_duration=10*60, on_power_threshold=300))

68