In [1]:
# Silence every warning for the rest of the session. The filter is installed
# before the remaining imports, presumably so that import-time warnings from the
# deep-learning backend are hidden as well.
# NOTE(review): a blanket "ignore" also hides deprecation and numerical warnings;
# consider narrowing it to specific categories/modules.
import warnings
warnings.filterwarnings("ignore")

import numpy as np

from nilmtk.api import API
from nilmtk_contrib.disaggregate import PB_Single, PB_Multi

Using TensorFlow backend.


In [2]:
from nilmtk import DataSet

# Open 'converted.h5' (the file the experiment below registers under the
# 'REFIT' dataset key) as a nilmtk DataSet.
# NOTE(review): refit_data is not referenced by any later cell visible in this
# notebook; the experiment dict hands the file path to the API directly.
# Confirm whether this cell is still needed.
refit_data = DataSet('converted.h5')

In [3]:
# Parameters that can be tuned for both the PB Single and PB Multi branch:
#
#   window_size   int    number of time steps used per prediction. Default 20
#   n_features    int    number of features to use. Default 1
#
#   n_epochs      int    number of epochs to train. Default 10
#   batch_size    int    size of the batches during training. Default 512
#   pb_value      float  quantile value between 0 and 1. Default 0.5
#
#   use_dropout   bool   use dropout or not. Default True
#   dropout_rate  float  dropout rate between 0 and 1; only relevant when
#                        dropout is active. Default 0.2
#
#   use_maxpool   bool   use maxpool after the Conv layers or not. Default False

sampling_interval = 10

# Build the two algorithm objects first (PB-Single, then PB-Multi); each one
# receives its own freshly created parameter dictionary.
pb_single_model = PB_Single({'pb_value': 0.5, 'window_size': 20,
                             'n_epochs': 1, 'batch_size': 128})
pb_multi_model = PB_Multi({'pb_value': 0.5, 'window_size': 20,
                           'n_epochs': 1, 'batch_size': 128})

# Building 17 is used for both phases, with different date ranges.
train_buildings = {17: {'start_time': '2014-06-03',
                        'end_time': '2014-06-30'}}
test_buildings = {17: {'start_time': '2015-03-01',
                       'end_time': '2015-03-02'}}

train_spec = {'datasets': {'REFIT': {'path': 'converted.h5',
                                     'buildings': train_buildings}}}
test_spec = {'datasets': {'REFIT': {'path': 'converted.h5',
                                    'buildings': test_buildings}},
             'metrics': ['rmse']}

experiment = {
    'power': {'mains': ['active'],
              'appliance': ['active']},
    'sample_rate': sampling_interval,
    'appliances': ['television'],
    'methods': {'PB-Single': pb_single_model,
                'PB-Multi': pb_multi_model},
    'train': train_spec,
    'test': test_spec,
}

In [4]:
# Run the experiment. Constructing the API object executes the pipeline described
# by `experiment` and prints the requested metric table (see the output below).
# The resulting object is kept because later cells read the test data and the
# algorithm objects from it.
api_res = API(experiment)

Joint Testing for all algorithms
Loading data for  REFIT  dataset
Dropping missing values
Generating predictions for : PB_Single
Generating predictions for : PB_Multi
............  rmse  ..............
            PB-Single   PB-Multi
television  41.724618  39.228556


In [5]:
# Because of how the API handles metric testing, the predictions of both
# PB-Single and PB-Multi are prepended with n = window_size zeros.
# This is needed because PB-NILM uses n time steps to calculate the power at step n + 1.

# The number of input features can also be set through the n_features parameter.

# The mains data used for testing is exposed on the API result object
# (printed below as a list holding a single DataFrame).
test_data = api_res.test_mains
print(test_data)

[physical_quantity          power
type                      active
Unix                            
2015-03-01 00:00:00+00:00  184.0
2015-03-01 00:00:10+00:00  184.5
2015-03-01 00:00:20+00:00  185.0
2015-03-01 00:00:30+00:00  185.0
2015-03-01 00:00:40+00:00  185.0
...                          ...
2015-03-01 23:59:10+00:00  244.0
2015-03-01 23:59:20+00:00  238.5
2015-03-01 23:59:30+00:00  241.0
2015-03-01 23:59:40+00:00  241.0
2015-03-01 23:59:50+00:00  235.0

[8640 rows x 1 columns]]


In [6]:
# Every algorithm object is stored in the `methods` dictionary of the API result.
# PB-NILM expects its input in a specific shape; the algorithm's own
# `preprocess_x` method takes care of that conversion.

pb_single = api_res.methods['PB-Single']
processed_test = pb_single.preprocess_x(test_data)

In [7]:
# Inspect the shape of the preprocessed input.
# With 8640 test samples and window_size = 20 the result is (8620, 20, 1);
# presumably (windows, window_size, n_features) — TODO confirm axis meaning.
windowed_input = processed_test['appliance00']
windowed_input.shape

(8620, 20, 1)

In [9]:
# The lookup chain is:
#   1 - API result object
#   2 - its `methods` dictionary
#   3 - the algorithm object stored under 'PB-Single'
#   4 - disaggregate_chunk (an override from the Disaggregator class)

pb_single = api_res.methods['PB-Single']
ypred_disaggregated = pb_single.disaggregate_chunk(test_data)
ypred_disaggregated



[      television
 0       0.000000
 1       0.000000
 2       0.000000
 3       0.000000
 4       0.000000
 ...          ...
 8615   25.231812
 8616   25.227558
 8617   25.228619
 8618   25.224983
 8619   25.226439
 
 [8640 rows x 1 columns]]

In [10]:
# Alternatively, the per-appliance model dictionary of the algorithm object
# can be accessed directly (keys are appliance names, see the output below):

pb_single = api_res.methods['PB-Single']
ypred_models = pb_single.models
ypred_models

{'television': <tensorflow.python.keras.engine.training.Model at 0x28ad06e9320>}

In [11]:
# Accessing the ground-truth appliance data through the API object.
# Each entry of test_submeters is indexed as [0] -> appliance name and
# [1] -> list of frames; the first frame's 'power' columns are shown here.

first_submeter = api_res.test_submeters[0]
appliance_name = first_submeter[0]
appliance_power = first_submeter[1][0]['power']

print('Appliance: {}'.format(appliance_name))
print('Features: {}'.format(appliance_power.keys()))
print('Actual data: {}'.format(appliance_power))

Appliance: television
Features: Index(['active'], dtype='object', name='type')
Actual data: type                       active
Unix                             
2015-03-01 00:00:00+00:00    30.0
2015-03-01 00:00:10+00:00    29.5
2015-03-01 00:00:20+00:00    29.0
2015-03-01 00:00:30+00:00    29.5
2015-03-01 00:00:40+00:00    29.0
...                           ...
2015-03-01 23:59:10+00:00    19.0
2015-03-01 23:59:20+00:00    19.0
2015-03-01 23:59:30+00:00    19.0
2015-03-01 23:59:40+00:00    19.0
2015-03-01 23:59:50+00:00    19.0

[8640 rows x 1 columns]


In [12]:
from sklearn.metrics import mean_squared_error

In [13]:
# Manual metric calculation
#
# The first `window_size` predictions are zero padding (PB-NILM needs
# `window_size` samples before it can produce its first estimate), so they are
# dropped from both the ground truth and the prediction before scoring.
# The API-reported RMSE is computed with the padded samples included, which
# presumably explains the small difference from the value printed here.

# Must match the 'window_size' given to PB_Single in the experiment definition.
window_size = 20

# Ground truth as a column vector. reshape(-1, 1) lets numpy infer the number of
# rows, so the cell keeps working when the test period (sample count) changes.
y_true = np.reshape(
    api_res.test_submeters[0][1][0]['power']['active'].iloc[window_size:].values,
    (-1, 1))
y_pred = np.array(ypred_disaggregated[0])[window_size:]

print('gt shape: {}'.format(y_true.shape))
print('yhat shape: {}'.format(y_pred.shape))

temp_mse = mean_squared_error(y_true, y_pred)

print('mse: {}'.format(temp_mse))
print('rmse: {}'.format(temp_mse**0.5))

gt shape: (8620, 1)
yhat shape: (8620, 1)
mse: 1742.7562896745337
rmse: 41.74633264940208
