# MeterGroup to Dataframe

# Initialization for Python and NILMTK

Let's kick off processing and analysing the data with Python.

In [1]:
import dateutil
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from datetime import datetime

import nilmtk as ntk
import utility.tools as tools

## Define constants and global variables

In [3]:
# Path to the UK-DALE HDF5 file (relative path).
RAW_FILENAME = "../../../ukdale/ukdale.h5"

# Start and end timestamps of the time window to load.
START_TS = '2014-01-01 00:00:00'
END_TS = '2014-01-31 23:59:59'

# Building whose meters are analysed.
HOUSE_NUMBER = 1

# Notebook-wide settings: silence all warnings and enlarge the default figure.
warnings.filterwarnings("ignore")
plt.rcParams['figure.figsize'] = [15, 10]

## Create objects - nilmtk.DataSet

In [4]:
# Open the UK-DALE dataset stored at RAW_FILENAME.
ukdale_ds = ntk.DataSet(RAW_FILENAME)

# Restrict the dataset to the window START_TS .. END_TS.
ukdale_ds.set_window(start=START_TS, end=END_TS)

# MeterGroup with the electricity meters of building HOUSE_NUMBER.
house_data = ukdale_ds.buildings[HOUSE_NUMBER].elec

### Appliances as a Python list

In [5]:
type(house_data.appliances)

list

In [6]:
house_data.appliances

[Appliance(type='vacuum cleaner', instance=1),
 Appliance(type='light', instance=3),
 Appliance(type='light', instance=6),
 Appliance(type='light', instance=12),
 Appliance(type='laptop computer', instance=2),
 Appliance(type='wireless phone charger', instance=1),
 Appliance(type='light', instance=9),
 Appliance(type='radio', instance=2),
 Appliance(type='broadband router', instance=2),
 Appliance(type='light', instance=15),
 Appliance(type='immersion heater', instance=1),
 Appliance(type='washer dryer', instance=1),
 Appliance(type='food processor', instance=2),
 Appliance(type='mobile phone charger', instance=1),
 Appliance(type='audio system', instance=2),
 Appliance(type='desktop computer', instance=1),
 Appliance(type='laptop computer', instance=1),
 Appliance(type='fan', instance=2),
 Appliance(type='coffee maker', instance=1),
 Appliance(type='USB hub', instance=1),
 Appliance(type='security alarm', instance=1),
 Appliance(type='solar thermal pumping station', instance=1),
 Appl

### Show house_data

In [7]:
house_data

MeterGroup(meters=
  ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])
  ElecMeter(instance=3, building=1, dataset='UK-DALE', appliances=[Appliance(type='solar thermal pumping station', instance=1)])
  ElecMeter(instance=4, building=1, dataset='UK-DALE', appliances=[Appliance(type='laptop computer', instance=1), Appliance(type='laptop computer', instance=3)])
  ElecMeter(instance=5, building=1, dataset='UK-DALE', appliances=[Appliance(type='washer dryer', instance=1), Appliance(type='washer dryer', instance=2)])
  ElecMeter(instance=6, building=1, dataset='UK-DALE', appliances=[Appliance(type='dish washer', instance=1)])
  ElecMeter(instance=7, building=1, dataset='UK-DALE', appliances=[Appliance(type='television', instance=1)])
  ElecMeter(instance=8, building=1, dataset='UK-DALE', appliances=[Appliance(type='light', instance=1), Appliance(type='light', instance=2)])
  ElecMeter(instance=9, building=1, dataset='UK-DALE', appliances

# Transforming from MeterGroup to DataFrame

## Using API - select_using_appliances

Example:
- select_using_appliances(category='lighting')
- select_using_appliances(type='fridge')
- select_using_appliances(type=['fridge', 'kettle', 'toaster'])
- select_using_appliances(building=1, category='lighting')
- select_using_appliances(room='bathroom')

In [8]:
# Build a MeterGroup restricted to the chosen appliance types
custom_mg = house_data.select_using_appliances(
    type=['microwave', 'kettle', 'toaster'],
)

# Load the readings of those meters into a dataframe (one column per meter)
custom_mg_df = custom_mg.dataframe_of_meters()

# Summary statistics for each column
custom_mg_df.describe()

Unnamed: 0,"(13, 1, UK-DALE)","(10, 1, UK-DALE)","(11, 1, UK-DALE)"
count,446399.0,446400.0,446400.0
mean,5.547256,11.711927,6.76375
std,83.382286,157.690262,102.883743
min,0.0,0.0,0.0
25%,1.0,1.0,0.0
50%,1.0,1.0,0.0
75%,1.0,1.0,0.0
max,3267.0,3114.0,2702.0


### Change the column headers of the dataframe

In [9]:
# Replace the meter-identifier column headers with readable appliance labels.
# NOTE(review): this reassigns the columns of custom_mg_df in place, so
# re-running this cell passes the already-renamed labels back to get_labels;
# presumably that fails -- re-run the previous cell first. TODO confirm.
custom_mg_df.columns = house_data.get_labels(custom_mg_df.columns)

# Preview the first rows with the new headers
custom_mg_df.head()

Unnamed: 0,Microwave,Kettle,Toaster
2014-01-01 00:00:00+00:00,1.0,1.0,1.0
2014-01-01 00:00:06+00:00,1.0,1.0,1.0
2014-01-01 00:00:12+00:00,1.0,1.0,1.0
2014-01-01 00:00:18+00:00,1.0,1.0,1.0
2014-01-01 00:00:24+00:00,1.0,1.0,1.0


### Export the dataframe to CSV

In [10]:
custom_mg_df.to_csv("custom_mg_df.csv")

In [11]:
# Read the CSV back, restoring the timestamps written in the first column as
# the datetime index instead of leaving them in an unnamed text column.
csv_custom_mg_df = pd.read_csv("custom_mg_df.csv", index_col=0, parse_dates=True)

# Summary statistics, for comparison with those of the exported dataframe
csv_custom_mg_df.describe()

Unnamed: 0,Microwave,Kettle,Toaster
count,446399.0,446400.0,446400.0
mean,5.547256,11.711928,6.76375
std,83.382286,157.690255,102.883743
min,0.0,0.0,0.0
25%,1.0,1.0,0.0
50%,1.0,1.0,0.0
75%,1.0,1.0,0.0
max,3267.0,3114.0,2702.0


## Using the top-K appliances API - select_top_k

In [12]:
# Select the top 3 submeters (k=3) from the MeterGroup; returns a MeterGroup object
mg_top = house_data.submeters().select_top_k(k=3)

print("\n\nData Type of mg_top is {}.".format(type(mg_top)))

# Show the selected MeterGroup values
mg_top

52/52 ElecMeter(instance=53, building=1, dataset='UK-DALE', appliances=[Appliance(type='printer', instance=1)])ance=1)])e(type='external hard disk', instance=1)])e=2), Appliance(type='radio', instance=3)])1)])

Data Type of mg_top is <class 'nilmtk.metergroup.MeterGroup'>.


MeterGroup(meters=
  ElecMeter(instance=12, building=1, dataset='UK-DALE', appliances=[Appliance(type='fridge freezer', instance=1)])
  ElecMeter(instance=5, building=1, dataset='UK-DALE', appliances=[Appliance(type='washer dryer', instance=1), Appliance(type='washer dryer', instance=2)])
  ElecMeter(instance=25, building=1, dataset='UK-DALE', appliances=[Appliance(type='light', instance=16)])
)

In [13]:
# Load the readings of the selected meters into a dataframe
mg_top_df = mg_top.dataframe_of_meters()

# Replace the meter-identifier column headers with readable appliance labels
mg_top_df.columns = house_data.get_labels(mg_top_df.columns)

# Summary statistics for each column of the dataframe
mg_top_df.describe()

Unnamed: 0,Fridge freezer,Washer dryer,Light
count,446400.0,446399.0,446400.0
mean,36.816189,31.805964,23.749281
std,53.171547,220.907303,48.031818
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,0.0,0.0,0.0
75%,86.0,0.0,21.0
max,1850.0,3963.0,343.0


## Using a customized MeterGroup and specific instance number(s)

In [14]:
# Pick individual ElecMeters by instance number using subscript syntax
# (equivalent to calling house_data.__getitem__ directly).
my_em_2 = house_data[2]
my_em_3 = house_data[3]
my_em_5 = house_data[5]
my_em_7 = house_data[7]
my_em_8 = house_data[8]

# Combine the chosen meters into a custom MeterGroup and display it
mg = ntk.MeterGroup([my_em_2, my_em_3, my_em_5, my_em_7, my_em_8])
mg

MeterGroup(meters=
  ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])
  ElecMeter(instance=3, building=1, dataset='UK-DALE', appliances=[Appliance(type='solar thermal pumping station', instance=1)])
  ElecMeter(instance=5, building=1, dataset='UK-DALE', appliances=[Appliance(type='washer dryer', instance=1), Appliance(type='washer dryer', instance=2)])
  ElecMeter(instance=7, building=1, dataset='UK-DALE', appliances=[Appliance(type='television', instance=1)])
  ElecMeter(instance=8, building=1, dataset='UK-DALE', appliances=[Appliance(type='light', instance=1), Appliance(type='light', instance=2)])
)

In [15]:
df = mg.dataframe_of_meters()

df.shape

(446400, 5)

In [16]:
# Replace the meter-identifier column headers with readable appliance labels.
# NOTE(review): df is created in an earlier cell and renamed in place here, so
# re-running only this cell passes the already-renamed labels back to
# get_labels; presumably that fails -- re-create df first. TODO confirm.
df.columns = house_data.get_labels(df.columns)

# Summary statistics for each column
df.describe()

Unnamed: 0,Boiler,Solar thermal pumping station,Washer dryer,Television,Light
count,446400.0,446400.0,446399.0,446399.0,446400.0
mean,19.932907,3.676338,31.805964,9.733294,14.715464
std,25.161802,13.268074,220.907303,28.423477,43.610966
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,1.0,0.0
50%,12.0,0.0,0.0,1.0,0.0
75%,12.0,0.0,0.0,1.0,0.0
max,122.0,62.0,3963.0,1248.0,345.0


# Exploring more APIs of MeterGroup

In [17]:
e = house_data.meters[0]
e

ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])

In [18]:
e.device

{'data_logger': {'creators': ['Jack Kelly'],
  'model': 'rfm_ecomanager_logger',
  'model_url': 'https://github.com/JackKelly/rfm_ecomanager_logger'},
 'manufacturer': 'Current Cost',
 'max_sample_period': 120,
 'measurements': [{'upper_limit': 25000,
   'lower_limit': 0,
   'physical_quantity': 'power',
   'type': 'apparent'}],
 'model': 'CurrentCost Tx',
 'model_url': 'http://www.currentcost.com/product-transmitter.html',
 'sample_period': 6,
 'wireless': True,
 'wireless_configuration': {'base': 'creators: [Jack Kelly] model: rfm_edf_ecomanager model_url: https://github.com/JackKelly/rfm_edf_ecomanager/\n',
  'protocol': 'custom',
  'carrier_frequency': 434}}

In [19]:
e.identifier

ElecMeterID(instance=2, building=1, dataset='UK-DALE')

In [20]:
# NOTE: `instance` is a method, so without parentheses this displays the
# bound method itself; call `e.instance()` to get its return value.
e.instance

<bound method ElecMeter.instance of ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])>

In [21]:
e.key

'/building1/elec/meter2'

In [22]:
# NOTE: `when_on` is a method, so without parentheses this displays the
# bound method itself rather than invoking it.
e.when_on

<bound method Electric.when_on of ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])>

## API - Get item from class of nilmtk.elecmeter.ElecMeter 

### Get item by instance number and return ElecMeter

In [24]:
# Retrieve a single ElecMeter by its instance number, using subscript syntax
# (equivalent to calling house_data.__getitem__(2) directly)
myitem1 = house_data[2]

type(myitem1)

nilmtk.elecmeter.ElecMeter

In [25]:
myitem1

ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])

In [26]:
myitem1.device

{'data_logger': {'creators': ['Jack Kelly'],
  'model': 'rfm_ecomanager_logger',
  'model_url': 'https://github.com/JackKelly/rfm_ecomanager_logger'},
 'manufacturer': 'Current Cost',
 'max_sample_period': 120,
 'measurements': [{'upper_limit': 25000,
   'lower_limit': 0,
   'physical_quantity': 'power',
   'type': 'apparent'}],
 'model': 'CurrentCost Tx',
 'model_url': 'http://www.currentcost.com/product-transmitter.html',
 'sample_period': 6,
 'wireless': True,
 'wireless_configuration': {'base': 'creators: [Jack Kelly] model: rfm_edf_ecomanager model_url: https://github.com/JackKelly/rfm_edf_ecomanager/\n',
  'protocol': 'custom',
  'carrier_frequency': 434}}

### Get item by appliance name & return ElecMeter object

In [27]:
# Look up a meter by appliance type, using subscript syntax
# (equivalent to calling house_data.__getitem__('light') directly)
myitem2 = house_data['light']

In [28]:
myitem2

ElecMeter(instance=8, building=1, dataset='UK-DALE', appliances=[Appliance(type='light', instance=1), Appliance(type='light', instance=2)])

In [29]:
# NOTE: `instance` is a method, so without parentheses this displays the
# bound method itself; call `myitem2.instance()` to get its return value.
myitem2.instance

<bound method ElecMeter.instance of ElecMeter(instance=8, building=1, dataset='UK-DALE', appliances=[Appliance(type='light', instance=1), Appliance(type='light', instance=2)])>

### Get item by specifying a tuple in ElecMeterID format

```
(2, 1, 'UK-DALE')
```
***is equivalent to:***
```
ElecMeter(instance=2, building=1, dataset='UK-DALE')
```

In [30]:
# Look up a meter by an (instance, building, dataset) tuple, using subscript
# syntax instead of calling __getitem__ directly
myitem3 = house_data[(2, 1, 'UK-DALE')]

type(myitem3)

nilmtk.elecmeter.ElecMeter

In [31]:
type(myitem3.identifier)

nilmtk.elecmeter.ElecMeterID

In [32]:
myitem3.identifier

ElecMeterID(instance=2, building=1, dataset='UK-DALE')

### Get item by using ElecMeterID

In [33]:
from collections import namedtuple

# Local namedtuple with the fields (instance, building, dataset)
ElecMeterID = namedtuple('ElecMeterID', ['instance', 'building', 'dataset'])

# Identifiers for meters 2, 3 and 5 of building 1 in UK-DALE
my_emid_2 = ElecMeterID(instance=2, building=1, dataset='UK-DALE')
my_emid_3 = ElecMeterID(instance=3, building=1, dataset='UK-DALE')
my_emid_5 = ElecMeterID(instance=5, building=1, dataset='UK-DALE')

In [34]:
my_emid_2

ElecMeterID(instance=2, building=1, dataset='UK-DALE')

In [35]:
# Look up a meter by its ElecMeterID, using subscript syntax instead of
# calling __getitem__ directly
myobj = house_data[my_emid_2]
myobj

ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])

## Get two appliances, and return MeterGroup

### Specified by instance number

In [36]:
# A tuple of two instance numbers selects both meters (subscript syntax
# instead of calling __getitem__ directly)
myitem4 = house_data[(2, 3)]
type(myitem4)

nilmtk.metergroup.MeterGroup

In [37]:
myitem4

MeterGroup(meters=
  ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])
  ElecMeter(instance=3, building=1, dataset='UK-DALE', appliances=[Appliance(type='solar thermal pumping station', instance=1)])
)

### Specified in ElecMeter format

In [38]:
# A tuple of (instance, building, dataset) tuples selects several meters
# (subscript syntax instead of calling __getitem__ directly)
myitem5 = house_data[((2, 1, 'UK-DALE'), (6, 1, 'UK-DALE'))]
type(myitem5)

nilmtk.metergroup.MeterGroup

In [39]:
myitem5

MeterGroup(meters=
  ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])
  ElecMeter(instance=6, building=1, dataset='UK-DALE', appliances=[Appliance(type='dish washer', instance=1)])
)

### Specified by ElecMeterID

In [40]:
# Namedtuple with the fields (instance, building, dataset)
ElecMeterID = namedtuple('ElecMeterID', ['instance', 'building', 'dataset'])

# Identifiers for meters 2, 3 and 5 of building 1 in UK-DALE
my_emid_2, my_emid_3, my_emid_5 = (
    ElecMeterID(instance=meter_no, building=1, dataset='UK-DALE')
    for meter_no in (2, 3, 5)
)

In [41]:
# A tuple of ElecMeterIDs selects several meters (subscript syntax instead
# of calling __getitem__ directly)
myobj = house_data[(my_emid_2, my_emid_3)]
myobj

MeterGroup(meters=
  ElecMeter(instance=2, building=1, dataset='UK-DALE', appliances=[Appliance(type='boiler', instance=1)])
  ElecMeter(instance=3, building=1, dataset='UK-DALE', appliances=[Appliance(type='solar thermal pumping station', instance=1)])
)

# About -  <class 'tuple'>

In [42]:
a = (1, 3, 4)
type(a)

tuple

In [43]:
b = ((1, 2, 'aa'), (3, 4, 'ab'))
type(b)

tuple

In [44]:
# Iterating over the outer tuple yields each inner tuple in turn
for itm in b:
    print("{} - {}".format(type(itm), itm))

<class 'tuple'> - (1, 2, 'aa')
<class 'tuple'> - (3, 4, 'ab')


In [45]:
c = [(1, 2, 'aa'), (3, 4, 'ab')]
type(c)

list

In [46]:
# Iterating over the list yields each contained tuple in turn
for itm in c:
    print("{} - {}".format(type(itm), itm))

<class 'tuple'> - (1, 2, 'aa')
<class 'tuple'> - (3, 4, 'ab')
