In [1]:
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
from nilmtk.datastore import HDFDataStore
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from neuraldisaggregator.RNN.rnndisaggregator import RNNDisaggregator
import neuraldisaggregator.RNN.metrics as metrics
import os
import csv
import pandas as pd
from sklearn import preprocessing
import nilmtk
import os
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [None]:
#For more information on RNN disaggregation implementation: https://github.com/OdysseasKr/neural-disaggregator/tree/master/RNN

# Prerequisites:
# The dataset must already be converted to HDF5 format (for this project: ukdale_dataset.h5).
# For the conversion, refer to https://github.com/nilmtk/nilmtk

In [31]:
# Building of the UKDALE dataset to disaggregate; training and testing use the same building.
building_number = 2
train_building = building_number
test_building = building_number

# Split date: the training window ends here and the testing window starts here.
train_end="2013-07-16"
test_start="2013-07-16"

# Resampling period handed to training and prediction as `sample_period`
# (presumably seconds — TODO confirm against the disaggregator documentation).
sample_period = 60

# The same file is opened twice so that each handle can carry its own time window.
train = DataSet('ukdale_dataset.h5')
test = DataSet('ukdale_dataset.h5')

# Restrict each handle to its side of the split.
train.set_window(end=train_end)
test.set_window(start=test_start)

# Electricity meters of the selected building.
train_elec = train.buildings[train_building].elec
test_elec = test.buildings[test_building].elec

# The aggregated (mains) meter that provides the model input.
train_mains = train_elec.mains()
test_mains = test_elec.mains()

# Output locations: per-appliance CSVs, intermediate HDF5 files, and final .dat files.
output_dir= "RNN_results/house_"+str(train_building)+"/"
intermediate_dir= "RNN_results/house_"+str(train_building)+"_intermediate_files/"
final_dir =  "RNN_results/house_"+str(train_building)+"_final/"

# exist_ok=True makes the cell safely re-runnable and avoids the race between
# "does it exist?" and "create it" that a separate os.path.exists check has.
for directory in (output_dir, intermediate_dir, final_dir):
    os.makedirs(directory, exist_ok=True)

print("Loaded", len(train.buildings), 'buildings')
print("Selected UKDALE Building Number: "+ str(building_number))
print("Timeframe for training: "+ str(train_elec.get_timeframe()))
print("Timeframe for testing: "+ str(test_elec.get_timeframe()))
print("Output Directory: "+ str(output_dir))
print("Intermediate Directory: "+ str(intermediate_dir))
print("Final Directory: "+ str(final_dir))

Loaded 5 buildings
Selected UKDALE Building Number: 2
Timeframe for training: TimeFrame(start='2013-02-17 15:39:19+00:00', end='2013-07-16 00:00:00+01:00', empty=False)
Timeframe for testing: TimeFrame(start='2013-07-16 00:00:00+01:00', end='2013-10-10 06:16:00.500000+01:00', empty=False)
Output Directory: RNN_results/house_2/
Intermediate Directory: RNN_results/house_2_intermediate_files_test/
Final Directory: RNN_results/house_2_final/


In [32]:
# Create the RNN disaggregator object that the training and prediction calls operate on.
# NOTE(review): this single object is passed to train_RNN for every appliance, so
# presumably its learned state carries over from one appliance to the next — confirm
# that this is intended rather than one fresh model per appliance.
rnn = RNNDisaggregator()

In [46]:
def train_RNN(rnn, train_elec, train_building, meter_key, sample_period,
              n_checkpoints=3, epochs_per_checkpoint=5):
    """
    Train the RNN disaggregator on one appliance, exporting the model after each checkpoint.

    Input:

    rnn = Disaggregator object; must provide train(...) and export_model(...)
    train_elec = Training meter group; must provide mains() and submeters()
    train_building = Building number (only used in the exported model file name)
    meter_key = Key selecting the target appliance in train_elec.submeters()
    sample_period = Resampling period; converted with int() before being passed on
    n_checkpoints = Number of train/export rounds (default 3, the previous fixed value)
    epochs_per_checkpoint = Epochs trained in each round (default 5, the previous fixed value)

    Output:

    rnn = The same disaggregator object after training
    train_meter = Appliance meter that was used as the training target

    """
    train_mains = train_elec.mains()
    # Target appliance meter, selected by meter_key (any appliance, not only the microwave).
    train_meter = train_elec.submeters()[meter_key]
    start = time.time()
    print("========== TRAIN ============")
    epochs = 0  # cumulative number of epochs trained so far in this call
    for _ in range(n_checkpoints):
        print("CHECKPOINT {}".format(epochs))
        rnn.train(train_mains, train_meter, epochs=epochs_per_checkpoint, sample_period=int(sample_period))
        epochs += epochs_per_checkpoint
        # One model file per checkpoint, named after building, appliance and cumulative epochs.
        rnn.export_model("UKDALE-RNN-h{}-{}-{}epochs.h5".format(train_building,
                                                            meter_key,
                                                            epochs))
    end = time.time()
    print("Train =", end-start, "seconds.")
    return rnn, train_meter
    
def predict_RNN(rnn, meter_key, test_mains, train_meter, sample_period, intermediate_dir):
    """
    Disaggregate the test mains with the trained RNN and store the result in an HDF5 file.

    Input:

    rnn = Trained disaggregator object; must provide disaggregate(...)
    meter_key = Appliance meter key (only used to build the output file name)
    test_mains = Mains data for testing
    train_meter = Appliance meter that was used as the training target
    sample_period = Resampling period; converted with int() before being passed on
    intermediate_dir = Directory prefix for the output file (concatenated as-is,
                       so it must end with a path separator)

    Output:

    disag_filename = Path of the disaggregated h5 output file for the current appliance

    """
    print("========== DISAGGREGATE ============")
    disag_filename = intermediate_dir+'disag-out_'+str(meter_key)+'.h5' # The filename of the resulting datastore
    output = HDFDataStore(disag_filename, 'w')
    try:
        rnn.disaggregate(test_mains, output, train_meter, sample_period=int(sample_period))
    finally:
        # Close the store even when disaggregation fails, so the file handle is not leaked.
        output.close()
    return disag_filename
    
def prepare_Results(disag_filename, meter_key, train_building, output_dir):
    """
    Load the disaggregated data of one appliance and save it as a CSV file.

    Input:

    disag_filename = Disaggregated data h5 output file for current appliance
    meter_key = Appliance key as a string; a purely numeric string is converted to
                an int meter key and its CSV/column is named "laptop computer_2"
    train_building = Building number
    output_dir = Output directory prefix (concatenated as-is, so it must end with
                 a path separator)

    Output:

    No Output (writes output_dir + <meter name> + ".csv")

    """
    print("========== PREPARE RESULTS ============")
    result = DataSet(disag_filename)
    try:
        res_elec = result.buildings[train_building].elec
        meter_name= meter_key
        if meter_key.isnumeric():
            meter_key = int(meter_key)
            meter_name="laptop computer_2"
        # Only the first chunk yielded by load() is used, as before.
        rnn_appliance=next(res_elec[meter_key].load())
    finally:
        # Release the HDF5 file once the data is in memory; a handle left open here
        # would otherwise accumulate for every appliance processed.
        result.store.close()
    rnn_appliance.columns=[meter_name]
    # Drop the UTC offset from every index entry (see modify_index); iterating the
    # index directly avoids a row-wise DataFrame.apply.
    rnn_appliance.index = pd.DatetimeIndex([modify_index(stamp) for stamp in rnn_appliance.index])
    rnn_appliance.index.name="Date"
    rnn_appliance.to_csv(output_dir+meter_name+".csv")
    
def modify_index(index):
    """
    Strip the "+01:00" / "+00:00" UTC offset text from an index entry and parse it

    Input:

    index = Dataframe index entry (anything whose str() is a date/time text)

    Output:

    index = Entry converted with pd.to_datetime after the offset text was removed

    """
    text = str(index)
    # Only these two offsets are removed; any other offset text is left in place.
    for utc_offset in ("+01:00", "+00:00"):
        text = text.replace(utc_offset, "")
    return pd.to_datetime(text)

def _append_row(frame, row):
    """Return `frame` with the dict `row` added as one new row (column order of `frame` kept)."""
    columns = list(frame.columns) + [key for key in row if key not in frame.columns]
    new_row = pd.DataFrame([row], columns=columns)
    if len(frame) == 0:
        # Nothing to concatenate with yet; this also avoids concatenating an empty frame.
        return new_row
    return pd.concat([frame, new_row], ignore_index=True)


def evaluation_results(disag_filename, test_building, test_elec, meter_key, metrics_df, rmse_df):
    """
    Compute the evaluation metrics of one appliance, print them and add them to the dataframes

    Input:

    disag_filename = Disaggregated data h5 output file for current appliance
    test_building = Building number
    test_elec = Test data (ground-truth meters, indexed by meter key)
    meter_key = Appliance key as a string; a purely numeric string is converted to
                an int meter key and reported as "laptop computer_2"
    metrics_df = Accuracy, Precision, Recall, F1 dataframe
    rmse_df = RMSE, MAE dataframe

    Output:

    metrics_df = Accuracy, Precision, Recall, F1 dataframe with one row added
    rmse_df = RMSE, MAE dataframe with one row added

    """
    meter_name = meter_key
    if meter_key.isnumeric():
        meter_key= int(meter_key)
        # Same label as prepare_Results uses for the numeric meter, so that this row
        # does not collide with the "laptop computer" row of the first laptop.
        meter_name = "laptop computer_2"
    print("========== RESULTS FOR " + meter_name +"============")
    result = DataSet(disag_filename)
    try:
        res_elec = result.buildings[test_building].elec
        predicted = res_elec[meter_key]
        ground_truth = test_elec[meter_key]
        # Each metric is computed once and reused for both printing and the dataframes.
        rpaf = metrics.recall_precision_accuracy_f1(predicted, ground_truth)
        relative_error = metrics.relative_error_total_energy(predicted, ground_truth)
        mean_abs_error = metrics.mean_absolute_error(predicted, ground_truth)
    finally:
        # Release the HDF5 file of the disaggregation output.
        result.store.close()
    # rpaf is indexed as (recall, precision, accuracy, f1).
    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relative_error))
    print("============ Mean absolute error(in Watts): {}".format(mean_abs_error))
    metrics_df = _append_row(metrics_df, {'Appliance' : meter_name , 'Accuracy' : float(rpaf[2]), 'Precision' : float(rpaf[1]), 'Recall' : float(rpaf[0]), 'F1' : float(rpaf[3])})
    # NOTE(review): the 'RMSE' column is filled with the relative error in total energy,
    # not a root-mean-square error; the column name is kept for compatibility.
    rmse_df = _append_row(rmse_df, {'Appliance' : meter_name , 'RMSE' : float(relative_error), 'MAE' : float(mean_abs_error)})
    return metrics_df, rmse_df

def to_timestamp(date):
    """
    Convert a date string to a timestamp

    Input:

    date = Date text in the form 'YYYY-MM-DD HH:MM:SS'

    Output:

    Timestamp = Seconds since the epoch for the input date, as returned by
                time.mktime (i.e. the text is interpreted as local time)
    """
    parsed = time.strptime(date, '%Y-%m-%d %H:%M:%S')
    return time.mktime(parsed)

def generate_output_files(building_number, Pred_RNN, final_dir=None):
    """
    Generate the disaggregated output files for further processing [pattern extraction]

    Writes one "channel_<n>.dat" file per column of Pred_RNN (numbering starts at 2;
    number 1 is reserved for the "Aggregate" label and gets no file) plus a
    "labels.dat" file mapping channel numbers to labels. All files are space-separated
    and have no header.

    Input:

    building_number = Building number; used to derive the default output directory
    Pred_RNN = RNN disaggregation results for all appliances [Dataframe whose index is
               named "Date"]
    final_dir = Optional output directory. Defaults to
                "RNN_results/house_<building_number>_final/". It is created if missing.

    Output:

    None
    """
    if final_dir is None:
        # Derive the directory from the argument instead of depending on a global.
        final_dir = "RNN_results/house_" + str(building_number) + "_final/"
    os.makedirs(final_dir, exist_ok=True)

    # reset_index returns a new frame, so the caller's dataframe is not modified.
    Pred_RNN_final = Pred_RNN.reset_index()
    # Replace each date by its timestamp (see to_timestamp).
    Pred_RNN_final["Date"] = [to_timestamp(str(date)) for date in Pred_RNN_final["Date"]]
    Pred_RNN_final = Pred_RNN_final.set_index('Date')

    labels = ["Aggregate"]
    idx = [1]
    for channel_number, column in enumerate(Pred_RNN_final.columns, start=2):
        channel_path = os.path.join(final_dir, "channel_" + str(channel_number) + ".dat")
        Pred_RNN_final[column].to_csv(channel_path, sep=' ', header=False)
        labels.append(str(column))
        idx.append(channel_number)
    labels_df = pd.DataFrame(labels, index=idx)
    labels_df.to_csv(os.path.join(final_dir, "labels.dat"), sep=' ', header=False)

In [34]:
# Appliances of the building to train on and disaggregate, one model run each.
app_list = [
    "laptop computer",
    "fridge",
    "active speaker",
    "microwave",
    "computer",
    "computer monitor",
    "broadband router",
    "kettle",
    "dish washer",
    "external hard disk",
    "rice cooker",
    "running machine",
    "washing machine",
    "toaster",
    "games console",
    "modem",
    "cooker",
]

In [35]:
# Result tables that evaluation_results fills with one row per appliance.
metrics_df = pd.DataFrame(columns=('Appliance','Accuracy', 'Precision', 'Recall', "F1"))
rmse_df = pd.DataFrame(columns=('Appliance','RMSE', 'MAE'))

# Train, disaggregate, export to CSV and evaluate every appliance of the building.
# NOTE(review): the same `rnn` object is trained for one appliance after the other —
# confirm that carrying it over between appliances is intended.
for item in app_list:
    meter_key = item
    print(meter_key)
    rnn, train_meter=train_RNN(rnn, train_elec, train_building, meter_key, sample_period)
    # predict_RNN expects the trained disaggregator as its first argument; it was
    # missing here, which raised a TypeError on the first appliance.
    disag_filename = predict_RNN(rnn, meter_key, test_mains, train_meter, sample_period, intermediate_dir)
    prepare_Results(disag_filename, meter_key, train_building, output_dir)
    metrics_df, rmse_df= evaluation_results(disag_filename, test_building, test_elec, meter_key, metrics_df, rmse_df)
    print("Done training and prediction for "+str(meter_key))

# Building 2 has a second laptop computer, addressed by its meter number instead of a name.
if train_building == 2:
    # Kept as a string: prepare_Results and evaluation_results detect the numeric
    # meter with str.isnumeric(); train_RNN needs the int form as the meter key.
    meter_key="11"
    rnn, train_meter=train_RNN(rnn, train_elec, train_building, int(meter_key), sample_period)
    disag_filename = predict_RNN(rnn, meter_key, test_mains, train_meter, sample_period, intermediate_dir)
    prepare_Results(disag_filename, meter_key,train_building, output_dir)
    metrics_df, rmse_df= evaluation_results(disag_filename, test_building, test_elec, meter_key, metrics_df, rmse_df)
    print("Done training and prediction for laptop computer 2")

fridge
CHECKPOINT 0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 10
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train = 188.38674974441528 seconds.
New sensible chunk: 14630
New sensible chunk: 21960
Done training and prediction for fridge
active speaker
CHECKPOINT 0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 10
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train = 294.2468349933624 seconds.
New sensible chunk: 14630
New sensible chunk: 21960


  return tp/float(tp+fp)


Done training and prediction for active speaker
CHECKPOINT 0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CHECKPOINT 10
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train = 293.70927000045776 seconds.
New sensible chunk: 14630
New sensible chunk: 21960


  return tp/float(tp+fp)


Done training and prediction for laptop computer 2


In [36]:
# Appliance names of building 2 whose CSV results are read and merged below;
# "laptop computer_2" is the CSV name written for the numeric laptop meter.
app_list = [
    "laptop computer",
    "laptop computer_2",
    "fridge",
    "microwave",
    "active speaker",
    "computer",
    "computer monitor",
    "broadband router",
    "kettle",
    "dish washer",
    "external hard disk",
    "rice cooker",
    "running machine",
    "washing machine",
    "toaster",
    "games console",
    "modem",
    "cooker",
]
# Appliance name -> dataframe of its disaggregated data.
RNN = dict()

In [37]:
# Load every appliance's disaggregated CSV, average it over 2-minute bins ("2T")
# and keep the result in the RNN dictionary under the appliance name.
for item in app_list:
    print(item)
    appliance_csv = output_dir+item+".csv"
    appliance_power = pd.read_csv(appliance_csv, sep=',',parse_dates=['Date'], header=0, index_col="Date")
    RNN[item] = pd.DataFrame(appliance_power.resample("2T").mean())

laptop computer
laptop computer_2
fridge
active speaker


In [38]:
# Combine the per-appliance frames into one dataframe joined on the Date index, then
# rename the columns to the capitalized appliance names.
# The rename relies on "laptop computer" being the first entry of app_list, so that the
# merge order equals the app_list order.
# NOTE(review): merge is called without `how`, which presumably means an inner join —
# rows missing for any appliance would then be dropped before fillna(0); confirm.
new_columns = [name.capitalize() for name in app_list]
Pred_RNN = RNN["laptop computer"]
for item in app_list:
    if item == "laptop computer":
        continue  # already the left-hand frame of the merge chain
    Pred_RNN = Pred_RNN.merge(RNN[item], left_index=True, right_index=True)
Pred_RNN = Pred_RNN.fillna(0)
Pred_RNN.columns = new_columns

In [39]:
# Display the merged disaggregation results (one column per appliance, indexed by Date).
Pred_RNN

Unnamed: 0_level_0,Laptop computer,Laptop computer_2,Fridge,Active speaker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-07-15 23:00:00,8.125856,3.098091,49.976055,4.313291
2013-07-15 23:02:00,8.123927,3.088846,49.901268,4.307250
2013-07-15 23:04:00,8.124178,3.090047,49.910988,4.308034
2013-07-15 23:06:00,8.116787,3.054549,49.623860,4.284833
2013-07-15 23:08:00,8.118645,3.063488,49.696163,4.290675
...,...,...,...,...
2013-10-10 05:08:00,7.753389,1.122778,34.037440,3.019436
2013-10-10 05:10:00,7.751366,1.110994,33.942450,3.011694
2013-10-10 05:12:00,7.752123,1.115406,33.978020,3.014593
2013-10-10 05:14:00,7.753859,1.125509,34.059452,3.021231


In [40]:
# Write the RNN disaggregation results (Pred_RNN) to the final output files:
# channel_2.dat, channel_3.dat, ... (one per appliance column) and labels.dat.
generate_output_files(train_building,Pred_RNN)