# Nilm experiments notebook

The scope of this notebook  is to examine differents parameters of the problem, such as:

1) Lower sampling rate

2) Higher event detection threshold

3) Fewer/More time points on each event




In [60]:
#!/usr/bin/env python3

import sys
import pandas as pd
import datetime
import requests
import numpy as np
from pandas import ExcelWriter
import os
import glob
import pytz
from dateutil.tz import gettz
import timeit

# from datetime import datetime
from datetime import timedelta
import time
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

pd.set_option('display.max_rows', 50000)
pd.set_option('display.max_columns', 50000)

In [30]:
def read_data(devid, acc_token, address, start_time, end_time, descriptors):


    r2 = requests.get(
        url=address + "/api/plugins/telemetry/DEVICE/" + devid + "/values/timeseries?keys=" + descriptors + "&startTs=" + start_time + "&endTs=" + end_time + "&agg=NONE&limit=1000000",
        headers={'Content-Type': 'application/json', 'Accept': '*/*', 'X-Authorization': acc_token}).json()
    if r2:
        df = pd.DataFrame([])

        for desc in r2.keys():
            df1 = pd.DataFrame(r2[desc])
            df1.set_index('ts', inplace=True)
            df1.columns = [str(desc)]
            df = pd.concat([df, df1], axis=1)

        
        if df.empty == False:

            df.reset_index(drop=False, inplace=True)
            df = df.sort_values(by=['ts'])
            df.reset_index(drop=True, inplace=True)
            df.set_index('ts', inplace=True, drop=True)
            for col in df.columns:
                df[col] = df[col].astype('float')

            df = df.groupby(df.index).max()
            
        else:
            df = pd.DataFrame([])
    else:
        df = pd.DataFrame([])
#         print('Empty json!')
    return df

def request_data(start_time,end_time,devid,acc_token,address,descriptors):
    df = pd.DataFrame([])
    svec = np.arange(int(start_time[0]),int(end_time[0]),3600000)
    hour = 1
    for st in svec:
    #     print(hour)
        hour = hour+1
        en = st+3600000-1

        if int(end_time[0])-en<=0: en = int(end_time[0])
    #         print('start and end of iteration:',st,en)

        tmp = read_data(devid, acc_token, address, str(st), str(en), descriptors)
        if not tmp.empty:
            df = pd.concat([df,tmp])


    df['ts'] = pd.to_datetime(df.index,utc=True, unit='ms')
    df['ts'] = df['ts'].dt.tz_convert('Europe/Athens')

    df.set_index('ts',inplace=True, drop=True)
    return df


def run_models(df,maphase,phase,mdlphase,mdlpath):
    step=5
    events = []
    nums = []
    state = []
    ev_ts = []
    dpwr = [] # delta active power --> |previous power - current power|
    conflicts = {}
    ln=0
    i=0
    points = 70
    fr = 40 # frequency is 40msec
    
    print('phase:',phase)
    while ln<=df.shape[0]-(points+1):
        dt1 = df.index[ln]
        dt2 = df.index[ln]+datetime.timedelta(milliseconds=fr*points)
        
        ln = ln+df[dt1:dt2].shape[0]

        slot = df[maphase[phase]].copy()
        slot = slot[dt1:dt2]
        if slot.shape[0]>50:
#             print('slot:',dt1,dt2)
            tsm = slot.index[-1]
            slot.columns = ['pwr','rpwr']
            slot.reset_index(inplace=True, drop=True)
            steady = slot.iloc[:10].copy()
            change = slot.iloc[-50:].copy()
            change.reset_index(inplace=True, drop=True)

            # discover if appliance has been added or removed
            if (np.mean(change['pwr'].iloc[-(2*step):-step])-np.mean(steady['pwr']))>0:
                st = 1
            else:
                st = 0

            # subtract active & reactive power of steady state
            change['pwr'] = np.abs(change['pwr']-np.mean(steady['pwr']))
            change['rpwr'] = np.abs(change['rpwr']-np.mean(steady['rpwr']))
            change.dropna(inplace=True)

            cols = ['pwr','rpwr']

            df_pr = pd.DataFrame([])
            for col in cols:
                df_pr[col+'_mean'] = change[col].groupby(np.arange(len(change))//step).mean()
                df_pr[col+'_std'] = change[col].groupby(np.arange(len(change))//step).std()
                df_pr[col+'_min'] = change[col].groupby(np.arange(len(change))//step).min()
                df_pr[col+'_max'] = change[col].groupby(np.arange(len(change))//step).max()
    #             df_pr[col+'_skew'] = change[col].groupby(np.arange(len(change))//step).skew()
    #             df_pr[col+'_kurt'] = change[col].groupby(np.arange(len(change))//step).apply(pd.Series.kurt)
            df_pr.dropna(inplace=True)

            assigned = False

            # run all models for this phase
            for j in range(0,len(mdlphase[phase])):
                filename = mdlpath+str(mdlphase[phase][j])+'.sav'
                mdl = pickle.load(open(filename, 'rb'))
                y_pred = mdl.predict(df_pr)
                #print(mdlphase[phase][j],y_pred)
                if np.sum(y_pred)>=0.6*len(y_pred):

                    if assigned==False: 
                        dpwr.append(np.abs(np.mean(change['pwr'].iloc[-(2*step):-step])-np.mean(steady['pwr'])))
                        nums.append(np.sum(y_pred))
                        events.append(mdlphase[phase][j])
                        print(mdlphase[phase][j], tsm)
                        ev_ts.append(tsm)
                        state.append(st)
                        assigned=True

                        i=i+1
                    else:
                        if np.sum(y_pred)>=nums[i-1]:
                            conflicts[ev_ts[i-1]]= [mdlphase[phase][j]]
    #end of while
    
    ev = confl_postproc(events,state,ev_ts,conflicts,dpwr)
    return ev


def events_clearing(ev,events,mappings):
    # convert categorical variables to numeric
    if not ev.empty:
        ev.replace({'appl': { v : k for k, v in mappings.items() }},inplace=True)

        ev = ev.resample('1S').max()
        globals()['ev%s' % phase] = ev.copy()

        # append events dataframes to dictionary
        events.append(globals()['ev%s' % phase])
    #     mappings.append(globals()['d%s' % phase])
    
    return events    



def confl_postproc(events,state,ev_ts,conflicts,dpwr):
    
    ev = pd.DataFrame([])
    ev['appl'] = events
    ev['state'] = state
    ev['ts'] = ev_ts
    ev['dpwr'] = dpwr
    ev=ev.dropna()
    ev.set_index('ts',inplace=True)

    if len(conflicts)>0: # if there are conflicts
        confl = pd.DataFrame(conflicts).T
        confl.columns = ['conflict']
        ev = pd.concat([ev,confl],axis=1)

        
        for i in range(5,ev.shape[0]-5):
            if pd.isna(ev['conflict'].iloc[i])==False:
        #         print(ev['conflict'].iloc[i],ev['appl'].iloc[i],ev['appl'].iloc[i-1])
        
                # check neighborhood -- 5 previous and 5 next points-- to decide if conflict will replace value
                if ev['conflict'].iloc[i]==ev['appl'].iloc[i-5:i+5].value_counts()[:1].index.tolist()[0]:
                    #print('appliance before conflict:',ev.iloc[i])
                    ev['appl'].iloc[i] = ev['conflict'].iloc[i]
                    #print('appliance after conflict:',ev.iloc[i])
                    
    else:
        ev['conflict'] = np.nan
#         ev.drop('conflict',axis=1,inplace=True)

    return ev

 
# heatpump post process
def hp_postproc(events,mappings):
    phases = ['A','B','C']
    
    if len(events)==3:
        # encode conflicts column with corresponding dictionary values
        for i in range(0,3):
            if events[i]['conflict'].notnull().sum()>0:
                events[i].replace({'conflict': { v : k for k, v in mappings[i].items() }},inplace=True)

        # indexes of rows where heatpump was found at each phase
        indA= events[0].loc[events[0]['appl'] == max(mappings[0], key=lambda k: mappings[0][k] == 'heatpumpA')].index
        indB= events[1].loc[events[1]['appl'] == max(mappings[1], key=lambda k: mappings[1][k] == 'heatpumpB')].index
        indC= events[2].loc[events[2]['appl'] == max(mappings[2], key=lambda k: mappings[2][k] == 'heatpumpC')].index

        # if there is an intersection between at least two phases' indexes, assign appliance=heatpump on the ohter phase
        events[0].loc[indB.intersection(indC),'appl']=max(mappings[0], key=lambda k: mappings[0][k] == 'heatpumpA')
        events[1].loc[indA.intersection(indC),'appl']=max(mappings[1], key=lambda k: mappings[1][k] == 'heatpumpB')
        events[2].loc[indA.intersection(indB),'appl']=max(mappings[2], key=lambda k: mappings[2][k] == 'heatpumpC')

        # if heatpump is found at only one phase, this is a false positive. Replace with conflict or leave empty
        events[0].loc[indA.difference(indB),'appl'] = events[0].loc[indA.difference(indB),'conflict']
        events[1].loc[indB.difference(indC),'appl'] = events[1].loc[indB.difference(indC),'conflict']
        events[2].loc[indC.difference(indA),'appl'] = events[2].loc[indC.difference(indA),'conflict']

    return events


def postproc(events):
    # drop events corresponding to only one appearance of an appliance 
    for i in range(0,len(events)):
        singlapp = events[i]['appl'].value_counts()
        if singlapp[singlapp==1].shape[0]>0:
            events[i] = events[i][events[i]['appl'] != singlapp[singlapp==1].index.values[0]]
            print('appliance with one appearance:',mappings[i][singlapp[singlapp==1].index.values[0]])
    return events


In [31]:
# set phase and appliances' mappings to dictionaries
mdlpath = '../../Desktop/windowsshare/stelios_data/models/'
maphase = {'A':['pwrA','rpwrA'],'B':['pwrB','rpwrB'],'C':['pwrC','rpwrC']}
mdlphase = {'A':['entilator','fridge','heatpumpA','oven','stove','vacuum','wash'],'B':['coffee','dish','freezer','heatpumpB'],'C':['iron','ironpress','heatpumpC','PC']}

# devmap = {'mdl0':'dishwasher','mdl1':'entilator','mdl2':'freezer','mdl3':'fridge','mdl4':'heatpumpA','mdl5':'heatpumpB','mdl6':'heatpumpC','mdl7':'iron','mdl8':'ironpress','mdl9':'Oven','mdl10':'stove','mdl11':'vacuum','mdl12':'washingMashine'}

# Download events for phase
devid = '71749ca0-9a6a-11ea-8d54-4d0d5d00237b'


year = 2020
month = 8
day = 23

tmzn = 'Europe/Athens'
st = datetime.datetime(year,month, day, tzinfo = gettz(tmzn))
daily_offset = 86400000
print('start:',st)

start_time = [int(st.timestamp()) * 1000] 
end_time = [start_time[0]+daily_offset]

address = "http://52.77.235.183:8080"

r = requests.post(address + "/api/auth/login",
                  json={'username': 'tenant@thingsboard.org', 'password': 'tenant'}).json()

acc_token = 'Bearer' + ' ' + r['token']
##########################
# timediff = int(end_time[0])-int(start_time[0])



start: 2020-08-23 00:00:00+03:00


In [32]:
events = []
mappings=[]
for key,value in mdlphase.items():
    # create list  of dictionaries for each phase's appliances with integer keys
    mappings.append(dict(zip(range(len(value)),value)))

i=0
for phase in ['A','B','C']:
    print('phase ',phase)
    descriptors = maphase[phase][0]+','+maphase[phase][1]

    df = request_data(start_time,end_time,devid,acc_token,address,descriptors)
    
    ev = run_models(df,maphase,phase,mdlphase,mdlpath)
    events = events_clearing(ev,events,mappings[i])
#     print(ev)
    i = i+1

# ensure heatpump appears to function across all 3 phases, otherwise it's a false positive
events = hp_postproc(events,mappings)
# remove events of appliances that appear only once
events = postproc(events)

phase  A
phase: A
fridge 2020-08-23 00:00:55.953000+03:00
fridge 2020-08-23 00:38:44.327000+03:00
fridge 2020-08-23 00:56:02.456000+03:00
fridge 2020-08-23 01:32:24.040000+03:00
entilator 2020-08-23 01:40:15.982000+03:00
fridge 2020-08-23 01:55:55.982000+03:00
fridge 2020-08-23 02:13:41.261000+03:00
fridge 2020-08-23 04:32:19.760000+03:00
fridge 2020-08-23 04:49:37.023000+03:00
fridge 2020-08-23 05:31:41.794000+03:00
fridge 2020-08-23 05:49:18.608000+03:00
fridge 2020-08-23 06:30:07.091000+03:00
fridge 2020-08-23 07:27:01.041000+03:00
fridge 2020-08-23 07:44:48.808000+03:00
fridge 2020-08-23 09:20:02.734000+03:00
fridge 2020-08-23 09:38:20.651000+03:00
fridge 2020-08-23 10:19:27.849000+03:00
fridge 2020-08-23 10:37:33.593000+03:00
fridge 2020-08-23 11:16:52.999000+03:00
fridge 2020-08-23 12:12:20.692000+03:00
fridge 2020-08-23 12:30:39.730000+03:00
fridge 2020-08-23 13:10:58.615000+03:00
fridge 2020-08-23 13:29:32.047000+03:00
fridge 2020-08-23 14:10:05.355000+03:00
fridge 2020-08-23 1

In [33]:
for i in range(0,len(events)):
    a = events[i]['appl'].unique()
    a = a[a==a]
    print(a)
    for j in a:
        events[i].loc[events[i]['appl']==j] = events[i].loc[events[i]['appl']==j][events[i].loc[events[i]['appl']==j]['state']!= events[i].loc[events[i]['appl']==j]['state'].shift()]
        
        tmp = events[i].loc[events[i]['appl']==j].copy()
        
#         # if 1st event is 0 then drop
#         if tmp['state'].iloc[0]<1:
#             tmp = tmp.iloc[1:]
#         # if last event is 1 then drop
#         if tmp['state'].iloc[-1]>0:
#             tmp = tmp.iloc[:-1]
        
        # calculate time difference between on/off
        tmp = tmp.loc[tmp['state']>=0]
        tmp['ts'] =  tmp.index
        tmp['dif'] = tmp['ts'].values-tmp['ts'].shift().values
        tmp['dif'] = tmp['dif'].dt.seconds.fillna(0)
        
        # drop on/offs with duration less than 1 min
#         tmp = tmp[(tmp['dif']>60) | (tmp['dif'].shift(-1)>60) | (tmp['dif']==0)]
        
        # keep only first instance of each state
        tmp.loc[(tmp['state']==0.0) & (tmp['state'].shift()==1.0),'onoff'] = 1
        tmp.loc[(tmp['state']==1.0) & (tmp['state'].shift()==0.0) & (tmp['onoff'].shift()==1),'onoff'] = 1
        tmp['onoff'].iloc[0] = 1
#         tmp = tmp.loc[(tmp['state']==0.0) & (tmp['state'].shift()==1.0)]
        
        #calculate time difference again
        tmp['dif'] = tmp['ts'].values-tmp['ts'].shift().values
        tmp['dif'] = tmp['dif'].dt.seconds.fillna(0)
        tmp.drop('ts',axis=1, inplace=True)  
        
#         tmp = tmp.loc[tmp['onoff']==1]
        
        events[i] = events[i].loc[events[i]['appl']!=j]
        events[i] = pd.concat([events[i],tmp[['appl','state','dpwr','conflict','dif','onoff']]])
        events[i].sort_index(inplace=True)

[1. 2. 4. 3.]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




[]


In [35]:
prA = pd.DataFrame(events[0])
prA.to_csv('pwrA_70samples_5win.csv')


In [95]:
# Load csv files to compare differences
df1 = pd.read_csv('pwrA_70samples_5win.csv',index_col='Unnamed: 0')
df1['Datetime'] = pd.to_datetime(df1.index)
df1.set_index('Datetime',drop=True,inplace=True)

df2 = pd.read_csv('pwrA_70samples_10win.csv',index_col='Unnamed: 0')
df2['Datetime'] = pd.to_datetime(df2.index)
df2.set_index('Datetime',drop=True,inplace=True)

df3 = pd.read_csv('pwrA_140samples_10win.csv',index_col='Unnamed: 0')
df3['Datetime'] = pd.to_datetime(df3.index)
df3.set_index('Datetime',drop=True,inplace=True)


In [96]:
df1 = df1[['appl']]
df1.columns = ['appl1']
df1 = df1.dropna()

df2 = df2[['appl']]
df2.columns = ['appl2']
df2 = df2.dropna()

df3 = df3[['appl']]
df3.columns = ['appl3']
df3 = df3.dropna()

In [97]:
df = pd.concat([df1,df2,df3],axis=1)
df.head()

Unnamed: 0_level_0,appl1,appl2,appl3
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-08-23 00:00:55+03:00,1.0,1.0,
2020-08-23 00:38:44+03:00,1.0,1.0,1.0
2020-08-23 00:56:02+03:00,1.0,1.0,1.0
2020-08-23 01:32:24+03:00,1.0,1.0,1.0
2020-08-23 01:40:16+03:00,,,1.0


In [101]:
df.sort_index()

Unnamed: 0_level_0,appl1,appl2,appl3
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-08-23 00:00:55+03:00,1.0,1.0,
2020-08-23 00:38:44+03:00,1.0,1.0,1.0
2020-08-23 00:56:02+03:00,1.0,1.0,1.0
2020-08-23 01:32:24+03:00,1.0,1.0,1.0
2020-08-23 01:40:16+03:00,,,1.0
2020-08-23 01:55:55+03:00,,,1.0
2020-08-23 02:13:41+03:00,1.0,1.0,1.0
2020-08-23 04:32:19+03:00,1.0,1.0,1.0
2020-08-23 04:49:37+03:00,1.0,1.0,1.0
2020-08-23 05:31:41+03:00,1.0,1.0,


In [100]:
mappings[0]

{0: 'entilator',
 1: 'fridge',
 2: 'heatpumpA',
 3: 'oven',
 4: 'stove',
 5: 'vacuum',
 6: 'wash'}