In [119]:
import sqlite3
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import numpy as np
from tqdm import tqdm
import datetime
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules
import math
import time
from sklearn.model_selection import train_test_split
import itertools
#import pyTruthTable as ptt

In [120]:
#USER INPUT
cnx = sqlite3.connect('data/db/homeassistant.db') #ADD LINK TO HASS DATABASE
startDate =  pd.to_datetime('2023-5-15 00:00:00') #ADD STARTDATE FROM WHERE TO BEGIN WEEK
interval = 20 #INTERVAL IN SECONDS OF TIMESLOTS AND SESSIONS

In [121]:
data = pd.read_sql_query("SELECT states.state_id, states_meta.entity_id, state, DATETIME(last_updated_ts, 'unixepoch', 'localtime') as last_updated, DATETIME(IIF(states.last_changed_ts IS NULL,states.last_updated_ts,states.last_changed_ts), 'unixepoch', 'localtime') as last_changed FROM states LEFT JOIN states_meta ON (states.metadata_id=states_meta.metadata_id) LEFT JOIN state_attributes ON (states.attributes_id=state_attributes.attributes_id) WHERE last_changed_ts IS NULL;", cnx)
cnx.close()
data['last_changed'] = pd.to_datetime(data['last_changed'])
data['last_changed'] = pd.to_datetime(data['last_updated'])
endDate = startDate + datetime.timedelta(days=14)
data = data.loc[(data['last_changed'] >= startDate)
                     & (data['last_changed'] <= endDate)]

In [122]:
data

Unnamed: 0,state_id,entity_id,state,last_updated,last_changed
147839,1884904,sensor.energy_price_checker_current_electricity_market_price,0.31634,2023-05-15 00:00:00,2023-05-15
147840,1884905,sensor.energy_price_checker_next_hour_electricity_market_price,0.30255,2023-05-15 00:00:00,2023-05-15
147841,1884906,sensor.energy_price_checker_lowest_energy_price_today,0.28066,2023-05-15 00:00:00,2023-05-15
147842,1884907,sensor.energy_price_checker_highest_energy_price_today,0.34745,2023-05-15 00:00:00,2023-05-15
147843,1884908,sensor.energy_price_checker_average_electricity_price_today,0.30331,2023-05-15 00:00:00,2023-05-15
...,...,...,...,...,...
660030,2438651,sensor.energy_price_checker_highest_energy_price_today,0.27815,2023-05-29 00:00:00,2023-05-29
660031,2438652,sensor.energy_price_checker_average_electricity_price_today,0.18828,2023-05-29 00:00:00,2023-05-29
660032,2438653,sensor.energy_price_checker_time_of_lowest_price_today,2023-05-29T12:00:00+00:00,2023-05-29 00:00:00,2023-05-29
660033,2438654,sensor.energy_price_checker_current_percentage_of_highest_electricity_price_today,88.5,2023-05-29 00:00:00,2023-05-29


In [123]:

#Define entity types to be considered in data
#entity types including 
includeNoise = True
entityTypes = ['binary_sensor', 'light','person', 'button', 'sun', 'lock', 'media_player', 'switch', 'alarm_control_panel', 'fan', 'sensor']
#entityTypes = ['binary_sensor', 'light', 'sun', 'fan', 'switch']
#Manual filter
entityFilter = []
if includeNoise == False:
    for entity in data['entity_id'].unique():
        if 'binary_sensor' in entity and 'power' in entity\
            or 'sensor' in entity and 'power' in entity\
            or 'warm_water' in entity\
            or 'overlay' in entity\
            or 'early_start' in entity\
            or 'open_window' in entity\
            or 'info' in entity\
            or 'failure' in entity\
            or 'tampering' in entity\
            or 'temperature' in entity\
            or 'time' in entity\
            or 'program' in entity\
            or 'connection' in entity\
            or 'link' in entity\
            or 'current' in entity\
            or 'configuration' in entity\
            or 'overheat' in entity\
            or 'focus' in entity\
            or 'battery' in entity\
            or 'microwave' in entity\
            or 'dishwasher' in entity\
            or 'tdarr' in entity\
            or 'remote' in entity:
            entityFilter.append(entity)
    entityFilter
    data = data[~data['entity_id'].isin(entityFilter)]
#Initialize empty list of entities
entities = []
#Loop over all unique entities remaining
for entity in data['entity_id'].unique():
    #Split device type from device name
    split = str(entity).split('.')
    #Grab device type and see if in list of considered types
    if split[0] in entityTypes:
        #When true, append device to list of entities to be used
        entities.append(entity)
#Create dataframe of all entities to be used
dataEntitiesFiltered = data[data['entity_id'].isin(entities)]

In [124]:
dataColumnsFiltered = dataEntitiesFiltered[['entity_id', 'state', 'last_updated', 'last_changed']]
dataUnavailableDeleted = dataColumnsFiltered[dataColumnsFiltered['state'] != 'unavailable']
dataNaDeleted = dataUnavailableDeleted.dropna(subset='state')
dataUnknownDeleted = dataNaDeleted[dataNaDeleted['state'] != 'unknown']
filteredSun = dataUnknownDeleted[dataUnknownDeleted['entity_id'].str.startswith("sun")].drop_duplicates(subset=['last_changed'])
filteredSun.dropna(subset=['last_changed'], inplace=True)
filteredRest = dataUnknownDeleted[~dataUnknownDeleted['entity_id'].str.startswith("sun")]
dataSunFix = pd.concat([filteredSun, filteredRest ]).reindex(dataUnknownDeleted.index).dropna(subset=['entity_id'])
dataCleaned = dataSunFix.reset_index(drop=True)

In [125]:
#Feature engineering
dataCleaned = dataCleaned[dataCleaned['last_changed'].notna()]
dataCleaned['timestamp'] = pd.to_datetime(dataCleaned['last_changed']).dt.floor('T')
dataCleaned['id_state'] = dataCleaned['entity_id'] + '-' + dataCleaned['state']

In [126]:
#Round all times to the second
dataCleaned['last_updated'] = pd.to_datetime(dataCleaned['last_updated']).dt.floor('S')
dataCleaned['last_changed'] = pd.to_datetime(dataCleaned['last_changed']).dt.floor('S')

In [127]:
#dataCleaned = dataCleaned[dataCleaned['id_state'].str.contains("-on")]

In [128]:
df, test = train_test_split(dataCleaned, test_size=0.50, shuffle=False)

In [129]:
automationEntityList = ['sun_above', 'sun_below', 'binary_sensor.walk_motion_detection-on', 'binary_sensor.living_motion_detection-on', 'binary_sensor.door_bath_motion_detection-on', 'binary_sensor.downstairs_hall_motion_detection-on',
                        'light.walk-on','light.kitchen-on','light.spots_living-on', 'light.window_living-on', 'light.sill-on','light.mirror-on','light.spots_bath-on', 'light.downstairs-on', 'light.toilet-on',
                        'switch.ps5-on',
                        'media_player.tv-on', 'media_player.receiver-on', 'switch.mediabox-on'
                        ]
#ttg.Truths(automationEntityList).as_pandas()

In [130]:
#Dynamically add start and end dates to bins based on first and last value
startDate = df['last_changed'].head(1).item().floor('T')
endDate = df['last_changed'].tail(1).item().floor('T')
binLength = interval
bins = pd.date_range(start=startDate, end=endDate, freq=f'{binLength}S')
print(f'Bins created with start date {startDate} and end date {endDate}')

Bins created with start date 2023-05-15 00:00:00 and end date 2023-05-21 23:53:00


In [131]:
statefulItemsets = pd.DataFrame(columns = df['entity_id'].unique())
statefulItemsets['timeslot'] = bins
statefulItemsets['dow'] = statefulItemsets.apply(lambda x: pd.Timestamp(x['timeslot']).day_name(), axis=1)

In [132]:
#Set initial to true so first rows will not check for previous value when NaN
try:
    statefulItemsets = pd.read_csv('data/cache/statefulItemsets.csv', index_col=0)
except:
    startTime = time.time()
    counter = 1
    for row in tqdm(range(len(statefulItemsets)), desc="Generating stateful dataframe"):
        if counter <= len(statefulItemsets):
            timeslot = row
            entities = df[(df.last_changed >= statefulItemsets['timeslot'][row]) & (df.last_changed < statefulItemsets['timeslot'][row] + datetime.timedelta(seconds=interval))]
            if len(entities) > 0:
                for col in statefulItemsets.iloc[: , :-2]:
                    if len(entities.loc[df['entity_id'] == col]) == 1:
                        statefulItemsets.loc[row, col] = entities.loc[df['entity_id'] == col, 'state'].iloc[0]
                    elif len(entities.loc[df['entity_id'] == col]) > 1:
                        statefulItemsets.loc[row, col] = entities.loc[df['entity_id'] == col, 'state'].iloc[-1]
                    elif counter > 1:
                        statefulItemsets.loc[row, col] = statefulItemsets.loc[row-1, col]


            elif counter > 1:
                for col in statefulItemsets.iloc[: , :-2]:
                    statefulItemsets.loc[row, col] = statefulItemsets.loc[row-1, col]
        counter += 1
    statefulItemsets.to_csv('data/cache/statefulItemsets.csv')
    endTime = time.time()
    runningTime = endTime - startTime
    print(f'Generated {len(statefulItemsets)} itemsets in {runningTime} seconds!')
else:
    print('Cached data found. Using old data.')
statefulItemsets.replace(['below_horizon', 'not_home', 'locked', 'idle', 'paused', 'standby', 'off'], 0, inplace=True)
statefulItemsets.replace(['above_horizon', 'home', 'unlocked','playing', 'on'], 1, inplace=True)

  statefulItemsets = pd.read_csv('data/cache/statefulItemsets.csv', index_col=0)


Cached data found. Using old data.


In [133]:
statefulItemsets

Unnamed: 0,switch.ventilation,fan.ventilation,sun.sun,binary_sensor.walk_motion_detection,switch.toilet,light.toilet,binary_sensor.door_bath_motion_detection,switch.mirror,light.mirror,switch.sill,light.sill,switch.spots_bath,light.spots_bath,binary_sensor.door_living_motion_detection,binary_sensor.downstairs_hall_motion_detection,switch.downstairs,light.downstairs,binary_sensor.door_door_sensor,switch.pc,switch.walk,light.walk,light.window_bed,light.closet,light.ambient,light.bed_left,light.hue_play_r,light.bed_right,light.hue_play_l,switch.accent_bed,light.accent_bed,switch.motion_override,switch.ps5,light.window_living,light.bar,light.showcase,light.cupboard,light.kitchen,light.vinyl_player,switch.vinyl,binary_sensor.door,switch.upstairs,light.upstairs,light.spots_bed,light.spots_living,switch.accent_living,light.accent_living,light.window_light_2,light.window_light_3,light.window_light_1,timeslot,dow
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-05-15 05:20:00,Monday
1,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-05-15 05:20:20,Monday
2,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-05-15 05:20:40,Monday
3,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-05-15 05:21:00,Monday
4,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-05-15 05:21:20,Monday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27812,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2023-05-21 15:50:40,Sunday
27813,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2023-05-21 15:51:00,Sunday
27814,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2023-05-21 15:51:20,Sunday
27815,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2023-05-21 15:51:40,Sunday


In [134]:
loop = 0
for entity in statefulItemsets.loc[:, ~statefulItemsets.columns.isin(['timeslot', 'dow'])]:
    firstDataCell = 0
    for cell in statefulItemsets[entity]:
        try:
            if math.isnan(cell):
                firstDataCell +=1 
            else:
                break
        except: continue
    if firstDataCell > 0 and firstDataCell != len(statefulItemsets):
        actual = statefulItemsets[entity].iloc[firstDataCell]
        if actual == 1 or actual == 1.0:
            statefulItemsets[entity].iloc[0:firstDataCell] = 0
        else:
            statefulItemsets[entity].iloc[0:firstDataCell] = 1



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  statefulItemsets[entity].iloc[0:firstDataCell] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  statefulItemsets[entity].iloc[0:firstDataCell] = 0


In [135]:
corr = statefulItemsets.corr()
dupeList = []
for row in corr:
    for col in corr:
        dupeEntity = []
        if corr.loc[row,col] == 1 and row != col:
            dupeEntity.append(row)
            dupeEntity.append(col)
            dupeEntitySorted = sorted(dupeEntity)
            for dupeEntity in dupeEntitySorted:
                if 'switch.' in dupeEntity and dupeEntity not in dupeList:
                    dupeList.append(dupeEntity)
df = df[~df['entity_id'].isin(dupeList)].reset_index(drop=True)
test = test[~test['entity_id'].isin(dupeList)].reset_index(drop=True)
statefulItemsets.drop(dupeList, axis=1, inplace=True)
duplicatesFormat = ', '.join(dupeList)
print(f'Deleted duplicate entities: {duplicatesFormat}')


Deleted duplicate entities: switch.ventilation, switch.toilet, switch.mirror, switch.sill, switch.spots_bath, switch.downstairs, switch.walk, switch.accent_bed, switch.vinyl, switch.upstairs, switch.accent_living


In [136]:
binLists = []
iteration = 0
for bin in tqdm(bins, desc="Generating bins"):
    startStop = []
    startStop.append(bins[iteration])
    startStop.append(bins[iteration + 1])
    binLists.append(startStop)
    iteration += 1
    if iteration == len(bins) - 1:
        break

Generating bins: 100%|█████████▉| 30218/30220 [00:00<00:00, 67787.12it/s]


In [137]:
df = df[df['id_state'].str.contains("-on")]
test = test[test['id_state'].str.contains("-on")]

In [138]:
#Fill all bins with state changes in that timeslot
startTime = time.time()
timeslotsItemsets = []
sunUp = False
for binList in binLists:
     current = df[(df.timestamp >= binList[0]) & (df.timestamp <= binList[0])]['id_state']
     if len(current) > 0:
          lastIteration = current
          timeslotsItemsets.append(list(current))
          if 'sun.sun-below_horizon' in list(current):
               sunUp = False
          elif 'sun.sun-above_horizon' in list(current):
               sunUp = True
          if sunUp == False:
               timeslotsItemsets[-1].append('sun_below')
          else:
               timeslotsItemsets[-1].append('sun_above')

for entityList in timeslotsItemsets:
     if 'sun.sun-above_horizon' in entityList:
          timeslotsItemsets.remove(entityList)
     if 'sun.sun-below_horizon' in entityList:
          timeslotsItemsets.remove(entityList)
endTime = time.time()
runningTime = endTime - startTime
print(f'Generated {len(timeslotsItemsets)} itemsets in {runningTime} seconds!')

Generated 1357 itemsets in 14.871795892715454 seconds!


In [139]:
def sessionGenerator(data):
    startTime = time.time()
    debug = False
    #Initialize session itemsets list
    sessionItemsets = []
    #Initialize active section tracker variable
    session = None
    #Set initial sun value
    sunUp = False
    #Loop over all state changes in the dataframe
    for stateChange in range(len(data)):
        #Check if current state change is a sensor so a new session can be started if yes
        if 'binary_sensor.' in data.iloc[stateChange]['id_state'] or 'switch.' in data.iloc[stateChange]['id_state']:
            if session != None and len(session) > 1:
                if sunUp == True:
                    session.append('sun_above')
                elif sunUp == False:
                    session.append('sun_below')
                sessionItemsets.append(session)
            #Record start time of session
            sessionStartTime = data.iloc[stateChange]['last_changed']
            #Set session to current iteration
            session = [data.iloc[stateChange]['id_state']]
        #If not sensor, but a session is running, add to session if within interval
        elif session != None and data.iloc[stateChange]['last_changed'] - sessionStartTime < pd.Timedelta(interval, "s"):
            session.append(data.iloc[stateChange]['id_state'])
        #If sun variable, change state of sun
        elif 'sun.sun-below_horizon' in data.iloc[stateChange]['id_state']:
            sunUp = False
        elif 'sun.sun-above_horizon' in data.iloc[stateChange]['id_state']:
            sunUp = True
        #If not sensor, session is running but outside of interval add the sun state and stop the session
        elif session != None and data.iloc[stateChange]['last_changed'] - sessionStartTime > pd.Timedelta(interval, "s"):
            if len(session) > 1:
                if sunUp == True:
                    session.append('sun_above')
                elif sunUp == False:
                    session.append('sun_below')
                sessionItemsets.append(session)
                if debug == True:
                    print(f'SESSION FOUND FROM {sessionStartTime} TO {sessionStartTime + pd.Timedelta(interval, "s")}')
                    print(f'FOUND TOTAL OF {len(session) -1} ACTUATORS')
                    print(session)
                    time.sleep(4)
                session = None
        endTime = time.time()
    runningTime = endTime - startTime
    print(f'Generated {len(sessionItemsets)} itemsets in {runningTime} seconds!')
    return sessionItemsets
sessionItemsets = sessionGenerator(df)

Generated 184 itemsets in 1.2047150135040283 seconds!


In [140]:
def transaction_encoder(data):
    te = TransactionEncoder()
    te_ary = te.fit(data).transform(data)
    return pd.DataFrame(te_ary, columns=te.columns_)

In [141]:
timeslotsEncoded = transaction_encoder(timeslotsItemsets)

In [142]:
sessionsEncoded = transaction_encoder(sessionItemsets)

In [143]:
statefulEncoded = statefulItemsets.loc[:, ~statefulItemsets.columns.isin(['timeslot', 'dow'])].astype('bool')

In [144]:
def generateAutomations(rules, metric, threshold):
    automations = {
    }
    for antecedent in rules[rules[metric] > threshold]['antecedents'].unique():
        consequentsList = []
        for consequents in rules[rules['antecedents'] == antecedent]['consequents']:
            for consequent in consequents.split(', '):
                if consequent not in consequentsList:
                    consequentsList.append(consequent)
        antecedent = str(sorted(antecedent.split(', ')))
        automations[antecedent] = sorted(consequentsList)
    return automations

In [145]:
validationDict = {
 "['binary_sensor.walk_motion_detection-on', 'sun_below']": ['light.walk-on'],
"['binary_sensor.downstairs_hall_motion_detection-on']": ['light.downstairs-on'],
"['binary_sensor.door_bath_motion_detection-on']": ['light.sill-on','light.mirror-on','light.spots_bath-on'], 
"['binary_sensor.walk_motion_detection-on']": ['light.toilet-on'],
"['binary_sensor.door_living_motion_detection-on']": ['light.kitchen-on', 'light.cupboard-on']

}
maxLength = 9000000
def hyperDrive(data, freqLow, freqHigh, freqStep, antecedents_include, antecedents_exclude, consequents_exclude, ruleMetric = 'confidence', pruneMetric = 'lift', antecedent_len=0, verboseF = False):
    startTime = time.time()
    truthTable = pd.DataFrame(columns=df['entity_id'].unique())
    results = []
    freqLength = (freqHigh - freqLow)/freqStep
    step = 0
    initial = True
    for minimalFrequency in np.arange(freqLow, freqHigh , freqStep):
        if initial == True:
            initial = False
        else:
            step += 1
        frequentMine = fpgrowth(data, min_support=minimalFrequency, use_colnames=True)
        if len(frequentMine) > 0 and len(frequentMine) < maxLength:
            metrLow = 0
            metrHigh = 1
            metrStep = .1
            metrLength = (metrHigh - metrLow)/metrStep
            stepMet = 0
            for metricThreshold in np.arange(metrLow,metrHigh,metrStep):
                #print(f'-------METRIC THRESHOLD: {metricThreshold}')
                print(f'Progress: {step}/{freqLength}, Metric: {stepMet}/{metrLength}', end='\r')
                stepMet += 1
                rules = association_rules(frequentMine,  metric=ruleMetric, min_threshold=metricThreshold)
                rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
                rules["consequents_len"] = rules["consequents"].apply(lambda x: len(x))
                rules["consequents"] = rules["consequents"].apply(lambda x: ', '.join(list(x))).astype("unicode")
                rules["antecedents"] = rules["antecedents"].apply(lambda x: ', '.join(list(x))).astype("unicode")
                if antecedent_len > 0:
                    filteredRules = rules[(rules['antecedents'].str.contains('|'.join(antecedents_include))) & ~(rules['antecedents'].str.contains('|'.join(antecedents_exclude))) & ~(rules['consequents'].str.contains('|'.join(consequents_exclude))) & (rules['antecedent_len'] == antecedent_len)]
                else:
                    filteredRules = rules[(rules['antecedents'].str.contains('|'.join(antecedents_include))) & ~(rules['antecedents'].str.contains('|'.join(antecedents_exclude))) & ~(rules['consequents'].str.contains('|'.join(consequents_exclude)))]
                if pruneMetric == 'confidence':
                    pruneThreshLow = 0
                    pruneThreshHigh = 1
                    pruneThreshStep = 0.2
                elif pruneMetric == 'lift' or pruneMetric == 'conviction':
                    pruneThreshLow = 0
                    pruneThreshHigh = 25
                    pruneThreshStep = 0.5
                for pruneThreshold in np.arange(pruneThreshLow,pruneThreshHigh,pruneThreshStep):
                    automations = generateAutomations(filteredRules, pruneMetric, pruneThreshold)
                    if verboseF == True and len(automations) > 0:
                        print(automations)
                    
                    for value in validationDict.values():
                        value.sort()
                    tp = 0
                    fp = 0
                    for automation in automations.items():
                        for entityState in automation:
                            truthTable[entityState] = 1
                        if automation in validationDict.items():
                            tp += 1
                        else:
                            fp += 1
                    if tp > 0:
                        fn = len(validationDict) - tp
                        recall = (tp)/(tp+fn)
                        precision = (tp)/(tp+fp)
                        f1 = 2 * (precision * recall) / (precision + recall)
                        result = [minimalFrequency, len(frequentMine), metricThreshold, len(rules), pruneThreshold, precision, recall, f1, tp, fp, fn, len(automations), {str(automations.keys())}, {str(automations.values())}]
                        results.append(result)

    overview = pd.DataFrame.from_records(results, columns=['minimal_frequency', 'itemsets_mined', f'{ruleMetric}_threshold', 'rules_mined', f'{pruneMetric}_prune_threshold', 'precision', 'recall', 'f1' , 'tp', 'fp', 'fn', 'rule_length', 'antecedents', 'consequents'])
    endTime = time.time()
    runningTime = round(endTime - startTime, 2)
    if len(overview) > 0:
        bestAutomation = overview.sort_values(['recall', 'precision', 'confidence_threshold', 'lift_prune_threshold'],ascending = [False, False, False, False]).iloc[0]
        bestItemsetAmount = bestAutomation['itemsets_mined']
        bestRuleAmount = bestAutomation['rules_mined']
        bestPruneAmount = bestAutomation['rule_length']
        bestPrecision = bestAutomation['precision']
        bestRecall = bestAutomation['recall']
        bestf1 = bestAutomation['f1']
        bestMinfreq = bestAutomation['minimal_frequency']
        bestMetricThresh = bestAutomation[f'{ruleMetric}_threshold']
        bestPruneThresh = bestAutomation[f'{pruneMetric}_prune_threshold']
        print('--------------------PERFORMANCE-----------------------')
        print(f'Completed in {runningTime} seconds')
        print('--------------------COUNTS-----------------------')
        print(f'Total itemsets: {len(data)}')
        print(f'Frequent itemsets: {bestItemsetAmount}')
        print(f'Rules: {bestRuleAmount}')
        print(f'Rules after pruning: {bestPruneAmount}')
        print('--------------------METRICS-----------------------')
        print(f'Min sup frequency: {bestMinfreq}')
        print(f'{ruleMetric} threshold:{bestMetricThresh}')
        print(f'{pruneMetric} prune threshold:{bestPruneThresh}')
        print(f'TP: {bestAutomation["tp"]}, FP: {bestAutomation["fp"]}, FN: {bestAutomation["fn"]}')
        print(f'Precision: {bestPrecision}')
        print(f'Recall:{bestRecall}')
        print(f'F1:{bestf1}')
        antlist = str(bestAutomation['antecedents']).split(']", ')
        conlist = str(bestAutomation['consequents']).split('], ')
        display(pd.DataFrame({'Antecedents': antlist, 'Consequents': conlist}))
    else: print('No rules found.')
    return overview

In [146]:
timeslotAutomations = hyperDrive(timeslotsEncoded, ruleMetric = 'confidence', pruneMetric = 'lift', freqLow=0.001 ,freqHigh = 0.1, freqStep = 0.01, antecedents_include = ['binary_sensor'], antecedents_exclude = ['light'], consequents_exclude = ['sun', 'binary_sensor'])

Progress: 0/9.9, Metric: 0/10.0

--------------------PERFORMANCE-----------------------
Completed in 11.28 seconds
--------------------COUNTS-----------------------
Total itemsets: 1357
Frequent itemsets: 109
Rules: 628
Rules after pruning: 8
--------------------METRICS-----------------------
Min sup frequency: 0.011
confidence threshold:0.1
lift prune threshold:1.5
TP: 3, FP: 5, FN: 2
Precision: 0.375
Recall:0.6
F1:0.4615384615384615


Unnamed: 0,Antecedents,Consequents
0,"{'dict_keys([""[\'binary_sensor.walk_motion_detection-on\'","{""dict_values([['light.toilet-on'"
1,"""[\'binary_sensor.walk_motion_detection-on\', \'sun_below\'",['light.toilet-on'
2,"""[\'binary_sensor.door_bath_motion_detection-on\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
3,"""[\'binary_sensor.door_bath_motion_detection-on\', \'sun_below\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
4,"""[\'binary_sensor.door_bath_motion_detection-on\', \'binary_sensor.walk_motion_detection-on\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
5,"""[\'binary_sensor.door_bath_motion_detection-on\', \'binary_sensor.walk_motion_detection-on\', \'sun_below\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
6,"""[\'binary_sensor.downstairs_hall_motion_detection-on\'",['light.downstairs-on'
7,"""[\'binary_sensor.downstairs_hall_motion_detection-on\', \'sun_below\']""])'}","['light.downstairs-on']])""}"


In [147]:
sessionsAutomations = hyperDrive(sessionsEncoded, ruleMetric = 'confidence', pruneMetric = 'lift', freqLow=0.001 ,freqHigh = 0.1, freqStep = 0.001, antecedents_include = ['binary_sensor'], antecedents_exclude = ['light'], consequents_exclude = ['sun', 'binary_sensor'])
#sessionsAutomations.sort_values(['discovery_rate', 'precision'],ascending = [False, False]).head(3)

--------------------PERFORMANCE-----------------------
Completed in 83.64 seconds
--------------------COUNTS-----------------------
Total itemsets: 184
Frequent itemsets: 337
Rules: 5979
Rules after pruning: 6
--------------------METRICS-----------------------
Min sup frequency: 0.011
confidence threshold:0.7000000000000001
lift prune threshold:1.5
TP: 3, FP: 3, FN: 2
Precision: 0.5
Recall:0.6
F1:0.5454545454545454


Unnamed: 0,Antecedents,Consequents
0,"{'dict_keys([""[\'binary_sensor.walk_motion_detection-on\'","{""dict_values([['light.toilet-on'"
1,"""[\'binary_sensor.walk_motion_detection-on\', \'sun_below\'",['light.toilet-on'
2,"""[\'binary_sensor.door_bath_motion_detection-on\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
3,"""[\'binary_sensor.door_bath_motion_detection-on\', \'sun_below\'","['light.mirror-on', 'light.sill-on', 'light.spots_bath-on'"
4,"""[\'binary_sensor.downstairs_hall_motion_detection-on\'",['light.downstairs-on'
5,"""[\'binary_sensor.downstairs_hall_motion_detection-on\', \'sun_below\']""])'}","['light.downstairs-on']])""}"


In [148]:
statefulEncoded

Unnamed: 0,fan.ventilation,sun.sun,binary_sensor.walk_motion_detection,light.toilet,binary_sensor.door_bath_motion_detection,light.mirror,light.sill,light.spots_bath,binary_sensor.door_living_motion_detection,binary_sensor.downstairs_hall_motion_detection,light.downstairs,binary_sensor.door_door_sensor,switch.pc,light.walk,light.window_bed,light.closet,light.ambient,light.bed_left,light.hue_play_r,light.bed_right,light.hue_play_l,light.accent_bed,switch.motion_override,switch.ps5,light.window_living,light.bar,light.showcase,light.cupboard,light.kitchen,light.vinyl_player,binary_sensor.door,light.upstairs,light.spots_bed,light.spots_living,light.accent_living,light.window_light_2,light.window_light_3,light.window_light_1
0,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27812,True,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
27813,True,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
27814,True,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
27815,True,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True


In [149]:
sessionsEncoded

Unnamed: 0,binary_sensor.door_bath_motion_detection-on,binary_sensor.door_door_sensor-on,binary_sensor.door_living_motion_detection-on,binary_sensor.downstairs_hall_motion_detection-on,binary_sensor.microwave_combination_oven_door-on,binary_sensor.microwave_combination_oven_remote_control-on,binary_sensor.walk_motion_detection-on,binary_sensor.walk_tampering_product_cover_removed-on,fan.ventilation-on,light.ambient-on,light.bed_left-on,light.bed_right-on,light.closet-on,light.downstairs-on,light.hue_play_l-on,light.hue_play_r-on,light.kitchen-on,light.microwave_combination_oven_light-on,light.mirror-on,light.sill-on,light.spots_bath-on,light.spots_living-on,light.toilet-on,light.walk-on,light.window_bed-on,light.window_living-on,media_player.receiver-on,media_player.tv-on,sensor.microwave_combination_oven_status-on,sun_below,switch.motion_override-on
0,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False
1,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,True,False
2,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,True,False
3,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,True,False
4,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,False,False,False,False,False,False,True,False,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False
180,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,True,False
181,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,False,True,False
182,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False


In [150]:
statefulAutomations = hyperDrive(statefulEncoded, ruleMetric = 'confidence', pruneMetric = 'lift', freqLow=0.005 ,freqHigh = 0.006, freqStep = 0.001, antecedents_include = ['binary_sensor'], antecedents_exclude = ['light'], consequents_exclude = ['sun', 'binary_sensor'])

Progress: 0/1.0, Metric: 0/10.0

  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTable[entityState] = 1
  truthTab

Progress: 0/1.0, Metric: 4/10.0

KeyboardInterrupt: 

In [None]:
sessionItemsets_test = sessionGenerator(test)

Generated 137 itemsets in 0.8330180644989014 seconds!


In [None]:
#VALIDATION
sessionItemsets_testEncoded = transaction_encoder(sessionItemsets_test)
frequentSessionsTest = fpgrowth(sessionItemsets_testEncoded , min_support=0.005, use_colnames=True)
sessionsRuleTest = association_rules(frequentSessionsTest, metric="confidence", min_threshold=0.7)
sessionsRuleTest["consequents"] = sessionsRuleTest["consequents"].apply(lambda x: ', '.join(list(x))).astype("unicode")
sessionsRuleTest["antecedents"] = sessionsRuleTest["antecedents"].apply(lambda x: ', '.join(list(x))).astype("unicode")

antecedents_include = ['binary_sensor']
antecedents_exclude = ['light']
consequents_exclude = ['sun', 'binary_sensor']
final = sessionsRuleTest[(sessionsRuleTest['antecedents'].str.contains('|'.join(antecedents_include))) & ~(sessionsRuleTest['antecedents'].str.contains('|'.join(antecedents_exclude))) & ~(sessionsRuleTest['consequents'].str.contains('|'.join(consequents_exclude)))]

automations = generateAutomations(final, 'lift', 1.5)
counter = 1
for automation in automations:
    print(f'-------AUTOMATION {counter}-------')
    counter += 1
    print(automation)
    print(automations[automation])

-------AUTOMATION 1-------
['binary_sensor.door_bath_motion_detection-on']
['light.mirror-on', 'light.sill-on', 'light.spots_bath-on']
-------AUTOMATION 2-------
['binary_sensor.door_bath_motion_detection-on', 'sun_below']
['light.mirror-on', 'light.sill-on', 'light.spots_bath-on']
-------AUTOMATION 3-------
['binary_sensor.walk_motion_detection-on']
['light.toilet-on']
-------AUTOMATION 4-------
['binary_sensor.walk_motion_detection-on', 'sun_below']
['light.toilet-on']
-------AUTOMATION 5-------
['binary_sensor.door_living_motion_detection-on']
['light.kitchen-on']
-------AUTOMATION 6-------
['binary_sensor.door_living_motion_detection-on', 'sun_below']
['light.kitchen-on']
-------AUTOMATION 7-------
['binary_sensor.downstairs_hall_motion_detection-on']
['light.toilet-on']
-------AUTOMATION 8-------
['binary_sensor.downstairs_hall_motion_detection-on', 'sun_below']
['light.toilet-on']
