In [1]:
from google.colab import drive
from psutil import virtual_memory

drive.mount('/content/gdrive/', force_remount=True)
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(virtual_memory().total / 1e9))

Mounted at /content/gdrive/
Your runtime has 54.8 gigabytes of available RAM



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from functools import reduce
import os

plt.style.use('seaborn')

In [3]:
os.chdir("gdrive/MyDrive/Dissertation/ukdale")

In [4]:
house_list = [1,2,5]
houses = []
for house in house_list:
    print(f'Loading House {house} data...')
    labels_file = pd.read_csv(f'house_{house}/labels.dat', sep=' ', names = ['id','Labels'])
    labels = []
    id = []
    for idx, row in labels_file.iterrows():
        label = row['Labels']
        if re.findall('.*aggregate*.', label) or re.findall('.*wash*.', label) or re.findall('.*dish*.', label) or re.findall('.*fridge*.', label):
            id.append(row['id']) 
            if row['Labels'] != 'aggregate':
                labels.append(row['Labels'][0].upper())
            else:
                labels.append(row['Labels'])

    df= []
    for idx, label in zip(id, labels):
        h = pd.read_csv(f'house_{house}/channel_{idx}.dat', sep = ' ', names = ['Date', label])
        h['Date'] = pd.to_datetime(h['Date'],unit = 's')
        h = h.resample('60s', on='Date').sum()
        if house == 1:
            h = h['2013-04':'2014-04']
        if house == 2:
            h = h['2013-02':'2013-10']
        df.append(h)

    dfs = reduce(lambda  x,y: pd.merge(x, y, left_index=True, right_index=True), df)
    dfs = dfs[['aggregate', 'F', 'W', 'D']]
    dfs.rename(columns={'W': 'washing_machine', 'D': 'dishwasher', 'F':'fridge'}, inplace=True)
    dfs = dfs[dfs['aggregate'] != 0]
    print(dfs.columns)
    houses.append(dfs)
    print(f'House {house} done...')
    print(f'House {house} of shape {dfs.shape}')
    print('---------------------')

Loading House 1 data...
Index(['aggregate', 'fridge', 'washing_machine', 'dishwasher'], dtype='object')
House 1 done...
House 1 of shape (568303, 4)
---------------------
Loading House 2 data...
Index(['aggregate', 'fridge', 'washing_machine', 'dishwasher'], dtype='object')
House 2 done...
House 2 of shape (172771, 4)
---------------------
Loading House 5 data...
Index(['aggregate', 'fridge', 'washing_machine', 'dishwasher'], dtype='object')
House 5 done...
House 5 of shape (190043, 4)
---------------------


In [5]:
house_events = []
for house, df in enumerate(houses):
    print('Getting events for appliances in house {}...'.format(house_list[house]))
    for col in df.columns:
        if col == 'washing_machine':
            df[col] = df[col].map(lambda val: 1 if val > 20 else 0) 
            print('{} - {}'.format(col, np.unique(df[col], return_counts=True)))
        elif col == 'dishwasher':
            df[col] = df[col].map(lambda val: 1 if val > 10 else 0) 
            print('{} - {}'.format(col, np.unique(df[col], return_counts=True)))
        elif col == 'fridge':
            df[col] = df[col].map(lambda val: 1 if val > 50 else 0) 
            print('{} - {}'.format(col, np.unique(df[col], return_counts=True)))
        else:
            continue
    house_events.append(df)
    print('House {} events done'.format(house_list[house]))
    print('---------------------')

Getting events for appliances in house 1...
fridge - (array([0, 1]), array([318707, 249596]))
washing_machine - (array([0, 1]), array([537712,  30591]))
dishwasher - (array([0, 1]), array([554649,  13654]))
House 1 events done
---------------------
Getting events for appliances in house 2...
fridge - (array([0, 1]), array([    14, 172757]))
washing_machine - (array([0, 1]), array([ 11270, 161501]))
dishwasher - (array([0, 1]), array([166366,   6405]))
House 2 events done
---------------------
Getting events for appliances in house 5...
fridge - (array([0, 1]), array([119543,  70500]))
washing_machine - (array([0, 1]), array([ 27054, 162989]))
dishwasher - (array([0, 1]), array([185388,   4655]))
House 5 events done
---------------------


In [6]:
house_events[0]

Unnamed: 0_level_0,aggregate,fridge,washing_machine,dishwasher
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-04-01 00:00:00,1501,0,0,0
2013-04-01 00:01:00,1674,0,0,0
2013-04-01 00:02:00,1682,0,0,0
2013-04-01 00:03:00,1677,0,0,0
2013-04-01 00:04:00,1675,0,0,0
...,...,...,...,...
2014-04-30 23:55:00,1399,0,0,0
2014-04-30 23:56:00,1556,0,0,0
2014-04-30 23:57:00,1550,0,0,0
2014-04-30 23:58:00,1302,0,0,0


In [7]:
house_labels = house_events.copy()

for id, house in enumerate(house_labels):
    for appliances in range(1,4):
        dfa = house[house.columns[1:appliances+1]]
        if appliances == 1:
            for col in dfa.columns:
                if col != 'aggregate':
                    house_labels[id][appliances] = dfa[col]
        else:
            labels = []
            for idx, row in dfa.iterrows():
                x = set(list(row[row == 1].keys()))
                if len(x) == 0:
                    labels.append('all_off')
                else:
                    if len(x) == 1:
                        labels.append(list(x)[0])
                    else:
                        labels.append(f'{list(x)[0]}, {list(x)[1]}')
            house_labels[id][appliances] = labels
    print(f'House {house_list[id]} labels extracted')

House 1 labels extracted
House 2 labels extracted
House 5 labels extracted


In [8]:
house_labels[0][2].unique()

array(['all_off', 'fridge', 'washing_machine, fridge', 'washing_machine'],
      dtype=object)

In [9]:
for idx, house in enumerate(house_labels):
    house.to_csv(f'processed_data/house{house_list[idx]}_labeled.csv')