In [1]:
import pandas as pd
import numpy as np
import datetime
import os
import datetime
import csv
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.cluster import KMeans
from datetime import timedelta, date
%matplotlib inline

### Declare all file path variables

In [2]:
house = 'House_5'
path_to_house = "../../../Dataset/ukdale/" + house + "/"
labels_file_path = path_to_house + "labels.dat"

time_csv_path = "./Time_CSVs/" + house + "/"
resampling_time_in_min = "30min"

### Method to get all channel file paths from the house directory

In [3]:
def get_channel_files(house_path):
    """
    Get channel files from the house directory. 
    
    Input:
    
    house_path = Path to house folder/directory
    
    Output:
    
    filepath_array = Array of file paths 
    
    """
    if(house_path[-1] != '/'):
        house_path = house_path + '/'
    filepath_array = []
    for x in os.listdir(house_path):
        if 'channel_' in x and x != "channel_1.dat":
            filepath_array.append(house_path + x)
    return filepath_array

### Method to read a channel file and add it to a dataframe

In [4]:
def read_channel_file(filepath):
    """
    This method reads channel file (.dat) using file path and returns a dataframe.
    
    Input:
    
    filepath = Path of the input channel (.dat) file
    
    Output:
    
    channel_df = Channel dataframe
    
    """
    channel_df = pd.read_csv(filepath, sep='\\s+', names=['Timestamp','Reading'], parse_dates=['Timestamp'], header=0)
    return channel_df

### Method to resample the channel usage in given time intervals

In [5]:
def resampling(input_df, time):
    """
    This method takes channel usage dataframe and time interval as input 
    and resamples the data by the input time. 
    
    Input:
    
    input_df = Channel usage dataframe
    time = time interval for resampling
    
    Output:
    
    final_data = Resampled dataframe
    
    """
    dataframe = input_df.set_index('Timestamp')
    dataframe.index = pd.to_datetime(dataframe.index,unit = "s")
    resample = dataframe.resample(time)
    resampled_data = resample.mean()
    final_data = resampled_data.reset_index()
    return final_data

### Method to resample and generate channel's on/off status data

In [6]:
def get_resampled_dict(filepath_list, label_dict):
    resampled_dict = {}
    for file in filepath_list:
        if('button' in file):
            continue
        df = read_channel_file(file)
        resampled_data = resampling(df, resampling_time_in_min)
        resampled_data = resampled_data.fillna(0)
        filename = file.split('/')[-1].split('.')[0]
        resampled_dict[label_dict[filename]] = resampled_data
    return resampled_dict

### Method to split datetime 

In [7]:
def add_time_columns(df):
    df['Timestamp'] = pd.to_datetime(df.Timestamp, format = '%Y-%m-%d %H:%M:%S')
    for i in ([df]):
        i['Date'] = i.Timestamp.dt.date
        i['Time'] = i.Timestamp.dt.time

### Method to get labels from the labels.dat file of UK-Dale dataset

In [8]:
def get_labels(filepath):
    """
    This method takes label file path as input and returns a dataframe with channel and appliance mappings
    
    Input:
    
    Label data file path.
    
    Output:
    
    Channel-Appliance name dataframe.
    
    """
    
    labels_df = pd.read_csv(labels_file_path, sep='\\s+', names=['Channel_id','Appliance'])
    labels_df["Channel_id"] = ["channel_"+str(i) for i in range(1,labels_df.shape[0]+1)]
    labels_dict = dict()
    for row in labels_df.iterrows():
        labels_dict[row[1]["Channel_id"]] = row[1]["Appliance"]
    return labels_dict
    # return labels_df, labels_dict

In [38]:
def get_all_times_of_day(interval):
    """
    This method generates a list of times of a day seperated by specified interval.
    
    Input:
    interval = The gap between two neighboring time slots
    
    Output:
    Time = List of times seperated by specified interval
    """
    hour = ['00','01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23']
    minute = ['00', str(interval)[:-3]]
    second = '00'
    Time = []
    for hr in hour:
        for min in minute:
            temp = [hr, min, second]
            temp = ':'.join(temp)
            Time.append(temp)
    return Time

In [42]:
Time = get_all_times_of_day(resampling_time_in_min)

In [44]:
# Time

### Get the list of channel files for a house

In [9]:
label_dict = get_labels(path_to_house)

In [10]:
filepath_list = get_channel_files(path_to_house)

### Resample data, split datetime and create appliance wise dataframes of time and average values

In [11]:
resampled_df_dict = get_resampled_dict(filepath_list, label_dict)

In [47]:
resampled_df_dict['PS4'].head()

Unnamed: 0,Timestamp,Reading,Date,Time
0,2014-06-29 16:00:00,0.0,2014-06-29,16:00:00
1,2014-06-29 16:30:00,0.0,2014-06-29,16:30:00
2,2014-06-29 17:00:00,0.0,2014-06-29,17:00:00
3,2014-06-29 17:30:00,0.0,2014-06-29,17:30:00
4,2014-06-29 18:00:00,0.0,2014-06-29,18:00:00


In [12]:
for val in label_dict.values():
    if(val != 'aggregate'):
        add_time_columns(resampled_df_dict[val])

In [13]:
resampled_avg_df_dict = {}
appliance_time_avg_df_dict = {}

for val in label_dict.values():
    if(val != 'aggregate'):
        resampled_avg_df_dict[val]=resampled_df_dict[val].groupby('Time')['Reading'].mean()
        
for val in label_dict.values():
    if(val != 'aggregate'):
        times = list(resampled_avg_df_dict[val].index)
        readings= list(resampled_avg_df_dict[val].values)
        appliance_time_avg_df_dict[val] = pd.DataFrame({'Time':times,'Average':readings})

In [32]:
resampled_avg_df_dict['PS4']

Time
00:00:00    0.000000
00:30:00    0.000000
01:00:00    0.000000
01:30:00    0.000000
02:00:00    0.000000
02:30:00    0.000000
03:00:00    0.000000
03:30:00    0.000000
04:00:00    0.000000
04:30:00    0.000000
05:00:00    0.000000
05:30:00    0.000000
06:00:00    0.000000
06:30:00    0.000000
07:00:00    0.000000
07:30:00    0.000000
08:00:00    0.000000
08:30:00    0.007018
09:00:00    0.000000
09:30:00    0.000000
16:00:00    0.000000
16:30:00    0.000000
17:00:00    0.000000
17:30:00    0.000000
18:00:00    0.000000
18:30:00    0.000000
19:00:00    0.000000
19:30:00    0.000000
20:00:00    0.000000
20:30:00    0.000000
21:00:00    0.000000
21:30:00    0.000000
22:00:00    0.000000
22:30:00    0.000000
23:00:00    0.000000
23:30:00    0.000000
Name: Reading, dtype: float64

In [31]:
#  resampled_avg_df_dict['tv_dvd_digibox_lamp'].index

In [15]:
label_dict.values()

dict_values(['aggregate', 'stereo_speakers_bedroom', 'i7_desktop', 'hairdryer', 'primary_tv', '24_inch_lcd_bedroom', 'treadmill', 'network_attached_storage', 'core2_server', '24_inch_lcd', 'PS4', 'steam_iron', 'nespresso_pixie', 'atom_pc', 'toaster', 'home_theatre_amp', 'sky_hd_box', 'kettle', 'fridge_freezer', 'oven', 'electric_hob', 'dishwasher', 'microwave', 'washer_dryer', 'vacuum_cleaner'])

In [16]:
# times = list(resampled_avg_df_dict['laptop'].index)

In [17]:
# appliance_time_avg_df_dict['laptop']

### Dictionary to save correct appliance name

In [20]:
show_name_dict = dict()
show_name_dict['laptop'] = "Laptop"
show_name_dict['monitor'] = "Monitor"
show_name_dict['speakers'] = "Speakers"
show_name_dict['server'] = "Server"
show_name_dict['router'] = "Router"
show_name_dict['server_hdd'] = "Server_hdd"
show_name_dict['kettle'] = "Kettle"
show_name_dict['rice_cooker'] = "Rice Cooker"
show_name_dict['running_machine'] = "Running Machine"
show_name_dict['laptop2'] = "Laptop2"
show_name_dict['washing_machine'] = "Washing Machine"
show_name_dict['dish_washer'] = "Dish Washer"
show_name_dict['fridge'] = "Fridge"
show_name_dict['microwave'] = "Microwave"
show_name_dict['toaster'] = "Toaster"
show_name_dict['playstation'] = "Playstation"
show_name_dict['modem'] = "Modem"
show_name_dict['cooker'] = "Cooker"
show_name_dict['electric_heater'] = "Electric Heater"
show_name_dict['projector'] = "Projector"
show_name_dict['tv_dvd_digibox_lamp'] = "TV & DVD Digibox & Lamp"
show_name_dict['kettle_radio'] = "Kettle & Radio"
show_name_dict['gas_boiler'] = "Gas Boiler"
show_name_dict['freezer'] = "Freezer"
show_name_dict['washing_machine_microwave_breadmaker'] = "Washing Machine & Microwave & Breadmaker"
show_name_dict['stereo_speakers_bedroom'] = "Bedroom Stereo Speakers"
show_name_dict['i7_desktop'] = "Desktop"
show_name_dict['hairdryer'] = "Hairdryer"
show_name_dict['primary_tv'] = "Primary TV"
show_name_dict['24_inch_lcd_bedroom'] = "Bedroom TV"
show_name_dict['treadmill'] = "Treadmill"
show_name_dict['network_attached_storage'] = "Network Storage"
show_name_dict['core2_server'] = "Core 2 Server"
show_name_dict['24_inch_lcd'] = "Secondary TV"
show_name_dict['PS4'] = "PS4"
show_name_dict['steam_iron'] = "Steam Iron"
show_name_dict['nespresso_pixie'] = "Espresso Machine"
show_name_dict['atom_pc'] = "PC"
show_name_dict['toaster'] = "Toaster"
show_name_dict['home_theatre_amp'] = "Home Theatre Amp"
show_name_dict['sky_hd_box'] = "Sky HD Box"
show_name_dict['fridge_freezer'] = "Freezer"
show_name_dict['oven'] = "Oven"
show_name_dict['electric_hob'] = "Electric Hob"
show_name_dict['washer_dryer'] = "Washer & Dryer"
show_name_dict['vacuum_cleaner'] = "Vacuum Cleaner"
show_name_dict['dishwasher'] = "Dish Washer"

### Logic to create dictionay of time to appliance values

In [105]:
time_dict = {}
for time in times:
    time_dict[time] = {}
    for appliance in label_dict.values():
        if(appliance != 'aggregate'):
            df = appliance_time_avg_df_dict[appliance]
            app = show_name_dict[appliance]
            print(time)
            print(app)
            if(time not in list(df["Time"])):
                time_dict[time][app] = 0.0
            else:
                time_dict[time][app] = df.loc[df["Time"]==time]['Average'].iloc[0]

00:00:00
Bedroom Stereo Speakers
00:00:00
Desktop
00:00:00
Hairdryer
00:00:00
Primary TV
00:00:00
Bedroom TV
00:00:00
Treadmill
00:00:00
Network Storage
00:00:00
Core 2 Server
00:00:00
Secondary TV
00:00:00
PS4
00:00:00
Steam Iron
00:00:00
Espresso Machine
00:00:00
PC
00:00:00
Toaster
00:00:00
Home Theatre Amp
00:00:00
Sky HD Box
00:00:00
Kettle
00:00:00
Freezer
00:00:00
Oven
00:00:00
Electric Hob
00:00:00
Dish Washer
00:00:00
Microwave
00:00:00
Washer & Dryer
00:00:00
Vacuum Cleaner
00:30:00
Bedroom Stereo Speakers
00:30:00
Desktop
00:30:00
Hairdryer
00:30:00
Primary TV
00:30:00
Bedroom TV
00:30:00
Treadmill
00:30:00
Network Storage
00:30:00
Core 2 Server
00:30:00
Secondary TV
00:30:00
PS4
00:30:00
Steam Iron
00:30:00
Espresso Machine
00:30:00
PC
00:30:00
Toaster
00:30:00
Home Theatre Amp
00:30:00
Sky HD Box
00:30:00
Kettle
00:30:00
Freezer
00:30:00
Oven
00:30:00
Electric Hob
00:30:00
Dish Washer
00:30:00
Microwave
00:30:00
Washer & Dryer
00:30:00
Vacuum Cleaner
01:00:00
Bedroom Stere

09:30:00
Toaster
09:30:00
Home Theatre Amp
09:30:00
Sky HD Box
09:30:00
Kettle
09:30:00
Freezer
09:30:00
Oven
09:30:00
Electric Hob
09:30:00
Dish Washer
09:30:00
Microwave
09:30:00
Washer & Dryer
09:30:00
Vacuum Cleaner
10:00:00
Bedroom Stereo Speakers
10:00:00
Desktop
10:00:00
Hairdryer
10:00:00
Primary TV
10:00:00
Bedroom TV
10:00:00
Treadmill
10:00:00
Network Storage
10:00:00
Core 2 Server
10:00:00
Secondary TV
10:00:00
PS4
10:00:00
Steam Iron
10:00:00
Espresso Machine
10:00:00
PC
10:00:00
Toaster
10:00:00
Home Theatre Amp
10:00:00
Sky HD Box
10:00:00
Kettle
10:00:00
Freezer
10:00:00
Oven
10:00:00
Electric Hob
10:00:00
Dish Washer
10:00:00
Microwave
10:00:00
Washer & Dryer
10:00:00
Vacuum Cleaner
10:30:00
Bedroom Stereo Speakers
10:30:00
Desktop
10:30:00
Hairdryer
10:30:00
Primary TV
10:30:00
Bedroom TV
10:30:00
Treadmill
10:30:00
Network Storage
10:30:00
Core 2 Server
10:30:00
Secondary TV
10:30:00
PS4
10:30:00
Steam Iron
10:30:00
Espresso Machine
10:30:00
PC
10:30:00
Toaster
10:30

19:00:00
Network Storage
19:00:00
Core 2 Server
19:00:00
Secondary TV
19:00:00
PS4
19:00:00
Steam Iron
19:00:00
Espresso Machine
19:00:00
PC
19:00:00
Toaster
19:00:00
Home Theatre Amp
19:00:00
Sky HD Box
19:00:00
Kettle
19:00:00
Freezer
19:00:00
Oven
19:00:00
Electric Hob
19:00:00
Dish Washer
19:00:00
Microwave
19:00:00
Washer & Dryer
19:00:00
Vacuum Cleaner
19:30:00
Bedroom Stereo Speakers
19:30:00
Desktop
19:30:00
Hairdryer
19:30:00
Primary TV
19:30:00
Bedroom TV
19:30:00
Treadmill
19:30:00
Network Storage
19:30:00
Core 2 Server
19:30:00
Secondary TV
19:30:00
PS4
19:30:00
Steam Iron
19:30:00
Espresso Machine
19:30:00
PC
19:30:00
Toaster
19:30:00
Home Theatre Amp
19:30:00
Sky HD Box
19:30:00
Kettle
19:30:00
Freezer
19:30:00
Oven
19:30:00
Electric Hob
19:30:00
Dish Washer
19:30:00
Microwave
19:30:00
Washer & Dryer
19:30:00
Vacuum Cleaner
20:00:00
Bedroom Stereo Speakers
20:00:00
Desktop
20:00:00
Hairdryer
20:00:00
Primary TV
20:00:00
Bedroom TV
20:00:00
Treadmill
20:00:00
Network Stora

In [106]:
# if (str(times[1]) in list(df["Time"].astype(np.str))):
#     print(times[0])

In [107]:
# str(times[1])

In [108]:
# type(df["Time"].astype(np.str))

In [109]:
# df = appliance_time_avg_df_dict['PS4']

In [110]:
# df["Time"][0]

In [111]:
# show_name_dict['PS4']

In [113]:
# time_dict

In [114]:
# time_dict[times[0]]

### save CSVs

In [115]:
for time in times:
    t = str(time.strftime("%H:%M:%S"))
    time_dict_df = pd.DataFrame(time_dict[time].items(),columns=['Appliance','Average'])
    time_dict_df.to_csv(r''+time_csv_path + str(t) + '.csv', header=True)
    