In [303]:
import pandas as pd
import numpy as np
import datetime
import os
import datetime
import csv
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.cluster import KMeans
from datetime import timedelta, date
%matplotlib inline

### Declare all file path variables

In [422]:
# Name of the house folder to analyse. It was previously referenced without
# being defined, which raised a NameError on a fresh kernel.
house = "house_2"
# Label file inside the shared dataset checkout.
labels_file = "../../../../Dataset/ukdale/" + house + "/labels.dat"
# Local working copy of the house data; all paths below are derived from it.
path_to_house = "./" + house
labels_file_path = path_to_house+"/labels.dat"
# Prefix (folder + file-name stem) of the per-time-slot CSV files.
time_csv_Path = path_to_house+"/Time_CSV/Time_"
# pandas offset alias used as the resampling interval.
resampling_time_in_min = "30min"

### Method to get all channel file paths from the house directory

In [423]:
def get_channel_files(house_path):
    """
    Get channel files from the house directory.

    Input:

    house_path = Path to house folder/directory (with or without a trailing '/')

    Output:

    filepath_array = Sorted list of paths ('/'-joined) to every directory entry
                     whose name contains 'channel_', except "channel_1.dat"

    """
    if(house_path[-1] != '/'):
        house_path = house_path + '/'
    # os.listdir returns entries in arbitrary order; sorting makes repeated
    # runs process the channels in the same order.
    filepath_array = [
        house_path + entry
        for entry in sorted(os.listdir(house_path))
        if 'channel_' in entry and entry != "channel_1.dat"
    ]
    return filepath_array

### Method to read a channel file and add it to a dataframe

In [424]:
def read_channel_file(filepath):
    """
    This method reads channel file (.dat) using file path and returns a dataframe.

    The file is read as whitespace-separated text with two columns and no
    header row, so every line (including the first) becomes a data row.

    Input:

    filepath = Path of the input channel (.dat) file

    Output:

    channel_df = Channel dataframe with columns 'Timestamp' and 'Reading'

    """
    # header=None: with names= supplied, header=0 would treat the first line as
    # a header to be replaced and silently drop the first reading.
    # NOTE(review): parse_dates is kept as in the original; resampling() does the
    # actual conversion with unit="s" - confirm whether it is still needed here.
    channel_df = pd.read_csv(
        filepath,
        sep='\\s+',
        names=['Timestamp','Reading'],
        parse_dates=['Timestamp'],
        header=None,
    )
    return channel_df

### Method to resample the channel usage in given time intervals

In [425]:
def resampling(input_df, time):
    """
    Average a channel's readings over fixed-length time bins.

    The 'Timestamp' column is moved to the index, interpreted as seconds
    since the epoch, and the remaining columns are averaged per bin.
    The input dataframe itself is left unchanged.

    Input:

    input_df = Channel usage dataframe with a 'Timestamp' column
    time = Bin width passed to DataFrame.resample (e.g. "30min")

    Output:

    final_data = Dataframe with one row per bin; 'Timestamp' is a regular
                 column again and empty bins hold NaN

    """
    by_timestamp = input_df.set_index('Timestamp')
    # Index values are epoch seconds; turn them into datetimes for resample().
    by_timestamp.index = pd.to_datetime(by_timestamp.index, unit = "s")
    final_data = by_timestamp.resample(time).mean().reset_index()
    return final_data

### Method to resample every channel file and collect the results by appliance label

In [426]:
def get_resampled_dict(filepath_list, label_dict, resample_interval=None):
    """
    Read and resample every channel file and key the results by appliance label.

    Input:

    filepath_list = List of '/'-separated channel file paths
    label_dict = Mapping from file stem (e.g. "channel_2") to appliance label
    resample_interval = Interval passed to resampling(); defaults to the
                        notebook-level resampling_time_in_min setting

    Output:

    resampled_dict = Mapping from appliance label to its resampled dataframe,
                     with empty bins filled with 0

    """
    if resample_interval is None:
        resample_interval = resampling_time_in_min
    resampled_dict = {}
    for path in filepath_list:
        basename = path.split('/')[-1]
        # Test only the file name: checking the whole path would skip every
        # channel when a parent directory happens to contain "button".
        if 'button' in basename:
            continue
        channel_df = read_channel_file(path)
        resampled_data = resampling(channel_df, resample_interval).fillna(0)
        channel_name = basename.split('.')[0]
        resampled_dict[label_dict[channel_name]] = resampled_data
    return resampled_dict

### Method to split datetime 

In [427]:
def add_time_columns(df):
    """
    Split the 'Timestamp' column of a dataframe into date and time parts.

    The dataframe is modified in place: 'Timestamp' is converted to datetime
    and two columns are added, 'Date' (calendar date) and 'Time' (time of day).

    Input:

    df = Dataframe with a 'Timestamp' column

    Output:

    None (df is updated in place)

    """
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], format = '%Y-%m-%d %H:%M:%S')
    stamp_parts = df['Timestamp'].dt
    df['Date'] = stamp_parts.date
    df['Time'] = stamp_parts.time

### Method to get labels from the labels.dat file of UK-Dale dataset

In [428]:
def get_labels(filepath):
    """
    This method takes label file path as input and returns a channel to appliance mapping.

    Input:

    filepath = Path to the label data file, or to the house directory that
               contains a "labels.dat" file.

    Output:

    labels_dict = Dictionary mapping "channel_<n>" to the appliance name on
                  the n-th line of the label file (n starts at 1).

    """
    # The argument used to be ignored in favour of a notebook-level global.
    # Existing callers pass the house directory, so accept that form as well.
    if os.path.isdir(filepath):
        filepath = os.path.join(filepath, "labels.dat")

    labels_df = pd.read_csv(filepath, sep='\\s+', names=['Channel_id','Appliance'])
    # Channel ids are assigned by row position, not taken from the first column.
    channel_ids = ["channel_"+str(i) for i in range(1,labels_df.shape[0]+1)]
    labels_dict = dict(zip(channel_ids, labels_df["Appliance"]))
    return labels_dict

### Load the channel labels and get the list of channel files for a house

In [429]:
# get_labels documents its argument as the label file path, so pass the
# labels.dat path rather than the house directory.
label_dict = get_labels(labels_file_path)

In [430]:
# Paths of all 'channel_' files in the house folder; get_channel_files leaves out channel_1.dat.
filepath_list = get_channel_files(path_to_house)

### Resample data, split datetime and create appliance wise dataframes of time and average values

In [431]:
# Resample every channel file and key the resulting dataframes by appliance label.
resampled_df_dict = get_resampled_dict(filepath_list, label_dict)

In [432]:
# Add 'Date' and 'Time' columns to every appliance dataframe; the
# 'aggregate' label is skipped.
for appliance_label in label_dict.values():
    if appliance_label == 'aggregate':
        continue
    add_time_columns(resampled_df_dict[appliance_label])

In [433]:
# Every label except 'aggregate', in label-file order.
appliance_labels = [label for label in label_dict.values() if label != 'aggregate']

# Mean reading per time-of-day slot for each appliance (Series indexed by 'Time').
resampled_avg_df_dict = {
    label: resampled_df_dict[label].groupby('Time')['Reading'].mean()
    for label in appliance_labels
}

# The same averages as two-column dataframes: 'Time' and 'Average'.
appliance_time_avg_df_dict = {
    label: pd.DataFrame({'Time': list(slot_means.index), 'Average': list(slot_means.values)})
    for label, slot_means in resampled_avg_df_dict.items()
}

In [434]:
#  resampled_avg_df_dict['tv_dvd_digibox_lamp'].index

In [435]:
# Take the time-of-day slots from the first available appliance instead of a
# hard-coded 'laptop' entry, which raises KeyError for houses without that label.
_reference_averages = next(iter(resampled_avg_df_dict.values()), None)
times = list(_reference_averages.index) if _reference_averages is not None else []

In [436]:
# Display the 'Time' / 'Average' table built for the 'laptop' appliance.
appliance_time_avg_df_dict['laptop']

Unnamed: 0,Time,Average
0,00:00:00,7.499268
1,00:30:00,6.277485
2,01:00:00,5.523409
3,01:30:00,4.16492
4,02:00:00,3.585104
5,02:30:00,2.6671
6,03:00:00,1.529357
7,03:30:00,0.607908
8,04:00:00,0.477899
9,04:30:00,0.493814


### Dictionary mapping appliance labels to their display names

In [437]:
# Appliance label (as stored in label_dict) -> human-readable display name.
show_name_dict = {
    'laptop': "Laptop",
    'monitor': "Monitor",
    'speakers': "Speakers",
    'server': "Server",
    'router': "Router",
    'server_hdd': "Server_hdd",
    'kettle': "Kettle",
    'rice_cooker': "Rice Cooker",
    'running_machine': "Running Machine",
    'laptop2': "Laptop2",
    'washing_machine': "Washing Machine",
    'dish_washer': "Dish Washer",
    'fridge': "Fridge",
    'microwave': "Microwave",
    'toaster': "Toaster",
    'playstation': "Playstation",
    'modem': "Modem",
    'cooker': "Cooker",
    'electric_heater': "Electric Heater",
    'projector': "Projector",
    'tv_dvd_digibox_lamp': "TV & DVD Digibox & Lamp",
    'kettle_radio': "Kettle & Radio",
    'gas_boiler': "Gas Boiler",
    'freezer': "Freezer",
    'washing_machine_microwave_breadmaker': "Washing Machine & Microwave & Breadmaker",
}

### Logic to create a dictionary mapping each time slot to the appliance averages

In [438]:
# Index each appliance's averages by time slot once, so every lookup below is a
# direct index access instead of a boolean-mask scan of the whole dataframe.
# Labels without an entry in show_name_dict fall back to the raw label
# instead of raising KeyError.
_averages_by_display_name = {
    show_name_dict.get(appliance, appliance):
        appliance_time_avg_df_dict[appliance].set_index('Time')['Average']
    for appliance in label_dict.values()
    if appliance != 'aggregate'
}

# time slot -> {display name -> average reading in that slot}
time_dict = {
    time: {
        name: averages.loc[time]
        for name, averages in _averages_by_display_name.items()
    }
    for time in times
}

In [442]:
# Inspect the display-name -> average mapping stored for the first time slot.
time_dict[times[0]]

{'Laptop': 7.499268001615402,
 'Monitor': 10.438064889879872,
 'Speakers': 4.042233708376429,
 'Server': 15.092855733053968,
 'Router': 8.462483876929198,
 'Server_hdd': 2.349382231303889,
 'Kettle': 0.5855544931042072,
 'Rice Cooker': 0.8384950057589917,
 'Running Machine': 1.5774164357815255,
 'Laptop2': 0.019210995559630496,
 'Washing Machine': 2.7716389750937447,
 'Dish Washer': 1.2612178277435275,
 'Fridge': 38.92339668446003,
 'Microwave': 0.359711716854574,
 'Toaster': 0.23675755734850815,
 'Playstation': 0.7727599648040582,
 'Modem': 7.5228631857972905,
 'Cooker': 0.0010800658056154238}

### save CSVs

In [440]:
# to_csv does not create missing folders, so make sure the output folder exists.
os.makedirs(os.path.dirname(time_csv_Path) or ".", exist_ok=True)

for time in times:
    # int() drops leading zeros (00:30:00 -> "Time_3000.csv"); the naming is
    # kept unchanged in case existing consumers rely on these file names.
    t = int(time.strftime("%H%M%S"))
    time_dict_df = pd.DataFrame(list(time_dict[time].items()), columns=['Appliance','Average'])
    time_dict_df.to_csv(time_csv_Path + str(t) + '.csv', header=True)
    