In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Notebook-wide container: later cells store their loaded results in it by key.
data = dict()

In [None]:
# read all the labels for the data into a df
def read_labels_for_all_houses(data_dir='Data/low_freq', house_ids=range(1, 7)):
    """Load the appliance labels of several houses into a single DataFrame.

    For every house id the file ``<data_dir>/house_<id>/labels.dat`` is read
    as a space-separated, header-less table with two fields (appliance id and
    appliance name) and a ``house_id`` column is added to the rows.

    Parameters
    ----------
    data_dir : str or path-like, default 'Data/low_freq'
        Directory containing the ``house_<id>`` folders.
        NOTE(review): ``read_channel_data`` in this notebook reads from
        'data/low_freq' (lower-case "d"). On a case-sensitive file system
        only one of the two spellings can exist -- confirm which is correct.
    house_ids : iterable of int, default range(1, 7)
        Houses whose label files are loaded, in this order.

    Returns
    -------
    pandas.DataFrame
        Columns ``appliance_id`` (int64), ``appliance_name`` (string) and
        ``house_id``; rows of all houses stacked with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``house_ids`` is empty.
    """
    frames = []
    for house in house_ids:
        labels_path = '{}/house_{}/labels.dat'.format(data_dir, house)
        frame = pd.read_csv(
            labels_path,
            sep=" ",
            header=None,
            names=["appliance_id", "appliance_name"],
            dtype={"appliance_id": "int64", "appliance_name": "string"},
        )
        frame['house_id'] = house
        frames.append(frame)
    # ignore_index renumbers the stacked rows, replacing the per-file indexes.
    return pd.concat(frames, ignore_index=True)

# Load the labels once, keep them in the shared container, then show the
# appliance list of each house separately.
labels = read_labels_for_all_houses()
data['labels_for_houses'] = labels

for house in range(1, 7):
    labels_of_house = labels.loc[labels['house_id'] == house]
    print(f'House {house}: \n', labels_of_house, '\n')

In [None]:
# read all the appliance data points for all houses
def read_channel_data(labels_for_houses, house_id, data_dir='data/low_freq'):
    """Read every channel file of one house into a single wide DataFrame.

    For each label row of the house, ``<data_dir>/house_<house_id>/
    channel_<appliance_id>.dat`` is read as a space-separated table with a
    ``unix_time`` field (int64) and one reading field (float64). The channels
    are combined with an inner join on ``unix_time``, so only timestamps that
    occur in every channel of the house are kept.

    Parameters
    ----------
    labels_for_houses : pandas.DataFrame
        Must provide the columns ``house_id``, ``appliance_id`` and
        ``appliance_name``.
    house_id : int
        House whose channels are loaded.
    data_dir : str or path-like, default 'data/low_freq'
        Directory containing the ``house_<id>`` folders.
        NOTE(review): ``read_labels_for_all_houses`` in this notebook reads
        from 'Data/low_freq' (capital "D") -- confirm which spelling is right.

    Returns
    -------
    pandas.DataFrame
        Indexed by timestamp (``unix_time`` interpreted as seconds), with one
        column per channel under a three-level column MultiIndex
        ``(house_id, appliance_id, appliance_name)``. If the house has no
        label rows, an empty frame with the same column levels is returned
        (previously this case failed with ``KeyError: 'unix_time'``).
    """
    level_names = ["house_id", "appliance_id", "appliance_name"]
    house_labels = labels_for_houses[labels_for_houses['house_id'] == house_id]

    if house_labels.empty:
        # Nothing to read: return a correctly shaped empty frame so callers
        # that concatenate per-house frames keep working.
        return pd.DataFrame(
            index=pd.DatetimeIndex([]),
            columns=pd.MultiIndex.from_arrays([[], [], []], names=level_names),
        )

    house_dir = '{}/house_{}/'.format(data_dir, house_id)  # same for every channel
    merged = None
    for row in house_labels.itertuples(index=False):
        # The tuple is used as the column label so that the final columns can
        # be turned into a MultiIndex without any extra bookkeeping.
        cname = (row.house_id, row.appliance_id, row.appliance_name)
        channel = pd.read_table(
            house_dir + 'channel_{}.dat'.format(row.appliance_id),
            sep=' ',
            names=['unix_time', cname],
            dtype={'unix_time': 'int64', cname: 'float64'},
        )
        if merged is None:
            merged = channel
        else:
            merged = pd.merge(merged, channel, how='inner', on='unix_time')

    # Integer seconds -> datetime values used as the row index.
    timestamps = merged['unix_time'].astype("datetime64[s]").values
    readings = merged.drop(['unix_time'], axis=1).set_index(timestamps)
    readings.columns = pd.MultiIndex.from_tuples(readings.columns, names=level_names)
    return readings

# print(data['labels_for_houses'])
# Read the channels of every house, then put the per-house frames side by
# side (column-wise) in one wide frame.
data['channels'] = {}  # created here; nothing is stored in it by this cell
dfs = []
for house in range(1, 7):
    print(f'reading channels for house {house}')
    df = read_channel_data(data['labels_for_houses'], house)
    dfs.append(df)

appliance_data = pd.concat(dfs, axis=1)

# Quick look at the combined column index and the first rows.
print(appliance_data.columns)
print(appliance_data.head())

In [None]:
# Examples of working with the MultiIndex columns of `appliance_data`.

# The full column index first ...
print(appliance_data.columns)

# ... then each level on its own: appliance names, appliance ids, house ids.
for level in ('appliance_name', 'appliance_id', 'house_id'):
    print(appliance_data.columns.get_level_values(level))

# Cross-section by appliance name.
mains_example = appliance_data.xs('mains', level='appliance_name', axis=1)
print(mains_example.head())

# Cross-section by appliance id.
id_example = appliance_data.xs(2, level='appliance_id', axis=1)
print(id_example.head())

# Purely positional access: the second column.
print(appliance_data.iloc[:, 1].head())


In [None]:
# helpers: convert timestamps to fractional hours of the day
def hours(time):
    """Return a time of day as a fractional number of hours.

    ``time`` needs ``hour``, ``minute`` and ``second`` attributes (for
    example a ``datetime.time``); anything finer than seconds is not used.
    """
    minute_part = time.minute / 60
    second_part = time.second / (60 * 60)
    return time.hour + minute_part + second_part

def index_to_hours(df):
    """Return the hour of day of every index entry as a list of floats.

    ``df`` may be any pandas object with a ``DatetimeIndex`` (DataFrame or
    Series). Each entry becomes ``hour + minute/60 + second/3600``; anything
    finer than seconds is not used.

    The values are computed from the index's vectorised ``hour`` / ``minute``
    / ``second`` fields instead of building one Python ``time`` object per
    row, which matters for the long series this notebook plots. The
    arithmetic order is the same as in ``hours``, so results are identical.
    """
    idx = df.index
    fractional_hours = idx.hour + idx.minute / 60 + idx.second / (60 * 60)
    return fractional_hours.tolist()

In [None]:
# Overlay every day's mains readings, one subplot per mains channel, so that
# recurring daily patterns show up as darker regions.
mainss = appliance_data.xs("mains", level="appliance_name", axis=1)
n_channels = len(mainss.columns)

# One subplot per mains channel, stacked vertically.
fig = plt.figure()
fig.set_size_inches(20, 4 * n_channels)
plots = {c: fig.add_subplot(n_channels, 1, c + 1) for c in range(n_channels)}

# Group by calendar day straight from the index. No helper 'date' column is
# written onto the cross-section, which avoids pandas' SettingWithCopyWarning
# and the need to strip that column again inside the loop.
for g, o in mainss.groupby(mainss.index.date):
    day_hours = index_to_hours(o)  # identical for every channel of this day
    for i in range(n_channels):
        # Low alpha: the days are drawn on top of each other.
        plots[i].fill_between(day_hours, o.iloc[:, i], alpha=(1 / n_channels / 5), color="blue")

for i, column in enumerate(mainss.columns):
    plots[i].set(xlim=[0, 24], title=column)
plt.show()


In [None]:
# perform fft on signals
# NOTE: the variable name `mainss` is kept unchanged, but in this cell it
# holds the refrigerator channels, not the mains.
mainss = appliance_data.xs("refrigerator", level="appliance_name", axis=1)

print(mainss.columns)

# Group by calendar day straight from the index instead of writing a helper
# 'date' column onto the cross-section (avoids SettingWithCopyWarning).
for g, o in mainss.groupby(mainss.index.date):
    # Only the first refrigerator column is analysed. `appliance_data` is a
    # column-wise concat of per-house frames, so this column is NaN at
    # timestamps that only other houses have; a single NaN would turn the
    # whole FFT into NaN, hence the dropna().
    # NOTE(review): like the original, this treats the remaining samples as
    # evenly spaced -- confirm that is acceptable for the analysis.
    y = o.iloc[:, 0].dropna()
    if len(y) < 4:
        # Fewer than two spectrum bins besides DC: nothing meaningful to draw.
        continue

    yf = np.fft.fft(y.to_numpy())[:len(y) // 2]
    # Bin positions spread over 0..1. The first bin (0) is skipped below
    # because log(0) is -inf and raises a RuntimeWarning.
    positions = np.linspace(0, 1, num=len(yf))
    xf = np.log(positions[1:])

    fig = plt.figure()
    fig.set_size_inches(10, 5)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)
    ax1.plot(xf, np.abs(yf[1:]))
    ax1.set(title='{} - spectrum'.format(g), xlabel='log(bin position)', ylabel='|FFT|')
    ax2.plot(index_to_hours(y), y)
    ax2.set(title='{} - signal'.format(g), xlabel='hour of day')
    plt.show()
    plt.close(fig)  # one figure per day: release it once it has been shown
