In [None]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Read Data
Read the CSV data of 1-minute eGauge meterage for 25 homes in New York City, together with the accompanying metadata.

In [None]:
# Load the 1-minute meterage table and the per-home metadata table.
nyc = pd.read_csv("data/1minute_data_newyork.csv")
# Row 0 of the metadata file is discarded; every later row is kept.
metadata = pd.read_csv("data/metadata.csv").iloc[1:]

Parse `nan` values and percentage values

In [None]:
def perc_txt_parse(pc):
    """Convert a percentage cell such as ``"95%"`` into a fraction.

    Parameters
    ----------
    pc : object
        Cell value; it is converted to ``str`` before parsing, so both
        strings and float ``nan`` are accepted.

    Returns
    -------
    int or float
        ``0`` when the text contains ``"nan"``, otherwise the number in
        front of the first ``%`` multiplied by ``0.01``.

    Raises
    ------
    ValueError
        If the text in front of ``%`` is not a valid number.
    """
    pc = str(pc)
    if "nan" in pc:
        return 0
    # float() instead of eval(): the cell text comes from a CSV file and
    # must never be executed as Python code.
    return float(pc.split("%")[0]) * 0.01

## Preprocessing
Select records with more than 80% `egauge_1min_data_availability`

In [None]:
# Keep only the homes whose 1-minute eGauge data availability exceeds 80%.
# The selection has to happen before the dtype cast below: the original cell
# cast `metadata_sel['dataid']` before `metadata_sel` existed, which raises
# NameError on a fresh kernel.
metadata_sel = metadata[
    metadata["egauge_1min_data_availability"].apply(
        lambda x: perc_txt_parse(x) > 0.8
    )
].copy()  # independent frame, so the column assignment below is unambiguous
metadata_sel['dataid'] = metadata_sel['dataid'].astype('int64');

## Get Appliances List
The columns common to both the meterage data and the metadata give the list of all possible appliances in each home

In [None]:
# NOTE(review): `nyc_5997` was not defined by any earlier cell, so this cell
# failed with NameError under Restart & Run All. It is derived here as the
# rows of `nyc` for home 5997 -- confirm this matches the original intent.
nyc_5997 = nyc[nyc["dataid"] == 5997]

# Columns present in both the meterage table and the metadata table.
crossover = nyc_5997.columns[nyc_5997.columns.isin(metadata_sel.columns)]

# Shared columns that are excluded from the appliance list.
removable = [
    "dataid",
    "grid",
    "solar",
    "solar2"
]
# Filter instead of list.remove(), which raises ValueError as soon as one of
# the removable columns is missing from the crossover.
appliances = [col for col in crossover if col not in removable]

# Persist the list: `Residence` reads 'appliances.csv' when no appliance
# list is passed to it.
pd.DataFrame(data={'key': appliances}).to_csv('appliances.csv')
appliances = list(pd.read_csv('appliances.csv')['key']) # list of all consumers

## Per-Residence Analysis
Class definition to process and analyse meterage and metadata for each household, represented by an instance of the class `Residence`

In [None]:
class Residence:
    """Meterage and metadata of a single household.

    Parameters
    ----------
    _id : int
        Value matched against the ``dataid`` column of both tables.
    all_meterage : pandas.DataFrame
        Meterage table; must contain ``dataid`` and ``localminute`` columns.
    all_metadata : pandas.DataFrame
        Metadata table; must contain ``dataid`` and one column per appliance.
    _appliances : list of str, optional
        Candidate appliance column names. When omitted, the list is read
        from the ``key`` column of ``appliances.csv``.

    Attributes
    ----------
    meterage : rows of ``all_meterage`` for this home, with ``localminute``
        parsed to datetimes, sorted by it, and re-indexed (the previous
        index is kept as an ``index`` column).
    metadata : rows of ``all_metadata`` for this home.
    dataid : the ``_id`` passed in.
    possible_appliances : the candidate appliance list.
    existing_appliances : candidates whose metadata value is ``'yes'``.

    Raises
    ------
    IndexError
        If ``all_metadata`` has no row for ``_id``.
    KeyError
        If an appliance name is not a metadata column.
    """

    def __init__(self, _id, all_meterage, all_metadata, _appliances=None):
        if _appliances is None:
            # Read at call time. As a default argument the file was read once,
            # when the class was defined: that fails if the file does not
            # exist yet and shares one list object between all instances.
            _appliances = list(pd.read_csv('appliances.csv')['key'])

        # .copy() gives an independent frame, so assigning the parsed
        # `localminute` column does not trigger SettingWithCopyWarning.
        meterage = all_meterage[all_meterage['dataid'] == _id].copy()
        self.metadata = all_metadata[all_metadata['dataid'] == _id]
        self.dataid = _id
        self.possible_appliances = _appliances
        # An appliance exists when the first metadata row for this home says 'yes'.
        self.existing_appliances = [
            app for app in _appliances
            if self.metadata[app].iloc[0] == 'yes'
        ]
        # The format string expects every timestamp to end in the literal "-05".
        meterage['localminute'] = pd.to_datetime(
            meterage['localminute'], format='%Y-%m-%d %H:%M:%S-05'
        )
        # reset_index() without drop=True keeps the old index as an 'index'
        # column, exactly as the in-place version did.
        self.meterage = meterage.sort_values('localminute').reset_index()

    @staticmethod
    def parse_datetime(t): # backup func
        """Parse a timestamp string to minute resolution.

        Everything from the second ``:`` onwards is discarded before the
        remainder is parsed as ``'%Y-%m-%d %H:%M'``. Declared static because
        it takes no ``self``; previously it could only be called on the class.
        """
        return pd.to_datetime(":".join(t.split(":")[:2]), format='%Y-%m-%d %H:%M')
        

## Test Case
Test case `r5997` is demonstrated here.
```
r5997 = Residence(5997, nyc_5997, metadata_sel)
r5997.existing_appliances
```

In [None]:
# Build the test-case household and display the appliances it reports owning.
r5997 = Residence(
    _id=5997,
    all_meterage=nyc_5997,
    all_metadata=metadata_sel,
)
r5997.existing_appliances

In [None]:
# Plot the first 1440 rows of the test case's `solar` column.
r5997.meterage.loc[:, 'solar'].head(1440).plot()

## Test Case Visualisation
Visualisation of each daily profile, and of the mean profile, for a column of the test case `r5997`

In [None]:
def parse_load_profiles(p, samples_per_profile=1440):
    """Reshape a flat sequence of samples into fixed-length profiles.

    Parameters
    ----------
    p : array-like
        Samples. Anything ``np.asarray`` accepts is allowed (ndarray,
        pandas Series, list); the original only worked on objects that
        have a ``.reshape`` method.
    samples_per_profile : int, optional
        Number of samples per row of the result. Defaults to 1440.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, samples_per_profile)`` where ``n`` is the number
        of complete profiles in ``p``. Trailing samples that do not fill a
        whole profile are dropped.
    """
    samples = np.asarray(p)
    n = samples.shape[0] // samples_per_profile
    return samples[:n * samples_per_profile].reshape(n, samples_per_profile)
def pcp(stoch_profiles, stoch_profiles_avg):
    """Plot every profile in light blue with the average profile on top.

    Parameters
    ----------
    stoch_profiles : iterable of 1-D sequences
        Individual profiles, each drawn against its own sample index.
    stoch_profiles_avg : 1-D sequence
        Average profile, drawn last in a darker blue.

    Notes
    -----
    The x ticks are placed every 240 samples and labelled 0-24, so the axis
    reads as hours when a profile holds 1440 samples.
    """
    plt.figure(figsize=(10, 5))
    for profile in stoch_profiles:
        plt.plot(np.arange(len(profile)), profile, "#b0c4de")

    # The x range comes from the average itself rather than from the leftover
    # loop variable, which was undefined when `stoch_profiles` was empty.
    plt.plot(np.arange(len(stoch_profiles_avg)), stoch_profiles_avg, "#4169e1")

    # Axis decoration is loop-invariant, so it is applied once.
    plt.xlabel("Time (hours)")
    plt.ylabel("Power (W)")
    plt.margins(x=0)
    plt.margins(y=0)
    plt.xticks(
        [0, 240, 480, (60 * 12), (60 * 16), (60 * 20), (60 * 24)],
        [0, 4, 8, 12, 16, 20, 24],
    )
    plt.show()
