# Home Energy Audit

### Technology
- [pydrive](https://towardsdatascience.com/how-to-manage-files-in-google-drive-with-python-d26471d91ecd)
  - I want to keep my electricity and thermostat data private, apart from what I share here, so the notebook reads the raw files from Google Drive.
  - To set it up, follow the [PyDrive quickstart docs](https://pythonhosted.org/PyDrive/quickstart.html) and [this Medium post on setting up the API](https://medium.com/@chingjunetao/simple-way-to-access-to-google-service-api-a22f4251bb52). You also need to add yourself as a test user; there is no reason to publish the app.

### Data sources
- [NOAA weather data](https://www.ncei.noaa.gov/pub/data/uscrn/products/hourly02/) (for the Sandstone, MN weather station)
- [Sense electricity data](https://help.sense.com/hc/en-us/articles/360011635634-Web-App-v4)
- [Nest air temp/humidity/runtime data](https://takeout.google.com/settings/takeout)
  - [More info on the confusing Nest thermostat headers](https://www.reddit.com/r/Nest/comments/9ui8m4/nest_archive_data_confused/)

In [None]:
# OS
import sys, os, pathlib

# Data manipulation
import numpy as np
import pandas as pd

# Turn off pandas' chained-assignment check for the whole session
# (None = neither warn nor raise).
pd.options.mode.chained_assignment = None

# String manipulation
import re

# Plotting
import plotnine as p9
from plotnine import ggplot, aes, geoms, facets, scales, labels, themes

# Local directories
# NOTE: "__file__" is a string literal, not the module variable, so realpath()
# anchors it to the current working directory and the parent of that path is
# the working directory itself. Presumably done because __file__ is undefined
# inside a notebook kernel — TODO confirm.
home_dir = pathlib.Path(os.path.realpath("__file__")).parent


In [None]:
from helpers.gauth import make_drive_creds
from helpers.gdrive import make_drive
from helpers.data_prep import get_noaa_data, get_sense_data, get_nest_data


In [None]:
# Call the project helper with the working directory and the
# "home_energy_audit" name. Presumably this prepares the Google Drive OAuth
# credentials — TODO confirm against helpers/gauth.py.
make_drive_creds(home_dir, "home_energy_audit")


In [None]:
# Handle that is passed to each get_*_data loader in the cells below.
# Presumably an authenticated PyDrive client — TODO confirm against helpers/gdrive.py.
drive = make_drive()


### Sense energy monitor data

In [None]:
# Load the Sense export via the Drive handle; later cells use the result as a DataFrame.
sense_data = get_sense_data(drive)


In [None]:
# Sanity check: frame dimensions and per-column dtypes.
display(sense_data.shape, sense_data.dtypes)


### Nest thermostat data

In [None]:
# Load the Nest export via the Drive handle; later cells use the result as a DataFrame.
nest_data = get_nest_data(drive)


In [None]:
# Sanity check: frame dimensions and per-column dtypes.
display(nest_data.shape, nest_data.dtypes)


### NOAA weather data

In [None]:
# The NOAA loader returns two things: the observations (used as a DataFrame
# below) and the accompanying readme (used as a plain string below).
noaa_data, noaa_readme = get_noaa_data(drive)


In [None]:
# Sanity check: frame dimensions and per-column dtypes.
display(noaa_data.shape, noaa_data.dtypes)


In [None]:
# Print only the first 31 lines of the NOAA readme, then a truncation notice.
# The bounded split stops scanning once those 31 lines have been separated;
# the slice drops the unsplit remainder.
print(
    "\n".join(noaa_readme.split("\n", 31)[:31]),
    "\nFile truncated by Henrik for readability...",
)


### Join data for analysis

In [None]:
# Keep only the columns needed for the analysis. The explicit .copy() makes the
# new frame independent of sense_data, so the edit below does not rely on the
# chained-assignment check being silenced.
subset_sense_data = sense_data[["datetime", "name", "avg_wattage", "kwh"]].copy()

# Normalise device names to lower snake_case (they become column names once the
# frame is pivoted) and group rows without a device name under "unknown".
subset_sense_data["name"] = (
    subset_sense_data["name"]
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .fillna("unknown")
)


In [None]:
# Long -> wide: one row per datetime, one column per device name, cells hold kwh.
# pivot() raises ValueError if any (datetime, name) pair occurs more than once.
pivoted_sense_data = subset_sense_data.pivot(
    index="datetime",
    columns="name",
    values="kwh",
)



In [None]:
# Join the separate date and time text columns into one timestamp column.
# infer_datetime_format is intentionally not passed: pandas 2.0 deprecated it
# (format inference is now the default and the flag has no effect).
nest_data["datetime"] = pd.to_datetime(nest_data["date"] + " " + nest_data["time"])



In [None]:
# Hourly averages of the thermostat readings, with "datetime" returned as a
# regular column. The columns are selected with a list rather than a bare
# tuple key, which is the documented way to pick several columns.
nest_data_hourly = (
    nest_data.resample("H", on="datetime")[["avg_temp", "avg_humidity"]]
    .mean()
    .reset_index()
)



In [None]:
def convert_to_date_time(date, time):
    """Convert a NOAA date and time pair to a ``YYYY-MM-DD HH:MM:00`` string.

    Both values may be strings or integers. Integers are accepted because a
    reader that parses these columns as numbers drops the leading zeros of
    early-morning times (``0100`` becomes ``100``), so the time is left-padded
    back to four digits before it is sliced.

    Args:
        date (str | int): A date of the form yearmonthday e.g. 20220202.
        time (str | int): A time of the form hourminute e.g. 2300.

    Returns:
        str: The combined timestamp, e.g. ``"2022-02-02 23:00:00"``.
    """
    date = str(date)
    # Restore leading zeros lost to integer parsing so the slices below line up.
    time = str(time).zfill(4)
    return f"{date[:4]}-{date[4:6]}-{date[6:]} {time[:2]}:{time[2:]}:00"


In [None]:
# Build a timestamp from the lst_date / lst_time columns and place it first.
# The format is given explicitly: it is exactly what convert_to_date_time
# emits, and it replaces the infer_datetime_format flag that pandas 2.0
# deprecated.
# NOTE: DataFrame.insert raises ValueError when the column already exists, so
# this cell cannot be re-run without reloading noaa_data.
noaa_data.insert(
    0,
    "datetime",
    pd.to_datetime(
        noaa_data.apply(
            lambda row: convert_to_date_time(row["lst_date"], row["lst_time"]),
            axis=1,
        ),
        format="%Y-%m-%d %H:%M:%S",
    ),
)



In [None]:
# Keep the timestamp plus the three weather columns carried into the merged frame.
noaa_data_subset = noaa_data.loc[
    :, ["datetime", "t_hr_avg", "rh_hr_avg", "solarad"]
]


In [None]:
# Restrict to the half-open window: on or after the first bound, strictly
# before the second.
noaa_data_subset = noaa_data_subset[
    noaa_data_subset.datetime.ge("2022-02-1")
    & noaa_data_subset.datetime.lt("2022-07-01")
]



In [None]:
# The weather rows are the spine of the analysis frame: both left joins keep
# every NOAA row and attach matching values on "datetime".
# Step 1: per-device electricity columns.
house_data = noaa_data_subset.merge(pivoted_sense_data, how="left", on="datetime")
# Step 2: hourly thermostat averages.
house_data = house_data.merge(nest_data_hourly, how="left", on="datetime")



In [None]:
# Bare expression: renders the merged frame as this cell's output.
house_data


In [None]:
# One row per calendar day holding the mean of that day's "fridge" values,
# with "datetime" returned as a regular column.
# NOTE(review): this is a daily *mean* of the readings, while the plot below
# labels the axis "Fridge Daily KWH" — confirm whether a daily sum was intended.
daily_fridge_kwh = (
    house_data.resample("D", on="datetime")["fridge"].mean().reset_index()
)


In [None]:
# Session-wide plotnine rendering options (they affect every later plot too).
p9.options.figure_size = (5, 3)
p9.options.dpi = 200

# Scatter of the daily fridge values with a linear-model trend line; the
# x tick labels are rotated so the dates do not overlap. The parenthesised
# expression is the cell's last value, so the notebook renders it.
(
    ggplot(daily_fridge_kwh, aes(x="datetime", y="fridge"))
    + geoms.geom_point()
    + geoms.geom_smooth(method="lm", color="blue")
    + themes.theme(axis_text_x=themes.element_text(rotation=45, hjust=1))
    + labels.ggtitle("Fridge KWH usage over time")
    + labels.ylab("Fridge Daily KWH")
    + labels.xlab("Day")
)
