# This notebook explores how household cooling aligns with the State of Texas’s critical peak (CP) power events. We will use one-minute interval data from 50 Texas homes to find out what percentage of homes had air conditioning on during these CP events for each year from 2016 to 2019.

In [None]:
#import packages
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import sqlalchemy as sqla
import os
from config.read_config import get_database_config
import numpy as np
import sys
%matplotlib inline
sys.executable  # shows you your path to the python you're using

In [None]:
# Read the database credentials from config/config.txt.
# * Make sure you add your own credentials to the config/config.txt file before running! *
# The result is indexed by 'username', 'password', 'hostname', 'port' and 'database'
# when the connection URL is built in the next cell.

database_config = get_database_config("./config/config.txt")

In [None]:
# Create the SQLAlchemy engine for the PostgreSQL database.
# Each named placeholder is looked up in database_config by its key.
# NOTE(review): the credentials are inserted into the URL verbatim, so a password
# containing characters such as '@' or '/' would need URL-escaping first.
engine = sqla.create_engine(
    'postgresql://{username}:{password}@{hostname}:{port}/{database}'.format_map(database_config)
)

In [None]:
# ERCOT 4CP events for 2016 - 2019, one start and one end timestamp per event
# (four events per year, newest year first). Source:
# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
#
# event_start_dates[i] and event_end_dates[i] describe the same event; each window is 15 minutes long.
# NOTE(review): the 2016-09-19 window runs 16:16 -> 16:31 while every other window sits on a
# quarter-hour boundary -- confirm against the ERCOT report.

event_start_dates = [
    # 2019
    '2019-06-19 17:00:00-05',
    '2019-07-30 16:30:00-05',
    '2019-08-12 17:00:00-05',
    '2019-09-06 16:45:00-05',
    # 2018
    '2018-06-27 17:00:00-05',
    '2018-07-19 17:00:00-05',
    '2018-08-23 16:45:00-05',
    '2018-09-19 16:30:00-05',
    # 2017
    '2017-06-23 16:45:00-05',
    '2017-07-28 17:00:00-05',
    '2017-08-16 17:00:00-05',
    '2017-09-20 16:45:00-05',
    # 2016
    '2016-06-15 17:00:00-05',
    '2016-07-14 16:00:00-05',
    '2016-08-11 16:30:00-05',
    '2016-09-19 16:16:00-05',
]
event_end_dates = [
    # 2019
    '2019-06-19 17:15:00-05',
    '2019-07-30 16:45:00-05',
    '2019-08-12 17:15:00-05',
    '2019-09-06 17:00:00-05',
    # 2018
    '2018-06-27 17:15:00-05',
    '2018-07-19 17:15:00-05',
    '2018-08-23 17:00:00-05',
    '2018-09-19 16:45:00-05',
    # 2017
    '2017-06-23 17:00:00-05',
    '2017-07-28 17:15:00-05',
    '2017-08-16 17:15:00-05',
    '2017-09-20 17:00:00-05',
    # 2016
    '2016-06-15 17:15:00-05',
    '2016-07-14 16:15:00-05',
    '2016-08-11 16:45:00-05',
    '2016-09-19 16:31:00-05',
]

In [None]:
# Look up the dataids of Texas homes whose metadata marks both air1 and solar as 'yes'.

query = """select dataid from other_datasets.metadata 
           where air1='yes' and solar='yes' and state='Texas';
        """

# Run the query and load the matching dataids into a pandas DataFrame.
df = pd.read_sql_query(
    sql=sqla.text(query),
    con=engine,
)
df

In [None]:
# Pull the dataids out of the DataFrame and also render them as one comma-separated
# string, ready to be dropped into the IN (...) clause of the next SQL query.
dataids_list = df['dataid'].tolist()
print("{} dataids selected listed here:".format(len(dataids_list)))
dataids_str = ','.join(str(dataid) for dataid in dataids_list)
dataids_str
dataids_list

In [None]:
# Assemble the SQL query that pulls 1-minute data for the selected dataids during the
# CP event windows listed above.
#
# The window clauses are built WITHOUT modifying event_start_dates / event_end_dates, so this
# cell can be re-run any number of times and always covers every event. (Popping the first
# entry off the lists would drop one more event on each re-run.)
#
# The values interpolated into the SQL text are the notebook's own event constants and the
# dataids returned by the metadata query; do not reuse this pattern with untrusted input.

if len(event_start_dates) != len(event_end_dates):
    raise ValueError("event_start_dates and event_end_dates must have the same length")
if not event_start_dates:
    raise ValueError("no CP events defined: event_start_dates is empty")
if not dataids_str:
    raise ValueError("no dataids selected: dataids_str is empty")

# One "(localminute >= start and localminute <= end)" clause per event; bounds are inclusive.
event_clauses = [
    "(localminute >= '{}' and localminute <= '{}')".format(start, end)
    for start, end in zip(event_start_dates, event_end_dates)
]

query_2 = """
select dataid, localminute,air1,air2,furnace1,furnace2,solar,grid from electricity.eg_realpower_1min 
where ({}) AND dataid in ({})""".format(" OR ".join(event_clauses), dataids_str)

# here's what that query is
print("sql query is \n" + query_2)

# create a dataframe with the data from the sql query
df2 = pd.read_sql_query(sqla.text(query_2), engine)

df2.head(15)

In [None]:
# Completeness check: only homes with 100% of the expected rows for the 4CP windows are kept.
# A home counts as complete when it has exactly 256 rows
# (presumably 16 events x 16 one-minute readings per inclusive 15-minute window).
rows_per_home = df2.groupby(['dataid']).size().reset_index(name='counts')
rows_per_home['perc'] = (rows_per_home['counts']/256)*100
# Keep at most the first 50 fully covered homes.
complete_homes = rows_per_home[rows_per_home['perc'] == 100].head(50)
final_dataids = complete_homes['dataid']
final_dataids.count()
df3 = df2[df2['dataid'].isin(final_dataids)]

In [None]:
# Treat missing readings as zero: every null in the frame is replaced with 0.
df3 = df3.fillna(value=0)
df3.head(n=10)

In [None]:
# Create the new column hvac (the sum of the air compressor and furnace circuits) and
# reduce localminute to its calendar date so that rows can later be grouped per CP event day.
#
# The frame is rebuilt with assign() instead of being edited in place, and localminute goes
# through pd.to_datetime first: datetime values pass through unchanged, while plain dates
# (left behind by a previous run of this cell) are parsed back into datetimes. This keeps the
# cell safe to re-run; calling .dt directly on already-converted dates raises AttributeError.
df3 = df3.assign(
    hvac=df3['air1'] + df3['air2'] + df3['furnace1'] + df3['furnace2'],
    localminute=pd.to_datetime(df3['localminute']).dt.date,
)
df3.head(10)

In [None]:
# Narrow the frame down to the three columns the analysis needs: dataid, hvac and the date
# (which is stored in the localminute column).
hvac_cp_data = pd.DataFrame(
    data=df3,
    columns=['dataid', 'hvac', 'localminute'],
)
hvac_cp_data.head(n=10)

In [None]:
# Average hvac per (date, dataid) pair, i.e. the average hvac usage of each home
# on each CP event day.
hvac_cp_data_grouped = hvac_cp_data.groupby(by=['localminute', 'dataid']).mean()
# Turn the (localminute, dataid) index back into ordinary columns for the filtering step.
reset_hvac_cp_data_grouped = hvac_cp_data_grouped.reset_index()
reset_hvac_cp_data_grouped.head(n=10)

In [None]:
# Keep only the (day, home) rows where the AC was actually cooling.
# A home counts as ON and cooling when its average hvac draw over the event window is
# above 0.5 (the threshold the original note describes as 500 W, so presumably kW).
hvac_on = reset_hvac_cp_data_grouped[reset_hvac_cp_data_grouped['hvac'] > 0.5]

# Percentage of homes that were ON per CP event day. The denominator is the number of homes
# actually present in the data rather than a hard-coded 50: the completeness filter may keep
# fewer than 50 homes, in which case dividing by 50 would understate the share.
total_homes = reset_hvac_cp_data_grouped['dataid'].nunique()
homes_on = (hvac_on['localminute'].value_counts()/total_homes)*100
homes_on

## From the results above, we can see that 70% of homes had their AC on and cooling on each CP event day from 2016 to 2019.