## This notebook explores how solar generation aligns with in-home HVAC use, and what percentage of the HVAC energy load is powered by the home’s rooftop solar in each season of one year.

In [None]:
#import packages
import os
import sys
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import sqlalchemy as sqla

from config.read_config import get_database_config
%matplotlib inline
sys.executable  # shows you your path to the python you're using

In [None]:
# Load the database credentials from the local config file.
# * the credentials must already have been added to config/config.txt! *
config_path = "./config/config.txt"

database_config = get_database_config(config_path)

In [None]:
# Build the PostgreSQL engine from the settings loaded into database_config.
# The username and password are percent-encoded before being placed in the URL:
# characters such as '@', ':' or '/' inside a credential would otherwise be read
# as URL delimiters and yield a wrong (or unparseable) connection string.
# quote(..., safe='') is used so that every reserved character is escaped.
engine = sqla.create_engine(
    'postgresql://{}:{}@{}:{}/{}'.format(
        quote(str(database_config['username']), safe=''),
        quote(str(database_config['password']), safe=''),
        database_config['hostname'],
        database_config['port'],
        database_config['database'],
    )
)

In [None]:
# Candidate homes: Texas entries in the Dataport metadata that have both an
# air-conditioning circuit (air1) and solar, and whose 1-minute eGauge data
# starts before 2018 and ends after 2018.
# The availability filter uses LIKE, so the '%' in '100%' / '99%' is a wildcard.
query = """select distinct dataid from other_datasets.metadata 
                                          where air1='yes' and solar='yes' 
                                          and egauge_1min_min_time < '2018-01-01' 
                                          and egauge_1min_max_time > '2019-01-01'
                                          and state='Texas'
                                          and (egauge_1min_data_availability like '100%' 
                                               or 
                                               egauge_1min_data_availability like '99%');
         """

# Run the metadata query; the result has a single `dataid` column.
df = pd.read_sql_query(sql=sqla.text(query), con=engine)


In [None]:
# Collect the selected dataids and render them as a comma-separated string
# that can be placed inside the `in (...)` clause of the next SQL query.
dataids_list = df['dataid'].tolist()

# Fail early with a clear message: an empty selection would otherwise produce
# `in ()`, which is not valid SQL and would surface as an opaque database error.
if not dataids_list:
    raise ValueError("No dataids matched the metadata filter; cannot build the SQL IN clause.")

print("{} dataids selected listed here:".format(len(dataids_list)))
dataids_str = ','.join(map(str, dataids_list))

# Display the ids so that the "listed here" message is actually followed by the list.
dataids_list

In [None]:
# Data-completeness check: count the 1-minute records each candidate home has
# in calendar year 2018 (one output row per dataid, count in `total_rec`).
query2 = (
    """select dataid,count(*) total_rec from electricity.eg_realpower_1min 
            where dataid in ({})""".format(dataids_str)
    + """ and localminute >= '2018-01-01' and localminute < '2019-01-01' group by 1"""
)

df2 = pd.read_sql_query(sql=sqla.text(query2), con=engine)



In [None]:
# Keep at most 50 homes whose 2018 record count is at least 99% of a full
# year of one-minute readings.
minutes_in_year = 525600  # 365 days * 24 hours * 60 minutes
df2['perc'] = (df2['total_rec'] / minutes_in_year) * 100

has_enough_data = df2['perc'] >= 99
final_dataids = df2.loc[has_enough_data].iloc[:50]

# Number of homes that made the cut.
final_dataids['dataid'].count()


In [None]:
# Turn the selected homes into a Python list and a comma-separated string;
# the string is interpolated into the `in (...)` clause of every seasonal query.
final_dataids_list = final_dataids['dataid'].tolist()

# Fail early with a clear message: with no homes selected, each seasonal query
# would contain `in ()`, which is not valid SQL.
if not final_dataids_list:
    raise ValueError("No dataids passed the 99% completeness check; cannot build the SQL IN clause.")

print("{} dataids selected listed here:".format(len(final_dataids_list)))
final_dataids_str = ','.join(map(str, final_dataids_list))

# Display the selected ids (only the last expression of a cell is rendered).
final_dataids_list

In [None]:
# One minute-level DataFrame is pulled per season for the selected homes.

# Summer: 2018-06-01 (inclusive) to 2018-09-01 (exclusive).
summer = (
    """select localminute::timestamp,air1,air2,furnace1,furnace2,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-06-01' and localminute <  '2018-09-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# Execute the query and load the result set into a DataFrame.
summer_df = pd.read_sql_query(sql=sqla.text(summer), con=engine)



In [None]:
# Winter: 2018-12-01 (inclusive) to 2019-03-01 (exclusive).
# NOTE(review): this window extends into 2019, whereas the completeness check
# earlier in the notebook only counts records from 2018 — confirm that the
# January/February 2019 data is adequately populated for the selected homes.
winter = (
    """select localminute::timestamp,air1,air2,furnace1,furnace2,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-12-01' and localminute <  '2019-03-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# Execute the query and load the result set into a DataFrame.
winter_df = pd.read_sql_query(sql=sqla.text(winter), con=engine)


In [None]:
# Spring: 2018-03-01 (inclusive) to 2018-06-01 (exclusive).
spring = (
    """select localminute::timestamp,air1,air2,furnace1,furnace2,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-03-01' and localminute <  '2018-06-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# Execute the query and load the result set into a DataFrame.
spring_df = pd.read_sql_query(sql=sqla.text(spring), con=engine)

In [None]:
# Fall: 2018-09-01 (inclusive) to 2018-12-01 (exclusive).
fall = (
    """select localminute::timestamp,air1,air2,furnace1,furnace2,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-09-01' and localminute <  '2018-12-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# Execute the query and load the result set into a DataFrame.
fall_df = pd.read_sql_query(sql=sqla.text(fall), con=engine)

In [None]:
# Summer analysis: build the average hourly HVAC and solar profile.
#
# Steps, in order:
#   1. missing readings are replaced with 0 so the circuit columns can be summed
#   2. hvac = air1 + air2 + furnace1 + furnace2
#   3. localminute is parsed to a pandas datetime and becomes the index
#   4. hr holds the hour of day taken from that index
summer_df = (
    summer_df
    .fillna(0)
    .assign(
        hvac=lambda d: d['air1'] + d['air2'] + d['furnace1'] + d['furnace2'],
        datetime=lambda d: pd.to_datetime(d['localminute']),
    )
    .set_index('datetime')
    .assign(hr=lambda d: d.index.hour)
)

# Narrow frame holding only what the profile and the percentage cell need.
summer_df_new = pd.DataFrame(summer_df, columns=['solar', 'hvac', 'hr'])

# Mean solar / hvac for each hour of day, across all rows of the season.
summer_df_grouped = summer_df_new.groupby('hr').mean()

# Bring `hr` back as a regular column for plotting.
plot_summer = summer_df_grouped.reset_index()



In [None]:
# Plot the summer profile: mean hvac and mean solar for each hour of day.
# A title and y-axis label are set so the figure can be told apart from the
# other seasons' charts when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(plot_summer['hr'], plot_summer['hvac'], label="hvac")
ax.plot(plot_summer['hr'], plot_summer['solar'], label="solar")
ax.set_xticks(np.arange(0, 24, 1.0))
ax.set_xlabel('hour')
# NOTE(review): units are not shown in this notebook — presumably kW; confirm.
ax.set_ylabel('average power')
ax.set_title('Summer: average hourly HVAC load vs. solar generation')
ax.legend()

plt.show()

In [None]:
# Share of summer HVAC load attributed to solar, as a percentage.
# Only rows where solar generation exceeds the HVAC load are counted as
# solar-powered; rows where solar covers just part of the load contribute nothing.
summer_hvac = summer_df_new['hvac']
summer_solar_exceeds_hvac = summer_hvac < summer_df_new['solar']

total_hvac_summer = summer_hvac.sum()
solar_hvac_summer = summer_hvac[summer_solar_exceeds_hvac].sum()

hvac_powered_by_solar_summer = (solar_hvac_summer / total_hvac_summer) * 100
hvac_powered_by_solar_summer

From the results above, we can see that 18% of the HVAC energy load in summer is powered by the home’s rooftop solar.

In [None]:
# Fall analysis: build the average hourly HVAC and solar profile.
#
# Steps, in order:
#   1. missing readings are replaced with 0 so the circuit columns can be summed
#   2. hvac = air1 + air2 + furnace1 + furnace2
#   3. localminute is parsed to a pandas datetime and becomes the index
#   4. hr holds the hour of day taken from that index
fall_df = (
    fall_df
    .fillna(0)
    .assign(
        hvac=lambda d: d['air1'] + d['air2'] + d['furnace1'] + d['furnace2'],
        datetime=lambda d: pd.to_datetime(d['localminute']),
    )
    .set_index('datetime')
    .assign(hr=lambda d: d.index.hour)
)

# Narrow frame holding only what the profile and the percentage cell need.
fall_df_new = pd.DataFrame(fall_df, columns=['solar', 'hvac', 'hr'])

# Mean solar / hvac for each hour of day, across all rows of the season.
fall_df_grouped = fall_df_new.groupby('hr').mean()

# Bring `hr` back as a regular column for plotting.
plot_fall = fall_df_grouped.reset_index()




In [None]:
# Plot the fall profile: mean hvac and mean solar for each hour of day.
# A title and y-axis label are set so the figure can be told apart from the
# other seasons' charts when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(plot_fall['hr'], plot_fall['hvac'], label="hvac")
ax.plot(plot_fall['hr'], plot_fall['solar'], label="solar")
ax.set_xticks(np.arange(0, 24, 1.0))
ax.set_xlabel('hour')
# NOTE(review): units are not shown in this notebook — presumably kW; confirm.
ax.set_ylabel('average power')
ax.set_title('Fall: average hourly HVAC load vs. solar generation')
ax.legend()

plt.show()


In [None]:
# Share of fall HVAC load attributed to the PV system, as a percentage.
# Only rows where solar generation exceeds the HVAC load are counted as
# solar-powered; rows where solar covers just part of the load contribute nothing.
fall_hvac = fall_df_new['hvac']
fall_solar_exceeds_hvac = fall_hvac < fall_df_new['solar']

total_hvac_fall = fall_hvac.sum()
solar_hvac_fall = fall_hvac[fall_solar_exceeds_hvac].sum()

hvac_powered_by_solar_fall = (solar_hvac_fall / total_hvac_fall) * 100
hvac_powered_by_solar_fall

14% of the HVAC energy load in fall is powered by the home’s rooftop solar.

In [None]:
# Spring analysis: build the average hourly HVAC and solar profile.
#
# Steps, in order:
#   1. missing readings are replaced with 0 so the circuit columns can be summed
#   2. hvac = air1 + air2 + furnace1 + furnace2
#   3. localminute is parsed to a pandas datetime and becomes the index
#   4. hr holds the hour of day taken from that index
spring_df = (
    spring_df
    .fillna(0)
    .assign(
        hvac=lambda d: d['air1'] + d['air2'] + d['furnace1'] + d['furnace2'],
        datetime=lambda d: pd.to_datetime(d['localminute']),
    )
    .set_index('datetime')
    .assign(hr=lambda d: d.index.hour)
)

# Narrow frame holding only what the profile and the percentage cell need.
spring_df_new = pd.DataFrame(spring_df, columns=['solar', 'hvac', 'hr'])

# Mean solar / hvac for each hour of day, across all rows of the season.
spring_df_grouped = spring_df_new.groupby('hr').mean()

# Bring `hr` back as a regular column for plotting.
plot_spring = spring_df_grouped.reset_index()

In [None]:
# Plot the spring profile: mean hvac and mean solar for each hour of day.
# A title and y-axis label are set so the figure can be told apart from the
# other seasons' charts when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(plot_spring['hr'], plot_spring['hvac'], label="hvac")
ax.plot(plot_spring['hr'], plot_spring['solar'], label="solar")
ax.set_xticks(np.arange(0, 24, 1.0))
ax.set_xlabel('hour')
# NOTE(review): units are not shown in this notebook — presumably kW; confirm.
ax.set_ylabel('average power')
ax.set_title('Spring: average hourly HVAC load vs. solar generation')
ax.legend()

plt.show()

In [None]:
# Share of spring HVAC load attributed to the PV system, as a percentage.
# Only rows where solar generation exceeds the HVAC load are counted as
# solar-powered; rows where solar covers just part of the load contribute nothing.
spring_hvac = spring_df_new['hvac']
spring_solar_exceeds_hvac = spring_hvac < spring_df_new['solar']

total_hvac_spring = spring_hvac.sum()
solar_hvac_spring = spring_hvac[spring_solar_exceeds_hvac].sum()

hvac_powered_by_solar_spring = (solar_hvac_spring / total_hvac_spring) * 100
hvac_powered_by_solar_spring

21% of the HVAC energy load in spring is powered by the home’s rooftop solar.
This is also the highest share among all four seasons.

In [None]:
# Winter analysis: build the average hourly HVAC and solar profile.
#
# Steps, in order:
#   1. missing readings are replaced with 0 so the circuit columns can be summed
#   2. hvac = air1 + air2 + furnace1 + furnace2
#   3. localminute is parsed to a pandas datetime and becomes the index
#   4. hr holds the hour of day taken from that index
winter_df = (
    winter_df
    .fillna(0)
    .assign(
        hvac=lambda d: d['air1'] + d['air2'] + d['furnace1'] + d['furnace2'],
        datetime=lambda d: pd.to_datetime(d['localminute']),
    )
    .set_index('datetime')
    .assign(hr=lambda d: d.index.hour)
)

# Narrow frame holding only what the profile and the percentage cell need.
winter_df_new = pd.DataFrame(winter_df, columns=['solar', 'hvac', 'hr'])

# Mean solar / hvac for each hour of day, across all rows of the season.
winter_df_grouped = winter_df_new.groupby('hr').mean()

# Bring `hr` back as a regular column for plotting.
plot_winter = winter_df_grouped.reset_index()

In [None]:
# Plot the winter profile: mean hvac and mean solar for each hour of day.
# A title and y-axis label are set so the figure can be told apart from the
# other seasons' charts when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(plot_winter['hr'], plot_winter['hvac'], label="hvac")
ax.plot(plot_winter['hr'], plot_winter['solar'], label="solar")
ax.set_xticks(np.arange(0, 24, 1.0))
ax.set_xlabel('hour')
# NOTE(review): units are not shown in this notebook — presumably kW; confirm.
ax.set_ylabel('average power')
ax.set_title('Winter: average hourly HVAC load vs. solar generation')
ax.legend()

plt.show()

In [None]:
# Share of winter HVAC load attributed to the PV system, as a percentage.
# Only rows where solar generation exceeds the HVAC load are counted as
# solar-powered; rows where solar covers just part of the load contribute nothing.
winter_hvac = winter_df_new['hvac']
winter_solar_exceeds_hvac = winter_hvac < winter_df_new['solar']

total_hvac_winter = winter_hvac.sum()
solar_hvac_winter = winter_hvac[winter_solar_exceeds_hvac].sum()

hvac_powered_by_solar_winter = (solar_hvac_winter / total_hvac_winter) * 100
hvac_powered_by_solar_winter

From the seasonal charts above, we can see that solar generation and the home's HVAC usage peak at different times, except in winter.
Solar peaks between 12 PM and 2 PM, while HVAC usage keeps increasing until 7 PM and then starts to drop off.
In winter, average HVAC usage stays almost constant relative to solar generation.


Also, spring is the season in which the largest share of the HVAC load is powered by the home’s PV system.
This makes sense because in spring there is plenty of sunlight but HVAC systems are not used as much.