# PV Notebook
This notebook explores solar (PV) generation around the ERCOT 4CP (Four Coincident Peak) events.

In [6]:
import pandas as pd
import psycopg2
import sqlalchemy as sqla
import os
from config.read_config import get_database_config
import sys
sys.executable  # shows you your path to the python you're using

'/Library/Developer/CommandLineTools/usr/bin/python3'

In [7]:
# Read the database credentials from config/config.txt (path is relative to the
# directory the notebook is run from).
# The engine setup further down reads the keys 'username', 'password',
# 'hostname', 'port' and 'database' from the returned mapping.
database_config = get_database_config("./config/config.txt")


In [8]:
# Get our DB connection.
#
# The username and password are percent-encoded before being placed in the
# URL: a credential containing URL-reserved characters (e.g. '@', ':', '/' or
# '%') would otherwise be mis-parsed when SQLAlchemy splits the URL into its
# components. safe='' makes sure '/' is encoded as well.
from urllib.parse import quote

engine = sqla.create_engine(
    'postgresql://{user}:{password}@{host}:{port}/{database}'.format(
        user=quote(str(database_config['username']), safe=''),
        password=quote(str(database_config['password']), safe=''),
        host=database_config['hostname'],
        port=database_config['port'],
        database=database_config['database'],
    )
)


In [None]:
# 4CP event windows, acquired from
# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
#
# Each entry below is one (window start, window end) pair; every window is
# 15 minutes long and carries a -05 UTC offset. Years are listed newest first,
# four events (June-September) per year. Passing the pairs to zip() transposes
# them into the two parallel lists used by the rest of the notebook.
#
# NOTE(review): the last 2016 window runs 16:16-16:31, whereas every other
# window sits on a quarter-hour boundary - confirm against the ERCOT report.
event_start_dates, event_end_dates = (
    list(bounds)
    for bounds in zip(
        # 2019
        ('2019-06-19 17:00:00-05', '2019-06-19 17:15:00-05'),
        ('2019-07-30 16:30:00-05', '2019-07-30 16:45:00-05'),
        ('2019-08-12 17:00:00-05', '2019-08-12 17:15:00-05'),
        ('2019-09-06 16:45:00-05', '2019-09-06 17:00:00-05'),
        # 2018
        ('2018-06-27 17:00:00-05', '2018-06-27 17:15:00-05'),
        ('2018-07-19 17:00:00-05', '2018-07-19 17:15:00-05'),
        ('2018-08-23 16:45:00-05', '2018-08-23 17:00:00-05'),
        ('2018-09-19 16:30:00-05', '2018-09-19 16:45:00-05'),
        # 2017
        ('2017-06-23 16:45:00-05', '2017-06-23 17:00:00-05'),
        ('2017-07-28 17:00:00-05', '2017-07-28 17:15:00-05'),
        ('2017-08-16 17:00:00-05', '2017-08-16 17:15:00-05'),
        ('2017-09-20 16:45:00-05', '2017-09-20 17:00:00-05'),
        # 2016
        ('2016-06-15 17:00:00-05', '2016-06-15 17:15:00-05'),
        ('2016-07-14 16:00:00-05', '2016-07-14 16:15:00-05'),
        ('2016-08-11 16:30:00-05', '2016-08-11 16:45:00-05'),
        ('2016-09-19 16:16:00-05', '2016-09-19 16:31:00-05'),
    )
)

In [None]:
# Metadata for the solar homes: dataid, PV flag, panel direction and the
# installed PV amounts (total / west-facing / south-facing).
#
# The WHERE clause keeps only homes that have non-null pv, total_amount_of_pv,
# grid and solar fields, whose panels face South or West, and whose 1-minute
# eGauge data starts before 2016-09-19 and ends after 2019-06-19.
#
# NOTE(review): the event lists in this notebook span 2016-06-15 through
# 2019-09-06, which is wider than the 2016-09-19 .. 2019-06-19 coverage
# required here - confirm these bounds are the intended ones.
query = """
select dataid, pv, pv_panel_direction, total_amount_of_pv, amount_of_west_facing_pv, amount_of_south_facing_pv
from other_datasets.metadata
where pv is not null
and total_amount_of_pv is not null
and grid is not null 
and solar is not null
and pv_panel_direction in ('South', 'West')
and egauge_1min_min_time < '2016-09-19'
and egauge_1min_max_time > '2019-06-19'
"""

# run the query through the engine and load the result set into a dataframe
df = pd.read_sql_query(sql=sqla.text(query), con=engine)
df

In [None]:
# Collect the dataids from the metadata frame and render them as one
# comma-separated string, ready to drop into a SQL "in (...)" list.
dataids_list = df['dataid'].tolist()
dataids_str = ','.join(str(dataid) for dataid in dataids_list)
dataids_str

In [None]:
# Calculate usage as grid minus solar (which is actually grid + solar because
# solar is negative use) for the selected homes during one 4CP event window.
#
# The window bounds come from the first entry of event_start_dates /
# event_end_dates (the 2019-06-19 event) rather than being repeated here as
# literals, so the query cannot drift from the event lists; change the index
# to look at a different event.
query_2 = """
select dataid, localminute, solar, grid from electricity.eg_realpower_1min 
where localminute >= '{start}' and localminute <= '{end}'
and dataid in ({dataids})""".format(
    start=event_start_dates[0],
    end=event_end_dates[0],
    dataids=dataids_str,
)
print(query_2)

# create a dataframe with the data from the sql query
df2 = pd.read_sql_query(sqla.text(query_2), engine)

# Calculate the sum and add it as a new column.
# A column-wise add replaces the row-by-row apply(): it is vectorised and also
# works when the query returns no rows.
df2['solar+grid'] = df2['solar'] + df2['grid']
df2.head(15)

In [None]:
# Group by dataid and average the readings over the event window, giving one
# row per home (the original author marked "average" with a question mark).
#
# numeric_only=True restricts the mean to the numeric columns. Since pandas 2.0
# the default is numeric_only=False, so without it the localminute timestamp
# column would be aggregated as well (or raise, if it is not a datetime dtype).
grouped = df2.groupby('dataid').mean(numeric_only=True)
grouped

In [None]:
# attach each home's metadata (panel direction, PV amounts) to its averaged
# readings, matching rows on dataid
joined = grouped.merge(df, on='dataid')
joined

In [None]:
# Compare South- vs West-facing homes: mean of every numeric column per panel
# direction.
#
# numeric_only=True is required because joined also carries non-numeric
# metadata (the pv column selected by the metadata query); since pandas 2.0 a
# bare .mean() no longer drops such columns and raises a TypeError instead.
# NOTE(review): if dataid is a regular column of joined, it is averaged too and
# that value is meaningless.
west_vs_south = joined.groupby('pv_panel_direction').mean(numeric_only=True)
west_vs_south