# PV Notebook
This notebook explores solar generation around the ERCOT 4CP (Four Coincident Peak) events.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import sqlalchemy as sqla
import os
from config.read_config import get_database_config
import sys
sys.executable  # shows you your path to the python you're using

In [None]:
%matplotlib inline

In [None]:
# Load the database credentials from config/config.txt.
# * Add your own credentials to config/config.txt before running this cell! *
config_path = "./config/config.txt"
database_config = get_database_config(config_path)


In [None]:
# Build the SQLAlchemy engine for the PostgreSQL database described by
# database_config (keys: username, password, hostname, port, database).
from urllib.parse import quote

# The user name and password are percent-encoded before being placed in the
# URL: reserved characters such as '@', ':' or '/' in a credential would
# otherwise be parsed as URL delimiters and break the connection string.
engine = sqla.create_engine(
    'postgresql://{}:{}@{}:{}/{}'.format(
        quote(str(database_config['username']), safe=''),
        quote(str(database_config['password']), safe=''),
        database_config['hostname'],
        database_config['port'],
        database_config['database'],
    )
)


In [None]:
# ERCOT 4CP event intervals for 2016 - 2019, acquired from
# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
#
# Each entry is one event written as a (start, end) pair of timestamp strings
# carrying a -05 UTC offset; years are listed newest first, June to September
# within each year.
# NOTE(review): the 2016-09-19 window (16:16 - 16:31) is the only one that is
# not on a quarter-hour boundary -- confirm it against the ERCOT report.
_event_windows = [
    ('2019-06-19 17:00:00-05', '2019-06-19 17:15:00-05'),
    ('2019-07-30 16:30:00-05', '2019-07-30 16:45:00-05'),
    ('2019-08-12 17:00:00-05', '2019-08-12 17:15:00-05'),
    ('2019-09-06 16:45:00-05', '2019-09-06 17:00:00-05'),
    ('2018-06-27 17:00:00-05', '2018-06-27 17:15:00-05'),
    ('2018-07-19 17:00:00-05', '2018-07-19 17:15:00-05'),
    ('2018-08-23 16:45:00-05', '2018-08-23 17:00:00-05'),
    ('2018-09-19 16:30:00-05', '2018-09-19 16:45:00-05'),
    ('2017-06-23 16:45:00-05', '2017-06-23 17:00:00-05'),
    ('2017-07-28 17:00:00-05', '2017-07-28 17:15:00-05'),
    ('2017-08-16 17:00:00-05', '2017-08-16 17:15:00-05'),
    ('2017-09-20 16:45:00-05', '2017-09-20 17:00:00-05'),
    ('2016-06-15 17:00:00-05', '2016-06-15 17:15:00-05'),
    ('2016-07-14 16:00:00-05', '2016-07-14 16:15:00-05'),
    ('2016-08-11 16:30:00-05', '2016-08-11 16:45:00-05'),
    ('2016-09-19 16:16:00-05', '2016-09-19 16:31:00-05'),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
event_start_dates = [window[0] for window in _event_windows]
event_end_dates = [window[1] for window in _event_windows]

In [None]:
# Select the dataid, PV direction and amount of PV of the solar homes.
# A home is kept only when:
#   * pv, total_amount_of_pv, grid and solar are all present,
#   * pv_panel_direction is exactly 'South' or 'West',
#   * its 1-minute eGauge data starts before the day of the first 4CP event
#     (2016-06-15) and runs past the END of the last one (2019-09-06 17:00 -05),
#   * its 1-minute data availability is 97% or better.
#
# The upper bound carries the full end timestamp of the last event: comparing
# against the bare date '2019-09-06' would also admit homes whose data stops
# earlier that same day, i.e. before the event actually happened.

query = """
select dataid, pv, pv_panel_direction, total_amount_of_pv, amount_of_west_facing_pv, amount_of_south_facing_pv
from other_datasets.metadata
where pv is not null
and total_amount_of_pv is not null
and grid is not null
and solar is not null
and pv_panel_direction in ('South', 'West')
and egauge_1min_min_time < '2016-06-15'
and egauge_1min_max_time > '2019-09-06 17:00:00-05'
and (egauge_1min_data_availability = '100%'
OR egauge_1min_data_availability = '99%'
OR egauge_1min_data_availability = '98%'
OR egauge_1min_data_availability = '97%'
)
"""

# create a dataframe with the data from the sql query
df = pd.read_sql_query(sqla.text(query), engine)
df

In [None]:
# Collect the selected dataids and render them as one comma-separated string
# that can be placed inside the "dataid in (...)" clause of the next query.
dataids_list = df['dataid'].tolist()
print("{} dataids selected listed here:".format(len(dataids_list)))
dataids_str = ','.join(str(dataid) for dataid in dataids_list)
dataids_str

In [None]:
# Assemble the SQL query that pulls the 1-minute solar and grid readings of the
# selected dataids during every 4CP event window.
#
# The per-event time filters are OR-ed together with a join over the zipped
# event lists. The lists are only read, never modified, so this cell can be
# re-run safely (popping the first event off the lists would drop one more
# event from the query on every re-run).
event_filters = " OR ".join(
    "(localminute >= '{}' and localminute <= '{}')".format(start, end)
    for start, end in zip(event_start_dates, event_end_dates)
)

query_2 = """
select dataid, localminute, solar, grid from electricity.eg_realpower_1min
where ({}) AND dataid in ({})""".format(event_filters, dataids_str)

# here's what that query is
print("sql query is \n" + query_2)

# create a dataframe with the data from the sql query
df2 = pd.read_sql_query(sqla.text(query_2), engine)

# Home usage is the sum of the solar and grid readings (per the original
# author's note: "grid minus solar", which is grid + solar because solar counts
# as negative use). Adding the columns directly gives the same values as a
# row-by-row apply, without the per-row Python overhead, and also works when
# the query returns no rows.
df2['usage'] = df2['solar'] + df2['grid']
df2.head(15)

In [None]:
# Discard every row that has a missing value in any column.
df2 = df2.dropna(axis=0, how='any')

In [None]:
# Parse localminute into a pandas datetime column called 'datetime' and make
# that column the index of the frame (localminute itself stays as a column).
df2 = df2.assign(datetime=pd.to_datetime(df2['localminute'])).set_index('datetime')
df2

In [None]:
# Group by calendar month (of the datetime index) and dataid, and take the mean
# of solar, grid and usage within each group.
# numeric_only=True keeps the aggregation to the numeric columns: the leftover
# 'localminute' timestamp column is not a measurement, and since pandas 2.0
# non-numeric columns are no longer dropped silently by mean().
grouped = df2.groupby([pd.Grouper(freq='M'), 'dataid']).mean(numeric_only=True)
grouped

In [None]:
# Attach each home's metadata row (including pv_panel_direction) to the monthly
# means: turn the group index back into ordinary columns, then left-join the
# metadata frame on dataid.
grouped = grouped.reset_index().merge(df, how='left', on='dataid')
grouped

In [None]:
# Put the 'datetime' column back as the index so the frame can be grouped by
# time period again.
grouped = grouped.set_index(keys='datetime')
grouped

In [None]:
# Regroup by year (of the datetime index) and pv_panel_direction and average
# the values.
# numeric_only=True is required here: after the metadata merge the frame also
# holds non-numeric columns, and since pandas 2.0 mean() raises a TypeError on
# those instead of silently dropping them.
year_west_vs_south = grouped.groupby(
    [pd.Grouper(freq='Y'), 'pv_panel_direction']
).mean(numeric_only=True)
# we don't need a mean of the dataids, so we can drop that column now
year_west_vs_south = year_west_vs_south.drop(columns=['dataid'])
year_west_vs_south

# Observation: despite the higher in-home usage in the west-facing houses
# (usage), their solar production is so high that they are still putting power
# back on the grid (grid is smaller or even negative in most cases in
# west-facing homes).

In [None]:
# Simplify the view: flatten the group index, derive a plain 'year' column from
# the period timestamp, keep only the columns of interest in display order, and
# index the result by year.
simplified = year_west_vs_south.reset_index()
simplified['year'] = simplified['datetime'].dt.year
wanted_columns = ['year', 'pv_panel_direction', 'solar', 'grid', 'usage']
year_west_vs_south = simplified[wanted_columns].set_index('year')
year_west_vs_south

In [None]:
from matplotlib.pyplot import figure

# Bar chart of the yearly mean solar, grid and usage values, one group of bars
# per (year, panel direction) row of year_west_vs_south.
chart_title = "Solar production, Net Grid Usage (home usage from the grid minus solar production), and Home Usage During ERCOT 4CP events"
plot = year_west_vs_south.plot(kind='bar', figsize=(25, 15), title=chart_title)

# Build one '<year>-<first letter of direction>' tick label per row (e.g.
# '2016-S', '2016-W') from the data itself, so the labels stay correct if a
# year or direction is missing or the row order changes.
tick_labels = [
    '{}-{}'.format(year, direction[0])
    for year, direction in zip(year_west_vs_south.index,
                               year_west_vs_south['pv_panel_direction'])
]
labels = plot.set_xticklabels(tick_labels)
