# Savings from a battery and solar system during ERCOT 4CP events

This notebook looks at what would happen if solar-equipped homes stored all of the excess energy they produced between 7AM and 4PM on the day of a 4CP event, then discharged it from 4PM to 5:30PM. How much energy would be stored up, and what would the potential savings be, based on a cost of $55/kWh? (Note: in the final calculation below, this rate is multiplied by the average kW discharged over the 1.5-hour window.)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import sqlalchemy as sqla
import os
from config.read_config import get_database_config
import sys
%matplotlib inline
sys.executable  # shows you your path to the python you're using

In [None]:
# Read the database credentials from config/config.txt.
# * Make sure you have added your credentials to config/config.txt first! *
# The path is relative, so it is resolved against the notebook's working directory.
# The returned mapping is read below via the keys 'username', 'password', 'hostname',
# 'port' and 'database'.

database_config = get_database_config("./config/config.txt")


In [None]:
# Get our DB connection.
# The username and password are percent-encoded before being placed in the connection
# URL, so credentials containing characters such as '@', ':' or '/' are not mistaken
# for URL delimiters when SQLAlchemy parses the string.
from urllib.parse import quote_plus

engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(
    quote_plus(str(database_config['username'])),
    quote_plus(str(database_config['password'])),
    database_config['hostname'],
    database_config['port'],
    database_config['database'],
))


In [None]:
# Dollar rate used for the savings estimate at the end of the notebook
cost_kWh = 55

# Actual ERCOT 4CP event windows (start and end date/time) for 2016 - 2019, acquired from
# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
# One row per year (June, July, August, September), newest year first.
event_start_dates = [
    '2019-06-19 17:00:00-05', '2019-07-30 16:30:00-05', '2019-08-12 17:00:00-05', '2019-09-06 16:45:00-05',
    '2018-06-27 17:00:00-05', '2018-07-19 17:00:00-05', '2018-08-23 16:45:00-05', '2018-09-19 16:30:00-05',
    '2017-06-23 16:45:00-05', '2017-07-28 17:00:00-05', '2017-08-16 17:00:00-05', '2017-09-20 16:45:00-05',
    '2016-06-15 17:00:00-05', '2016-07-14 16:00:00-05', '2016-08-11 16:30:00-05', '2016-09-19 16:15:00-05',
]
event_end_dates = [
    '2019-06-19 17:15:00-05', '2019-07-30 16:45:00-05', '2019-08-12 17:15:00-05', '2019-09-06 17:00:00-05',
    '2018-06-27 17:15:00-05', '2018-07-19 17:15:00-05', '2018-08-23 17:00:00-05', '2018-09-19 16:45:00-05',
    '2017-06-23 17:00:00-05', '2017-07-28 17:15:00-05', '2017-08-16 17:15:00-05', '2017-09-20 17:00:00-05',
    '2016-06-15 17:15:00-05', '2016-07-14 16:15:00-05', '2016-08-11 16:45:00-05', '2016-09-19 16:30:00-05',
]

# The calendar day of each event (the date part of its start timestamp), in the same order
event_days = [stamp.split(' ')[0] for stamp in event_start_dates]

# The theoretical battery charges from 7AM, when the sun becomes available to the PV systems,
# until 4PM, the earliest start time of any 4CP event listed above. It then discharges
# from 4PM - 5:30PM to cover any 4CP timeslot that has happened thus far.
start_time, end_time = '07:00:00-05', '16:00:00-05'

In [None]:
# Select homes that have solar and a high amount of data completeness.
# The WHERE clause keeps only homes that:
#   * have non-null pv, grid, solar and total_amount_of_pv metadata
#   * have a total_amount_of_pv of at most 6.25 (filters out really large solar arrays)
#   * have egauge 1-minute data starting before 2016-06-15 and ending after 2019-09-06
#     (the dates of the earliest and latest 4CP event days used in this notebook)
#   * have an egauge_1min_data_availability value matching '%100%', '99%' or '98%'
# dataids 5448 and 2925 are excluded because they already have a battery.
# NOTE(review): `limit 25` is used without an ORDER BY, so which 25 matching homes are
# returned is up to the database and may differ between runs -- confirm this is acceptable.
# NOTE(review): '%100%' matches "100" anywhere in the value, whereas '99%' and '98%' only
# match at the start of the value -- confirm this asymmetry is intended.
query = """
select dataid, pv, total_amount_of_pv
from other_datasets.metadata
where pv is not null
and grid is not null 
and solar is not null
and total_amount_of_pv is not null
and total_amount_of_pv <= 6.25
and egauge_1min_min_time < '2016-06-15'
and egauge_1min_max_time > '2019-09-06'
and dataid not in (5448, 2925)
and (egauge_1min_data_availability like '%100%' or egauge_1min_data_availability like '99%' or egauge_1min_data_availability like '98%')
limit 25
"""

# Create a Pandas dataframe with the data from the SQL query and preview the first 10 rows
dataids = pd.read_sql_query(sqla.text(query), engine)
dataids.head(10)

In [None]:
# Summary statistics for the metadata of the selected homes
dataids.describe()

In [None]:
# Pull the dataid column out as a plain list, and build the comma-separated
# string form of it that gets placed inside the SQL "in (...)" clause later on
dataids_list = dataids['dataid'].tolist()
dataids_str = ','.join(str(dataid) for dataid in dataids_list)

print("{} dataids selected listed here:".format(len(dataids_list)))
dataids_str

In [None]:
# Select the 1-minute solar and grid readings for all of the selected homes, restricted to
# the charging window (start_time to end_time, i.e. 7AM to 4PM) on each 4CP event day.
#
# One "(localminute >= ... and localminute <= ...)" clause is built per event day and the
# clauses are OR-ed together. event_days is only read here, never modified, so re-running
# this cell always queries the full list of event days.
day_clauses = [
    "(localminute >= '{day} {start}' and localminute <= '{day} {end}')".format(
        day=day, start=start_time, end=end_time)
    for day in event_days
]

energy_query = """
select dataid, localminute, solar, grid from electricity.eg_realpower_1min
where ({}) AND dataid in ({})""".format(" OR ".join(day_clauses), dataids_str)

print("query is {}".format(energy_query))

df2 = pd.read_sql_query(sqla.text(energy_query), engine)

In [None]:
# Summary statistics for the raw 1-minute readings returned by the query
df2.describe()

In [None]:
# Calculate each minute's usage as the sum of the solar and grid readings and add it as a
# new column called 'usage'.
# The addition is done column-wise (vectorized) instead of with a per-row lambda: it gives
# the same values without making a Python-level call for each of the ~520k rows.
df2['usage'] = df2['solar'] + df2['grid']
df2.describe()

In [None]:
# Which homes have minutes where the computed usage comes out negative?
wat = df2[df2['usage'] < 0]
wat['dataid'].unique()

In [None]:
# Parse localminute into a pandas datetime column named 'datetime', make that column the
# index, and convert the index to the local (US/Central) timezone
df2 = (
    df2.assign(datetime=pd.to_datetime(df2['localminute']))
       .set_index('datetime')
       .tz_convert('US/Central')
)

df2

In [None]:

# Keep only the minutes with negative grid (extra solar production), then group by month
# and dataid and sum, giving each home's accumulated negative grid for that month's 4CP
# event day between 7AM and 4PM.
# Only the numeric measurement columns are summed. The frame also carries the 'localminute'
# timestamp column, which cannot be summed: older pandas versions silently dropped it from
# the result, while pandas >= 2.0 raises an error instead.
df3 = (
    df2.loc[df2['grid'] < 0]
       .groupby([pd.Grouper(freq='M'), 'dataid'])[['solar', 'grid', 'usage']]
       .sum()
)
df3

In [None]:
# Average the per-home monthly sums across all homes, month by month.
# Steps, in order:
#   1. turn the (datetime, dataid) index back into columns and re-index on datetime only
#   2. take the mean of every column for each calendar month
#   3. keep only rows with at least 3 non-NaN values (months without an event day are all NaN)
#   4. discard the dataid column, whose mean carries no meaning
df4 = (
    df3.reset_index()
       .set_index('datetime')
       .groupby([pd.Grouper(freq='M')])
       .mean()
       .dropna(thresh=3)
       .drop(columns=['dataid'])
)

# Convert the summed one-minute readings to kWh (60 one-minute readings per hour)
df4 = df4 / 60.0

# this gives us the 16 event days averaged across homes, one row per event day
df4

In [None]:
# Bar chart of the averaged values: one group of solar/grid/usage bars per event day
ax = df4.plot(kind='bar', rot=90, figsize=(60, 30), fontsize=25, grid=True)
ax.set_xlabel('4CP Event Date', fontsize=30)
ax.set_ylabel('kWh', fontsize=30)
legend = ax.legend(loc='upper right', prop={'size': 50})

In [None]:
# OK, back to the 3rd dataframe: the per-month, per-dataid sums of the rows with negative
# grid, from before everything was averaged together by month.

# Only the grid column is needed from here on, so drop the usage and solar columns.
# errors='ignore' keeps this cell re-runnable: df3 is rebound to the reduced frame, so on a
# second run the two columns are already gone and a plain drop would raise a KeyError.
df3 = df3.drop(columns=['solar', 'usage'], errors='ignore')
df3.describe()

In [None]:
# Mean of the negative grid column across every (month, home) row
ave_neg = df3.mean()
ave_neg['grid']

In [None]:
# The sums are built from one-minute data, so dividing by 60 (minutes per hour)
# turns the averaged sum into kWh
kWh = ave_neg['grid'] / 60
kWh

In [None]:
# Divide by 1.5 to get the average kW discharged if that stored energy is released over
# the 1.5 hours between 4PM and 5:30PM to cover the potential 4CP event.
kW = kWh / 1.5
kW

In [None]:
# Estimated average saving per house: the average discharge (kW) multiplied by the dollar
# rate held in cost_kWh, assuming all excess solar produced from 7AM until 4PM is stored
# and then discharged from 4PM - 5:30PM to cover that day's potential 4CP event.
# abs() is needed because kW is derived from negative grid values.
value = cost_kWh * abs(kW)
print("Average $ saved per house if they charged their battery day of a 4CP event, then started discharging at 4PM would be ${}".format(str(round(value, 2))))