# Efficiency Notebooks: Exploring the emissions density of power used by homes
We'll be using data from ERCOT, the Texas ISO, to calculate the amount and sources of emissions from the grid power used by homes.

Only solar-equipped homes are used, to demonstrate the interplay between solar generation and emissions during this time period (March - August 2018).

ERCOT emissions/generation data pulled from http://www.ercot.com/content/wcm/lists/181766/FuelMixReport_PreviousYears.zip 
which you can find at http://www.ercot.com/gridinfo/generation

In [None]:
#import packages
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import sqlalchemy as sqla
import os
import sys
sys.path.insert(0,'..')
from config.read_config import get_database_config
import numpy as np
import statistics
%matplotlib inline
sys.executable  # shows you your path to the python you're using

In [None]:
# CO2 emission factors, in lbs of CO2 per kWh, one per emitting ERCOT source
_coal_lbs: float = 2.21    # applied to the 'Coal' share
_gas_lbs: float = 1.0      # applied to the 'Gas' share
_gas_cc_lbs: float = 0.75  # applied to the 'Gas-CC' share

In [None]:
# Load the database credentials used for the live queries below.
# * make sure you have added your credentials to ../config/config.txt first! *
config_path = "../config/config.txt"
database_config = get_database_config(config_path)

#

In [None]:
# get our DB connection (used by every live query in this notebook)
from urllib.parse import quote

# Percent-encode the credentials before placing them in the URL: a raw '@',
# ':' or '/' inside the username or password would otherwise be read as a URL
# delimiter and the connection string would be parsed incorrectly.
# Plain alphanumeric credentials are left unchanged by the encoding.
engine = sqla.create_engine(
    'postgresql://{}:{}@{}:{}/{}'.format(
        quote(str(database_config['username']), safe=''),
        quote(str(database_config['password']), safe=''),
        database_config['hostname'],
        database_config['port'],
        database_config['database'],
    )
)


In [None]:
# Select up to 50 distinct Austin homes from the dataport metadata that:
#   * have both grid and solar marked 'yes',
#   * have eGauge 1-minute data starting on/before 2018-03-01 and ending after 2018-09-01,
#   * report an eGauge 1-minute data availability matching 97%-100%
#     (in SQL LIKE the trailing % is a wildcard, so e.g. '97%' matches any value starting with '97'),
#   * have gas ERT data covering the same 2018-03-01 .. 2018-09-01 span.
# NOTE(review): there is no ORDER BY, so which 50 homes come back when more qualify is up to the database.
query = """select distinct dataid, egauge_1min_data_availability, grid, solar from other_datasets.metadata 
                                          where grid = 'yes'
                                          and solar = 'yes'
                                          and egauge_1min_min_time <= '2018-03-01' 
                                          and egauge_1min_max_time > '2018-09-01'
                                          and city='Austin'
                                          and (egauge_1min_data_availability like '100%' 
                                               or 
                                               egauge_1min_data_availability like '99%'
                                               or
                                               egauge_1min_data_availability like '98%'
                                               or
                                               egauge_1min_data_availability like '97%'
                                               )
                                          and gas_ert_min_time <= '2018-03-01'
                                          and gas_ert_max_time > '2018-09-01'
                                          limit 50
                                          ;
         """

# run the query and keep the matching homes in a dataframe (displayed below)
df = pd.read_sql_query(sqla.text(query), engine)
df

In [None]:
# Collect the selected dataids and join them into the comma-separated
# string that is spliced into the SQL "in (...)" clause of the next query.
dataids_list = df['dataid'].tolist()
selected_count = len(dataids_list)
print("{} dataids selected listed here:".format(selected_count))
dataids_str = ','.join(str(dataid) for dataid in dataids_list)
dataids_str

In [None]:
# Pull the 1-minute grid readings of the selected homes for
# 2018-03-01 (inclusive) through 2018-09-01 (exclusive).
data = (
    """select dataid,localminute::timestamp,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-03-01' and localminute <  '2018-09-01' """
    + """AND dataid in ({})""".format(dataids_str)
)

# run the query and load the result into a dataframe
grid_df = pd.read_sql_query(sqla.text(data), engine)

grid_df

In [None]:
# Load the 2018 ERCOT emissions table in one pass:
#   1. read the CSV,
#   2. index the rows by the 'Energy, GWh' column,
#   3. strip the commas out of the number strings,
#   4. convert every value from string to float.
ercot = (
    pd.read_csv('ercot_emissions_2018.csv')
    .set_index('Energy, GWh')
    .replace(',', '', regex=True)
    .astype('float64')
)

ercot

In [None]:
# Sanity check: compute the share for just one month and one source (coal in March)
coal_mar = ercot.loc['Coal', 'Mar']
total_mar = ercot.loc['Total', 'Mar']
perc_coal_mar = coal_mar / total_mar
perc_coal_mar

In [None]:
# For each emissions-producing source (coal, gas, gas-cc) and each month of
# interest, compute that source's share of ERCOT's total generation.
# Keys are the source name immediately followed by the month, e.g. 'CoalMar'.
sources = ['Coal', 'Gas', 'Gas-CC']
months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug']
percs = {
    source + month: ercot.loc[source, month] / ercot.loc['Total', month]
    for source in sources
    for month in months
}
percs

In [None]:
# take the mean across the months (Mar - Aug) for each source
def _mean_share(source):
    """Return the mean of the six monthly shares stored in percs for one source."""
    monthly = [percs[source + month] for month in ('Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug')]
    return statistics.mean(monthly)


coal_ave = _mean_share('Coal')
gas_ave = _mean_share('Gas')
gascc_ave = _mean_share('Gas-CC')

# report each average share as a percentage
print('Coal = {}%'.format(coal_ave * 100))
print('Gas = {}%'.format(gas_ave * 100))
print('Gas-CC = {}%'.format(gascc_ave * 100))

In [None]:
# Whatever share is left after coal, gas and gas-cc is grouped as 'Other'
# (the remaining sources, which are largely non-emissions producing).
the_rest = 1.0 - coal_ave - gas_ave - gascc_ave

# pie chart of the averaged generation shares
pie_labels = ['Coal', 'Gas', 'Gas-CC', 'Other']
pie_data = [coal_ave, gas_ave, gascc_ave, the_rest]
explode = [.05] * 4  # pull each of the four slices out a little bit
plt.pie(
    pie_data,
    explode=explode,
    labels=pie_labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=15,
)
plt.title('ERCOT Generation Percentages')
plt.axis('equal')
plt.show()

In [None]:
# convert ercot table to shares of each column's 'Total' row
def add_percentages(column):
    """Divide every entry of ``column`` by its 'Total' entry.

    The result is a fraction of the total (0.25 rather than 25), with the
    same labels as the input; the 'Total' entry itself becomes 1.0.
    """
    total = column['Total']
    return column / total

# apply the conversion to each column of the ERCOT table in turn
ercot_perc = ercot.apply(add_percentages, axis='index')
ercot_perc

In [None]:
# relabel the row index so the displayed table is titled as shares of generation
ercot_perc.index.name = "% of Generation"
ercot_perc

In [None]:
# Trim the share table down to what the emissions calculation needs:
# only the Coal / Gas / Gas-CC rows and only the Mar - Aug columns remain.
rows_to_drop = ['Biomass', 'Hydro', 'Nuclear', 'Other', 'Solar', 'Wind', 'Total']
cols_to_drop = ['Jan', 'Feb', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']
ercot_perc = ercot_perc.drop(index=rows_to_drop, columns=cols_to_drop)
ercot_perc

In [None]:
# Index the readings by localminute, then mark those timestamps as
# US/Central (tz_localize attaches the timezone; it does not shift the clock time).
grid_df = grid_df.set_index('localminute').tz_localize(tz='US/Central')
grid_df

In [None]:
# drop every row that has a missing value in any column (dropna's default mode),
# which removes the readings with a blank grid value
grid_df = grid_df.dropna()
grid_df

In [None]:
# Average all readings (across every selected home) within each calendar day.
grouped_grid = grid_df.groupby([pd.Grouper(freq='D')]).mean()
# Scale the daily average grid use/generation by 24 hours to express it in kWh
# (presumably the grid readings are in kW - confirm against the data source).
grouped_grid['grid'] *= 24
grouped_grid

In [None]:
# the above was using the monthly averages from Mar - Aug from ercot all averaged together for each source
# let's use the actual monthly averages for each point instead
# Work on a copy: a plain assignment would only alias grouped_grid, so the
# columns added to grid_more in the next cells would also appear on grouped_grid.
grid_more = grouped_grid.copy()

In [None]:
# Extract the full month name ('%B', e.g. 'March') from each day's timestamp
# and store it, as a string, in a new 'Month' column.
grid_more['Month'] = grid_more.index.strftime('%B').astype(str)
grid_more

In [None]:
# convert the month to the same 3 letter abbreviation as in the ERCOT table
def shorten_month(col):
    """Truncate the 'Month' entry of ``col`` to its first three characters.

    ``col`` is modified in place and also returned, so the function can be
    used with ``DataFrame.apply(..., axis=1)``, where each call receives one row.
    """
    full_name = col['Month']
    col['Month'] = full_name[:3]
    return col

# shorten the month name row by row
grid_more = grid_more.apply(shorten_month, axis='columns')
grid_more

In [None]:
# Assign emissions based on the monthly percentage breakdown
def assign_lbs(row):
    """Add the per-source CO2 columns (in lbs) to one daily row.

    For each emitting source, the source's share of generation for the row's
    month (looked up in ``ercot_perc``) is multiplied by the row's 'grid'
    value and then by that source's lbs-of-CO2-per-kWh factor.
    The row is modified in place and returned.
    """
    month = row.Month
    usage = row['grid']
    # (output column, ercot_perc row label, emission factor) - order fixes the column order
    breakdown = (
        ('Gas-CC lbs', 'Gas-CC', _gas_cc_lbs),
        ('Gas lbs', 'Gas', _gas_lbs),
        ('Coal lbs', 'Coal', _coal_lbs),
    )
    for column_name, source, lbs_per_kwh in breakdown:
        row[column_name] = (ercot_perc.loc[source, month] * usage) * lbs_per_kwh
    return row


# compute the per-source emissions row by row
grid_more = grid_more.apply(assign_lbs, axis='columns')
grid_more

In [None]:
# the dataid and Month columns are not needed anymore, so remove them
unneeded_columns = ['dataid', 'Month']
grid_more = grid_more.drop(columns=unneeded_columns)
grid_more

In [None]:
# Add a total CO2 column: the sum of the three per-source emission columns
grid_more['Total CO2'] = (
    grid_more['Gas-CC lbs']
    .add(grid_more['Gas lbs'])
    .add(grid_more['Coal lbs'])
)
grid_more

In [None]:
# give the grid column a descriptive label for the plot legend
grid_more = grid_more.rename(columns={'grid': 'Grid Use (kWh)'})

In [None]:
# plot every column of grid_more (grid use and the emissions columns) per day on one chart
plot_options = {
    'figsize': (25, 15),
    'title': 'Daily Grid (kWh) and Emissions in lbs of CO2',
    'grid': True,
    'xlabel': 'Day',
    'ylabel': 'kWh or lbs CO2',
}
grid_more.plot(**plot_options)

# Observations:

- These all-solar homes have the capacity to offset some of their neighbors' emissions in the "shoulder months" by putting energy back on the grid
- Total CO2 (in lbs) tracks grid use (in kWh) at nearly 1-to-1
