In [1]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import pudl, ferc1, eia923, settings, constants
from pudl import models, models_ferc1, models_eia923
from pudl import clean_eia923, clean_ferc1, clean_pudl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Database connection handle for PUDL; it is passed as the `con` argument
# to the pd.read_sql calls that load the tables.
pudl_engine = pudl.db_connect_pudl()

In [7]:
# Load the complete boiler_fuel_eia923 table into a pandas DataFrame.
b_tbl = models.PUDLBase.metadata.tables['boiler_fuel_eia923']
# Table.select() builds "SELECT <all columns> FROM <table>". It yields the
# same query as sa.sql.select([b_tbl]) but avoids the list-style select()
# call, which SQLAlchemy 2.0 no longer accepts.
b_select = b_tbl.select()
b = pd.read_sql(b_select, pudl_engine)

In [8]:
# Heat input of each record: quantity of fuel burned times the heat content
# per unit of that fuel (column names indicate the result is in mmBTU).
b['fuel_consumed_mmbtu'] = b['fuel_qty_consumed'].mul(b['fuel_mmbtu_per_unit'])

In [59]:
# Index the boiler records by report date so they can be bucketed by year.
b = b.set_index(pd.DatetimeIndex(b['report_date']))
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas. With no `key` it groups on the DatetimeIndex set above;
# freq='A' is a year-end frequency, so each bucket is labelled 31 December.
b_gb = b.groupby(by=['plant_id', pd.Grouper(freq='A'), 'boiler_id'])
# Annual fuel heat input per (plant, year, boiler). The string 'sum' selects
# pandas' own groupby sum rather than routing through the np.sum callable.
b_summed = b_gb.agg({'fuel_consumed_mmbtu': 'sum'})

In [60]:
# Move the group keys (plant_id, report_date, boiler_id) out of the index
# and back into ordinary columns so they can be used as merge keys.
b_summed = b_summed.reset_index()

In [61]:
# Load the complete boiler_generator_assn_eia860 table into a pandas DataFrame.
bga_tbl = models.PUDLBase.metadata.tables['boiler_generator_assn_eia860']
# Table.select() builds "SELECT <all columns> FROM <table>". It yields the
# same query as sa.sql.select([bga_tbl]) but avoids the list-style select()
# call, which SQLAlchemy 2.0 no longer accepts.
bga_select = bga_tbl.select()
bga = pd.read_sql(bga_select, pudl_engine)

In [67]:
# Attach the boiler -> generator association to the annual boiler fuel totals.
# A left join keeps every boiler-year, even when no association row matches.
# NOTE(review): if bga holds more than one row for a (plant_id, boiler_id)
# pair, that boiler's fuel total is repeated on every matching row and will
# be counted once per row in any later sum -- verify the association is
# unique per boiler, or apportion the fuel, before trusting the totals.
bg = pd.merge(
    b_summed,
    bga,
    how='left',
    on=['plant_id', 'boiler_id'],
)

In [68]:
# Discard association-table columns that are not needed downstream.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps this
# cell re-runnable: a second execution no longer raises KeyError because
# the columns are already gone.
bg = bg.drop(['id', 'operator_id'], axis=1, errors='ignore')

In [69]:
# Re-index by report date so the boiler-level rows can be regrouped by year.
bg = bg.set_index(pd.DatetimeIndex(bg['report_date']))
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas. With no `key` it groups on the DatetimeIndex set above;
# freq='A' is a year-end frequency.
bg_gb = bg.groupby(by=['plant_id', pd.Grouper(freq='A'), 'generator_id'])
# Annual fuel heat input per (plant, year, generator): the sum over every
# boiler row that the association table mapped to that generator.
bg_summed = bg_gb.agg({'fuel_consumed_mmbtu': 'sum'})

In [70]:
# Move the group keys (plant_id, report_date, generator_id) out of the index
# and back into ordinary columns so they can be used as merge keys.
bg_summed = bg_summed.reset_index()

In [34]:
#Pull in the generation table
g_tbl = models.PUDLBase.metadata.tables['generation_eia923']
g_select = sa.sql.select([g_tbl,]) #creates a sql Select object
g = pd.read_sql(g_select, pudl_engine) #converts sql object to pandas dataframe

In [37]:
# Index the generation records by report date so they can be bucketed by year.
g = g.set_index(pd.DatetimeIndex(g['report_date']))
# pd.Grouper replaces pd.TimeGrouper, which was deprecated and later removed
# from pandas. With no `key` it groups on the DatetimeIndex set above;
# freq='A' is a year-end frequency, so each bucket is labelled 31 December.
g_gb = g.groupby(by=['plant_id', pd.Grouper(freq='A'), 'generator_id'])
# Annual net generation per (plant, year, generator). The string 'sum'
# selects pandas' own groupby sum rather than routing through np.sum.
g_summed = g_gb.agg({'net_generation_mwh': 'sum'})

In [42]:
# Move the group keys (plant_id, report_date, generator_id) out of the index
# and back into ordinary columns so they can be used as merge keys.
g_summed = g_summed.reset_index()

In [71]:
# Pair each generator-year's fuel heat input with its net generation.
# The left join keeps every generator-year that has fuel data; rows with no
# matching generation record get NaN in net_generation_mwh.
heat_rate = pd.merge(
    bg_summed,
    g_summed,
    how='left',
    on=['plant_id', 'report_date', 'generator_id'],
)

In [72]:
# Heat rate = fuel heat input / net electricity generated (mmBTU per MWh).
# A generator-year with zero net generation has no defined heat rate, so
# zero denominators are masked to NaN: the division then yields NaN instead
# of +/-inf, which would otherwise distort means, describe() and plots.
heat_rate['heat_rate_mmbtu_mwh'] = (
    heat_rate['fuel_consumed_mmbtu']
    / heat_rate['net_generation_mwh'].replace(0, np.nan)
)

In [73]:
# Preview only the first rows; displaying the whole frame bloats the saved
# notebook and buries the narrative.
heat_rate.head(10)

Unnamed: 0,plant_id,report_date,generator_id,fuel_consumed_mmbtu,net_generation_mwh,heat_rate_mmbtu_mwh
0,3,2009-12-31,1,4.564270e+06,221908.000,20.568299
1,3,2009-12-31,2,8.094252e+06,394031.000,20.542171
2,3,2009-12-31,3,2.613198e+07,1286393.000,20.314146
3,3,2009-12-31,4,3.232146e+07,1626547.000,19.871212
4,3,2009-12-31,5,8.941819e+07,4513101.000,19.813027
5,3,2009-12-31,A1ST,1.760776e+05,1122697.000,0.156834
6,3,2009-12-31,A2ST,1.373917e+05,1033733.000,0.132908
7,3,2010-12-31,1,9.048664e+06,435334.000,20.785567
8,3,2010-12-31,2,1.144207e+07,538495.000,21.248231
9,3,2010-12-31,3,1.914051e+07,906322.000,21.118888
