In [None]:
import pandas as pd

Format EIA Data

In [None]:
# Read every status tab of the EIA generator workbook, keep only the
# natural-gas ('NG') rows, tag each row with the tab it came from, and combine
# everything into a single dataframe named `eia`.

# The EIA data table is formatted with notes at the beginning and end, drop them from dataframe
# (these feed read_excel's skiprows / skipfooter arguments below)
eiaDropHead = [0, 1]

# Columns we don't need for GGPT (dropped after the tabs are combined; listed here for visibility)
eiaDropCols = ['Sector', 'Net Summer Capacity (MW)', 'Net Winter Capacity (MW)', 'Technology', 'Operating Month', 'Planned Retirement Month', 'Retirement Month', 'Planned Operation Month',
'Google Map', 'Bing Map', 'Balancing Authority Code', 'Planned Derate Year', 'Planned Derate Month', 'Planned Derate of Summer Capacity (MW)', 'Planned Uprate Year', 'Planned Uprate Month',
'Planned Uprate of Summer Capacity (MW)']

# EIA list of tabs to merge
eiaTabs = ['Operating', 'Planned', 'Retired', 'Canceled or Postponed', 'Operating_PR', 'Planned_PR', 'Retired_PR']

eiaList = []  # one filtered dataframe per tab; concatenated once after the loop
eiaLenQC = 0  # running row count of all tabs *before* filtering, for QC

# Open the workbook a single time and parse each sheet from the same handle,
# instead of re-opening the file once per tab.
with pd.ExcelFile(r'C:\Users\Sarah\Documents\GEM\research\North America\US\EIA\july_generator2021.xlsx') as eiaWorkbook:
    # Loop over each sheet to read in and merge
    for i in eiaTabs:
        print("Processing tab: ", i)

        eia_ = pd.read_excel(eiaWorkbook, sheet_name=i, skiprows=eiaDropHead, skipfooter=2)
        eiaLenQC += len(eia_.index) # Can compare this number later to the sum of all rows in the EIA Excel sheet

        # Keep only gas-fired plants.
        # .copy() makes the filtered rows an independent frame, so adding a
        # column below does not raise SettingWithCopyWarning.
        eia_ = eia_.loc[eia_['Energy Source Code'] == 'NG'].copy()

        # Add status column based on the tab's name, EIA Status column inconsistent
        eia_['Status_Tab'] = i.lower()

        # Store this tab's data to merge
        eiaList.append(eia_)

# Combine tabs. ignore_index=True gives the combined frame a fresh 0..n-1 index;
# without it every tab contributes its own row labels, so labels repeat and
# label-based selection or removal would hit rows from several tabs at once.
eia = pd.concat(eiaList, ignore_index=True)

# Drop columns (reassign rather than mutate in place)
eia = eia.drop(columns=eiaDropCols)

# Total rows
print('\n')
print("Total row count of all tabs: ", eiaLenQC, '\n')
print("Total row count, only NG-fired units: ", len(eia.index), '\n')


Merge combined cycle units into single sets

In [None]:
# Roll combined-cycle component units up into one row per (Plant ID, Unit Code).
# Prime mover codes 'ST', 'GT', and 'CS' stay as individual units; 'CA' (steam)
# and 'CT' (gas turbine) rows are the parts that get combined into a 'CC' set.
isCcPart = eia['Prime Mover Code'].isin(['CA', 'CT'])
ccSets = eia[isCcPart]

# Aggregation rules per group: capacity is summed, generator IDs are kept as a
# list, and each remaining unit-level field is collapsed to its set of values.
ccSetCols = ['Status', 'Operating Year', 'Planned Retirement Year', 'Planned Operation Year',
             'Retirement Year', 'Entity ID', 'Entity Name']
ccAggSpec = {'Nameplate Capacity (MW)': 'sum', 'Generator ID': lambda ids: list(ids)}
ccAggSpec.update({name: (lambda vals: set(vals)) for name in ccSetCols})

# Group by plant ID and Unit Code fields, then stamp the combined tech code
ccGroup = ccSets.groupby(['Plant ID', 'Unit Code'], as_index=False).agg(ccAggSpec)
ccGroup = ccGroup.assign(**{'Prime Mover Code': 'CC'})

# TODO: check the sets for multiple values and turn them into a range if
# necessary (e.g. different start years for units of a CC generator)

# Plant-level backfill: take the first row found for each Plant ID and strip
# the columns that were aggregated above (or replaced, for Prime Mover Code),
# so the remaining columns can be joined back onto the grouped sets.
ccUnitLevelCols = ['Unit Code', 'Nameplate Capacity (MW)', 'Generator ID', 'Status', 'Operating Year',
                   'Planned Retirement Year', 'Planned Operation Year', 'Retirement Year', 'Entity ID',
                   'Entity Name', 'Prime Mover Code']
eiaBackfill = eia.drop_duplicates(subset='Plant ID', ignore_index=True).drop(columns=ccUnitLevelCols)

ccGroupJoin = pd.merge(ccGroup, eiaBackfill, how='left', on='Plant ID')
print(ccGroupJoin.columns)

# Remove units from eia dataset, add back-in the new sets
        

# remove units <50 MW
  
# Change 'CS' to 'CC'
# The column selector is required: with only a row mask, .loc assigns the
# scalar to every column of the matching rows, wiping out the rest of the record.
eia.loc[eia['Prime Mover Code'] == 'CS', 'Prime Mover Code'] = 'CC'



Compare new data to GGPT values

In [None]:
# Match IDs to GGPT and compare field values (status, capacity, year)

# Locate any new unit IDs

# Check capacity for any new units

In [None]:
# Check for missing dual-fuel plants in EIA-860


In [None]:
# Add in ownership information from separate EIA table