## US Energy Information Administration [EIA](https://www.eia.gov/)

* Electricity Form [EIA-860](https://www.eia.gov/electricity/data/eia860/)

* The survey Form EIA-860 collects generator-level specific information about existing and planned generators and associated environmental equipment at electric power plants with 1 megawatt or greater of combined nameplate capacity. 

1. Utility
2. Plant
3. Generator
    2. Wind
    3. Solar
    4. Energy Storage
    5. Multifuel
4. Owner
5. Environment
    1. Association
    2. Equipment
    
* Cost for generators [summary](https://www.eia.gov/electricity/generatorcosts/)

In [None]:
import  geopandas as gpd
import  pandas    as pd
import  matplotlib . pyplot  as  plt
from    matplotlib . colors  import  ListedColormap

In [None]:
# Column layout for each EIA-860 workbook, keyed by the short dataset name that
# eia_electricity_datasets() derives from the workbook file name.
# Each spec is consumed by normalize():
#   'skip_rows'    - number of leading rows dropped from the frame read by pandas
#   'skip_columns' - number of leading columns dropped
#   'columns'      - normalized names assigned positionally to the remaining columns,
#                    so the list length must equal the remaining column count
xref = {
    'utility' : {
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','name','street','city','state','zip','owner','operator','manager','other','entity']
    },
    'plant' : {
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','street','city','state','zip','county',
                         'latitude','longitude','region','authority_code','authority','water_source','naics',
                        'regulatory_status','sector','sector_name','cogeneration','docket','small_pp','small_docket',
                        'wholesaler','wholesaler_docket','ash','ash_lined','ash_status','distributor','distributor_id','distributor_state',
                        'grid_voltage','grid_voltage_2','grid_voltage_3','storage',
                         'natgas_ldc_name','natgas_pipeline1','natgas_pipeline2','natgas_pipeline3','pipeline_notes',
                        'natgas_storage','lng_storage']
    },
    'generator' : {
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','state','county','generator_id',
                         'technology','prime_mover','unit_code','ownership','duct_burners','steam_recovery',
                         'node','location','MW','power_factor','summer_capacity','winter_capacity','minimum_load',
                         'rate_deltas','rate_month','rate_year','status','grid_sync','operating_month','operating_year',
                         'retirement_month','retirement_year','combo_heat_power','sector_name','sector','top_or_bottom',
                         'energy_source1','energy_source2','energy_source3','energy_source4','energy_source5','energy_source6',
                         'startup_source1','startup_source2','startup_source3','startup_source4',
                         'gassification','carbon_capture','turbines','restart_time','fluidized_tech','pulverized_tech',
                         'stoker_tech','combustion_tech','subcritical_tech','supercritical_tech','ultracritical_tech',
                         'planned_summer_uprate_capacity','planned_winter_uprate_capacity','planned_uprate_month','planned_uprate_year',
                         'planned_summer_derate_capacity','planned_winter_derate_capacity','planned_derate_month','planned_derate_year',
                         'planned_prime_mover','planned_energy_source1','planned_capacity','planned_repower_month','planned_repower_year',
                         'other_modifications','other_mod_month','other_mod_year','multiple_fuels','cofire','oil_natgas_switch']
    },
    'wind' : {
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','state','county','generator_id','status',
                         'technology','prime_mover','sector_name','sector','MW','summer_capacity','winter_capacity',
                         'operating_month','operating_year','number_turbines','manufacturer','model',
                         'design_speed','quality','height']
    },
    # NOTE(review): 'MW' appears twice in the solar list (once before 'summer_capacity'
    # and again after 'tilt'), so the normalized frame ends up with two columns named
    # 'MW'. Confirm what the second column holds and give it a distinct name.
    'solar' : {
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','state','county','generator_id','status',
                         'technology','prime_mover','sector_name','sector','MW','summer_capacity','winter_capacity',
                         'operating_month','operating_year','mirrors','single_axis','dual_axis','fixed_tilt','east_west_tilt',
                         'parabolic','fresnel','tower','dish','other_tech',
                        'azimuth','tilt','MW','crystaline','thin_film_CdTe','thin_film_ASi','thin_film_CIGS','thin_film_other',
                         'other_materials','net_metering','net_metering_capacity','net_metering_virtual','net_metering_virtual_capacity']
    },
    'energy' : { # energy storage; the workbook has tabs for operable and retired/canceled
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','state','county','generator_id','status',
                         'technology','prime_mover','sector_name','sector','MW','summer_capacity','winter_capacity',
                         'operating_month','operating_year','MWh','max_charge_rate','minimum_discharge_rate',
                         'storage_tech1','storage_tech2','storage_tech3','storage_tech4',
                         'power_rating','enclosure','arb','frequency_regulation','load_following','spinning','colocated',
                         'distribution_deferral','peak_shaving','load_mgt','voltage_support','backup','excess_solar_wind']
    },
    'multifuel' : { # the workbook has tabs for operable, proposed and retired/canceled
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','state','county','generator_id','status',
                         'technology','prime_mover','sector_name','sector','MW','summer_capacity','winter_capacity',
                         'operating_month','operating_year','source1','source2','multiple',
                         'cofire','cofire1','cofire2','cofire3','cofire4','cofire5','cofire6',
                         'oil_natgas_switch','operating_switch',
                         'summer_capacity_natgas','winter_capacity_natgas',
                         'summer_capacity_oil','winter_capacity_oil',
                         'switching_time_natgas2oil','switching_time_oil2natgas','switch_limiting_factors','storage_limits','air_limits','other_limits']
    },
    'owner' : {  # 'plant_state' here corresponds to 'state' in the other datasets; 'state' in this list is the owner's state
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','utility_name','plant_code','plant_name','plant_state','generator_id','status',
                         'name','street','city','state','zip','owner_id','share']
    },
    # NOTE(review): the two environmental entries below carry the same column list as
    # 'utility' and neither key is in the tracked list inside eia_electricity_datasets();
    # they look like placeholders -- confirm before enabling them.
    'enviroassoc' : { # association, 8 tabs for pollution remediation metrics
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','name','street','city','state','zip','owner','operator','manager','other','entity']
    },
    'enviroquip' : {  # equipment, 6 tabs
        'skip_rows'  :  1 ,
        'skip_columns' :   0 ,
        'columns'    :  ['id','name','street','city','state','zip','owner','operator','manager','other','entity']
    },
}

def normalize(file, spec):
    """Read one workbook and relabel it with the normalized column names.

    Parameters
    ----------
    file : path or file-like object
        Anything ``pd.read_excel`` accepts.
    spec : dict
        Layout description with the keys ``'skip_rows'``, ``'skip_columns'``
        and ``'columns'``.

    Returns
    -------
    pandas.DataFrame
        The trimmed sheet, with ``spec['columns']`` as column labels and a
        fresh 0-based index.
    """
    sheet = pd.read_excel(file)

    ## trim the leading rows / columns named in the spec
    first_row = spec['skip_rows']
    first_col = spec['skip_columns']
    table = sheet.iloc[first_row:, first_col:]

    ## swap the sheet's own headers for the normalized names (positional)
    table.columns = spec['columns']

    ## the row trim leaves a gap at the start of the index; renumber from 0
    return table.reset_index(drop=True)
    

## extract EIA electricity data for given year.
def eia_electricity_datasets ( 
    year  =  2020 , 
    url   =  'https://www.eia.gov/electricity/data/eia860/xls/eia860{year}.zip' ,
    subset = 'xls'   ## default all files; Solar for just solar, Wind for just wind, etc.
) :
    """Download the EIA-860 archive for ``year`` and load the tracked workbooks.

    Parameters
    ----------
    year : int
        Substituted into the ``{year}`` placeholder of ``url``.
    url : str
        Archive location template.
    subset : str
        Only archive members whose name contains this substring are loaded.

    Returns
    -------
    dict
        Maps a dataset key (e.g. ``'plant'``, ``'generator'``) to the
        DataFrame produced by ``normalize`` with the matching ``xref`` spec.
        Members that fail to parse are reported on stdout and skipped.
    """
    from urllib.request  import urlopen
    from io              import BytesIO
    from zipfile         import ZipFile

    ## only these keys are loaded; every one of them has a spec in xref
    tracked = ( 'utility' , 'plant' , 'generator' , 'wind' , 'solar' , 'energy' , 'multifuel' , 'owner' )

    data = dict ( )

    ## pull the whole archive into memory so the HTTP connection is released
    ## before any (slow) spreadsheet parsing starts
    with urlopen ( url . format ( year = year ) ) as f :
        payload = f . read ( )

    with BytesIO ( payload ) as b , ZipFile ( b ) as myzipfile :

        ## the last five entries are skipped as non-data files (per the original note);
        ## this relies on archive ordering -- TODO confirm for years other than 2020
        for name in myzipfile . namelist ( ) [ 0 : -5 ] :

            if subset not in name :
                continue

            try :
                ## the dataset key is one of the '_'-separated tokens of the member
                ## name; names containing a double underscore carry it one slot later
                if '__' in name :
                    file = name . split ( '_' ) [ 3 ] . lower ( )
                else :
                    file = name . split ( '_' ) [ 2 ] . lower ( )

                if file in tracked :
                    ## close the member handle once pandas has finished reading it
                    with myzipfile . open ( name ) as member :
                        data [ file ] = normalize ( member , xref [ file ] )

            except Exception as err :
                ## best effort: report and keep going, but let KeyboardInterrupt /
                ## SystemExit propagate and show why the member was skipped
                print ( 'not tracked' , name , '-' , repr ( err ) )

    return data

In [None]:
eia   =  eia_electricity_datasets ( year = 2020 )

In [None]:
def merge_plant_generators_geo_encode ( eia ):
    """Join plant locations with generator technologies and assign a plot colour.

    Parameters
    ----------
    eia : dict
        Must provide ``eia['plant']`` (with ``plant_code``, ``longitude`` and
        ``latitude`` columns) and ``eia['generator']`` (with ``plant_code``,
        ``technology`` and ``MW`` columns).

    Returns
    -------
    (plants, colors)
        ``plants`` is a GeoDataFrame with one row per distinct
        (plant_code, technology, MW) combination, point geometry built from
        longitude/latitude, and a ``colors`` column. ``colors`` is the same
        colour sequence as a plain list, aligned with the rows of ``plants``.

    Side effect: prints the technology -> colour table as markdown.
    """
    plant = eia['plant'].copy()

    # Missing coordinates arrive as blank strings; drop those rows. Latitude is
    # screened too, since a blank in either column cannot be turned into a point.
    blank_lon = plant.longitude.apply(lambda v: isinstance(v, str)).astype(bool)
    blank_lat = plant.latitude.apply(lambda v: isinstance(v, str)).astype(bool)
    plant = plant[~(blank_lon | blank_lat)]

    plants  =  gpd.GeoDataFrame(plant, geometry=gpd.points_from_xy(plant.longitude,plant.latitude))
    plants  =  plants.reset_index(drop=True)

    source = eia['generator'][['plant_code','technology','MW']].copy()

    # drop generators without a technology and exact duplicate rows
    mask = (pd.isnull(source.technology)) | (source.duplicated())
    source = source[~mask].reset_index(drop=True)

    plants = plants.merge(source,how='inner',on='plant_code')

    # Colours are paired with technologies in order of first appearance.
    # The last entry was 'bluegreen', which is not a matplotlib colour name
    # and would fail at plot time; 'teal' is the closest named colour.
    color_labels = source.technology.unique()
    col_values = ['black','green','blue','brown','black','brown','brown','gold','blue','brown','green','green','brown','red','green',
                 'black','black','black','red','brown','brown','green','green','brown','green','gold','green','teal']
    fallback = 'gray'   # used when there are more technologies than palette entries
    color_map = {
        label: (col_values[position] if position < len(col_values) else fallback)
        for position, label in enumerate(color_labels)
    }

    # every technology in `plants` comes from `source`, so the lookup cannot miss
    colors = [color_map[label] for label in plants.technology.values]

    legend = pd.DataFrame(list(color_map.items()), columns=['technology','color'])
    legend = legend.sort_values(['color','technology']).reset_index(drop=True)

    print(legend.to_markdown())

    plants['colors'] = colors

    return plants, colors
    

In [None]:
%matplotlib widget


# Map of generating plants, coloured by technology and sized by the MW column,
# drawn over US state outlines.
fig, ax = plt.subplots() 

# also prints the technology -> colour table as a side effect
plants, colors = merge_plant_generators_geo_encode ( eia )

# NOTE(review): cmap is built here but not referenced again in this cell
cmap = ListedColormap(plants['colors'])


# NOTE(review): colours are passed explicitly and `column` is commented out;
# the legend / categorical / legend_kwds arguments presumably have no effect
# without `column` -- confirm against the geopandas plot documentation.
ax1 = plants.plot(
    ax=ax,
    #column='colors',
    color = colors,
    markersize=plants['MW'].fillna(0.01),   # missing MW values get a tiny marker
    legend=True,
    categorical=True,
    legend_kwds={'bbox_to_anchor':(1.0, 0.1),'fontsize':5,'frameon':False,"ncol":3}
)



# state boundaries from the Census cartographic shapefile, reprojected to EPSG:4326
states = gpd.read_file('https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_5m.zip').to_crs("EPSG:4326")
ax2 = states.geometry.boundary.plot(color='lightgray',linewidth=1,ax=ax)
ax.set_axis_off()

plt.title('US Electricity Generation Plants - EIA 2020')
plt.show()

In [None]:
# data_dir = '/data/code/jupyter/energy/solarsystem/data/'
# plants. to_file ( data_dir + 'eia_electricity_generating_plants.geojson' )

In [None]:
# Placeholder path: point this at a local copy of the satellite-survey GeoJSON
us_satellite_survey = '/PATH/TO/YOUR/USpredicted.geojson'

# columns shared by the solar and plant frames; used as the join key
cols  =  ['id' , 'utility_name' , 'plant_code' , 'plant_name',  'state' , 'county' , 'sector' , 'sector_name' ]

# left join keeps every solar row and brings in the plant columns (incl. latitude / longitude)
solar = eia [ 'solar' ] . merge ( eia [ 'plant' ] , how = 'left' , on = cols )


# NOTE(review): unlike merge_plant_generators_geo_encode(), coordinates are not
# screened for blank strings before building points here -- confirm that is safe.
solar  =  gpd . GeoDataFrame ( solar , geometry = gpd . points_from_xy ( solar . longitude , solar . latitude ) ) . reset_index ( drop = True )

predicted  =   gpd . read_file ( us_satellite_survey )

In [None]:
%matplotlib widget

# Overlay the satellite-survey outlines (blue) and the EIA solar points (red)
# on US state boundaries.
fig, ax = plt.subplots() 

states = gpd.read_file('https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_5m.zip').to_crs("EPSG:4326")

# all three layers are drawn onto the same `ax`
ax1 = predicted.geometry.boundary.plot(color='blue', label='Satellite Survey - 2018',ax=ax)
ax2 = solar.geometry.plot(color='red', label='EIA - 2020 >0.5MW',ax=ax,markersize=1)
ax3 = states.geometry.boundary.plot(color='lightgray',linewidth=1,ax=ax)

ax.set_axis_off()

# legend entries come from the `label=` arguments above; the state layer has no label
h1, l1 = ax1.get_legend_handles_labels()

plt.legend(h1, l1, loc=3)   # loc=3 is the lower-left corner
plt.title('Solar arrays in USA')
plt.show()