In [1]:
import os

import numpy as np
import pandas as pd

import cerf


In [2]:
def check_idx(variable, idx, delim):
    """Split expansion variable by delimiter.  Return 'none' as string
    if the requested subcategory is not present.  Otherwise, return the
    name of the requested.

    :param variable:                The variable string to parse.

    :param idx:                     The index number to parse.

    :param delim:                   The variable string delimiter.

    :return:                        'none' if index not present, else
                                    the name.

    """
    try:
        return variable.split(delim)[idx]

    except IndexError:
        return 'none'
    


In [17]:
f = '/Users/d3y010/projects/cerf/runs/GCAM_CERF_reporting_2019.04.11/GCAM_output_ReEDS_detailed.csv'
tech_file = '/Users/d3y010/projects/cerf/runs/GCAM_CERF_reporting_2019.04.11/GCAM_CERF_tech_assumptions_Ref.csv'

scenario = 'GCAMUSA_Reference'

prev_yr = 2010
target_yr = 2030



## need source

variable_cost_esc_rate: -0.00104311614063357
unit_size: 1350
discount_rate: 0.05
carbon_esc_rate: 0.0
carbon_tax: 0.0
buffer_in_km: 5
require_pipelines: False
suitability_raster_file: null


## Process technologies

In [11]:
gcam_to_cerf_techcats = {'Capacity Factor': 'capacity_factor', 
                         'Variable OM': 'variable_om', 
                         'Heat Rate': 'heat_rate', 
                         'Fuel Price': 'fuel_price',
                         'Fuel Price Escalation': 'fuel_esc_rate', 
                         'Fuel CO2 Content': 'fuel_co2_content', 
                         'Full Name': 'tech_name',
                         'Lifetime': 'lifetime', 
                         'Carbon Capture Rate': 'carbon_capture_rate'}

df = pd.read_csv(tech_file, usecols=['Scenario', 'Region-Origin', 'Year', 'Variable', 'Value', 'Unit'])

df.rename(columns={'Scenario': 'scenario', 
                   'Region-Origin': 'primary_zone', 
                   'Year': 'yr', 
                   'Variable': 'variable', 
                   'Value': 'value', 
                   'Unit': 'unit'}, inplace=True)

# extract target scenario and year
df = df.loc[(df['scenario'] == scenario) & (df['yr'] == target_yr)]

# create a catetory variable field
df['category'] = df['variable'].apply(lambda x: check_idx(x, 0, '|'))

# split out technology aggregated class (Biomass, Gas, etc.)
df['technology'] = df['variable'].apply(lambda x: check_idx(x, 1, '|'))

# exclude aggregated category that does not have specific technology breakouts
df = df.loc[df['technology'] != 'none']

df['subtech'] = df['variable'].apply(lambda x: check_idx(x, 2, '|'))
df['storage'] = df['variable'].apply(lambda x: check_idx(x, 3, '|'))

df['cerf_name'] = (df['technology'] + '_' + df['subtech'] + '_' + df['storage']).str.replace(' ', '_').str.lower().str.replace('_none', '')

tech_name_to_id = {v: k for k, v in enumerate(df.cerf_name.unique())}

df['cerf_tech_id'] = df['cerf_name'].map(tech_name_to_id)

df['cerf_category'] = df['category'].map(gcam_to_cerf_techcats)

df = df.loc[~df['cerf_category'].isna()].copy()


tech_id_to_name = {tech_name_to_id[k]: k for k in tech_name_to_id.keys()}


dfx = df[['cerf_name', 'cerf_category', 'cerf_tech_id', 'value']].copy()

dfx = dfx.loc[dfx['cerf_category'] != 'tech_name']

# dfx.fillna(0, inplace=True)

dfx['value'] = dfx['value'].astype(np.float64)

# dfx = dfx.loc[dfx['value'] > 0].copy()



tech_dict = {}

for i in df.cerf_tech_id.unique():

    tech_dict[i] = {}
    
    for j in df.cerf_category.unique():
        
        if j == 'tech_name':               
            tech_dict[i][j] = tech_id_to_name[i]
            
        else:
        
            dfv = dfx.loc[(dfx['cerf_tech_id'] == i) & (dfx['cerf_category'] == j)].copy() 
            
            if j == 'lifetime':
                med = np.nanmedian(dfv.value.values)
                
                if np.isnan(med):
                    med = 60
                    
            elif j == 'carbon_capture_rate':
                med = 0.0
                    
                    
            else:
                med = np.nanmedian(dfv.value.values)

                    
                                
                        
            tech_dict[i][j] = np.nanmedian(dfv.value.values)



  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, o

## Process expansion plan

In [12]:
df = pd.read_csv(f, usecols=['Scenario', 'Region-Origin', 'Year', 'Variable', 'Value'])

# rename columns
df.columns = ['scenario', 'primary_zone', 'yr', 'variable', 'value_gw']

# convert any NA values in input to 0
df.fillna(0, inplace=True)

# convert any NA values in input to 0
df.fillna(0, inplace=True)

# extract target scenario and year
df = df.loc[(df['scenario'] == scenario) & (df['yr'] == target_yr)]

# create a catetory variable field
df['category'] = df['variable'].apply(lambda x: check_idx(x, 0, '|'))

# get only electricity capacity values
df = df.loc[(df['category'] == 'Electricity Capacity')]

# split out technology aggregated class (Biomass, Gas, etc.)
df['technology'] = df['variable'].apply(lambda x: check_idx(x, 1, '|'))

# exclude aggregated category that does not have specific technology breakouts
df = df.loc[df['technology'] != 'none']

df['subtech'] = df['variable'].apply(lambda x: check_idx(x, 2, '|'))
df['storage'] = df['variable'].apply(lambda x: check_idx(x, 3, '|'))

df.drop(['variable', 'category'], axis=1, inplace=True)

# convert GW to MW
df['value_mw'] = df['value_gw'] * 1000
df.drop('value_gw', axis=1, inplace=True)

# df['cerf_name'] = (df['technology'] + '_' + df['subtech'] + '_' + df['storage']).str.replace(' ', '_').str.lower().str.replace('_none', '')



# set cerf_name default
df['cerf_name'] = 'none'

# coal conventional pulverized without ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'Conv Pul') &
                           (df['storage'].isin(['wo CCS'])),
                           'coal_conv_pul_wo_ccs', df['cerf_name'])

# coal conventional pulverized with ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'Conv Pul') &
                           (df['storage'].isin(['w CCS'])),
                           'coal_conv_pul_w_ccs', df['cerf_name'])

# coal igcc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'wo CCS'),
                           'coal_igcc_wo_ccs', df['cerf_name'])
# coal igcc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'w CCS'),
                           'coal_igcc_w_ccs', df['cerf_name'])

# gas ct with ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'].isin(['CT', 'ST'])) &
                           (df['storage'].isin(['w CCS'])),
                           'gas_cc_w_ccs', df['cerf_name'])

# gas ct without ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'].isin(['CT', 'ST'])) &
                           (df['storage'].isin(['wo CCS'])),
                           'gas_cc_wo_ccs', df['cerf_name'])

# gas cc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['w CCS'])),
                           'gas_cc_wo_ccs', df['cerf_name'])

# gas cc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['wo CCS'])),
                           'gas_cc_w_ccs', df['cerf_name'])


# oil cc
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['wo CCS', 'w CCS'])),
                           'oil_cc', df['cerf_name'])

# oil ct without ccs
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CT') &
                           (df['storage'].isin(['wo CCS'])),
                           'oil_ct_wo_ccs', df['cerf_name'])

# oil ct with ccs
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CT') &
                           (df['storage'].isin(['w CCS'])),
                           'oil_ct_w_ccs', df['cerf_name'])

# biomass conventional without ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'Conv') &
                           (df['storage'].isin(['wo CCS'])),
                           'biomass_conv_wo_ccs', df['cerf_name'])

# biomass conventional with ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'Conv') &
                           (df['storage'].isin(['w CCS'])),
                           'biomass_conv_w_ccs', df['cerf_name'])

# biomass igcc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'wo CCS'),
                           'biomass_igcc_wo_ccs', df['cerf_name'])

# biomass igcc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'w CCS'),
                           'biomass_igcc_w_ccs', df['cerf_name'])
# nuclear gen II
df['cerf_name'] = np.where((df['technology'] == 'Nuclear') &
                           (df['subtech'].isin(['Gen II'])),
                           'nuclear_gen_ii', df['cerf_name'])

# nuclear gen III
df['cerf_name'] = np.where((df['technology'] == 'Nuclear') &
                           (df['subtech'].isin(['Gen III'])),
                           'nuclear_gen_iii', df['cerf_name'])

# solar csp
df['cerf_name'] = np.where((df['technology'] == 'Solar') &
                           (df['subtech'].isin(['CSP'])) &
                           (df['storage'] != 'Dist') &
                           (df['storage'] != 'Non Dist'),
                           'solar_csp', df['cerf_name'])

# solar pv all
df['cerf_name'] = np.where((df['technology'] == 'Solar') &
                           (df['subtech'].isin(['PV'])) &
                           (df['storage'] != 'Dist') &
                           (df['storage'] != 'Non Dist'),
                           'solar_pv_non_dist', df['cerf_name'])

# wind onshore
df['cerf_name'] = np.where((df['technology'] == 'Wind') &
                           (df['subtech'] == 'Onshore'),
                           'wind_onshore', df['cerf_name'])

# drop none names
df.drop(['technology', 'subtech', 'storage', 'yr'], axis=1, inplace=True)
df = df.loc[df['cerf_name'] != 'none']


# get techs that have tech info
valid_techs = [tech_dict[i]['tech_name'] for i in tech_dict.keys()]

dfx = df.loc[df['cerf_name'].isin(valid_techs)]


dfx = dfx.groupby(['primary_zone', 'cerf_name']).sum()

dfx.reset_index(inplace=True)


tech_id_to_name = {tech_name_to_id[k]: k for k in tech_name_to_id}


# only keep techs with something to site
total_cap_by_tech = dfx.groupby('cerf_name')['value_mw'].sum().to_dict()
techs_with_cap = [i for i in total_cap_by_tech.keys() if total_cap_by_tech[i] > 0]

dfx = dfx.loc[dfx['cerf_name'].isin(techs_with_cap)].copy()

dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw
1,AK,biomass_conv_wo_ccs,0.270272
3,AK,biomass_igcc_wo_ccs,0.159291
5,AK,coal_conv_pul_wo_ccs,89.430124
8,AK,gas_cc_w_ccs,635.381643
9,AK,gas_cc_wo_ccs,40.667524


In [13]:
techs_with_cap

['biomass_conv_wo_ccs',
 'biomass_igcc_wo_ccs',
 'coal_conv_pul_wo_ccs',
 'gas_cc_w_ccs',
 'gas_cc_wo_ccs',
 'nuclear_gen_ii',
 'nuclear_gen_iii',
 'oil_ct_wo_ccs',
 'solar_csp',
 'solar_pv_non_dist',
 'wind_onshore']

#  START HERE BY ASSIGNING UNIT SIZE THE THE TECH DICT

In [14]:
unit_size = {'biomass_conv_wo_ccs': 80,
             'biomass_conv_w_ccs': 80,
             'biomass_igcc_wo_ccs': 400,
             'biomass_igcc_w_ccs': 380,
             'coal_conv_pul_wo_ccs': 600,
             'coal_conv_pul_w_ccs': 600,
             'coal_igcc_wo_ccs': 550,
             'coal_igcc_w_ccs': 380,
             'gas_cc_wo_ccs': 400,
             'gas_cc_w_ccs': 400,
             'gas_ct_wo_ccs': 400,
             'geothermal': None,
             'hydro': None,
             'nuclear_gen_ii': 1350,
             'nuclear_gen_iii': 1350,
             'oil_ct_wo_ccs': 400,
             'solar_csp': 100,
             'solar_pv_non_dist': 80,
             'wind_onshore': 251}

unit_size = {k: unit_size[k] for k in unit_size if k in techs_with_cap}


unit_size_by_id = {tech_name_to_id[k]: unit_size[k] for k in unit_size}

# variable cost escalation rate
vcer = {0: -0.00398993418629034,
         1: -0.00398993418629034,
         2: -0.00443288530388608,
         3: -0.00564811220001504,
         4: -0.00398993418629034,
         5: -0.00398993418629034,
         6: -0.00443288530388608,
         7: -0.00583677050505782,
         8: -0.00398993418629034,
         9: -0.00398993418629034,
         10: -0.00398993418629034,
         11: 0,
         12: 0,
         13: -0.00104311614063357,
         14: -0.00104311614063357,
         15: -0.00398993418629034,
         16: 1,
         17: 1,
         18: -0.00249607760279447}



   
technology_dict = {}

for k in unit_size_by_id.keys():
    
    vals = tech_dict[k]
    
    # values from original 
    vals['unit_size'] = unit_size_by_id[k]
    vals['variable_cost_esc_rate'] = vcer[k]
    vals['discount_rate'] = 0.05 
    vals['carbon_esc_rate'] = 0.0
    vals['carbon_tax'] = 0.0 
    vals['buffer_in_km'] = 5
    vals['carbon_capture_rate'] = 0.0
    
    # if gas tech
    if k in (8, 9, 10):
        rp = 'true'
    else:
        rp = 'false'
    
    vals['require_pipelines'] = rp 
    vals['suitability_raster_file'] = 'null'

    technology_dict[k] = vals



tech_to_unitsize = {technology_dict[k]['tech_name']: technology_dict[k]['unit_size'] for k in technology_dict.keys()}



dfx = dfx.loc[dfx['cerf_name'].isin(tech_to_unitsize.keys())].copy()


dfx['unit_size'] = dfx['cerf_name'].map(tech_to_unitsize)


dfx['n_sites'] = np.floor(dfx['value_mw'] / dfx['unit_size']).astype(int)


dfx['carryover_mw'] = dfx['value_mw'] - (dfx['n_sites'] * dfx['unit_size'])


states_dict = cerf.get_state_abbrev_to_name()

dfx['state_name'] = dfx['primary_zone'].map(states_dict)

# keep only CONUS states
dfx = dfx.loc[~dfx['state_name'].isna()].copy()


# add in tech id
dfx['tech_id'] = dfx['cerf_name'].map(tech_name_to_id)


## TESTING:  DO NOT RUN

In [15]:
out_df = f'/Users/d3y010/Desktop/config_{target_yr}.pkl'

dfx.to_pickle(out_df)


In [27]:
prev_file = f'/Users/d3y010/Desktop/config_{prev_yr}.pkl'

pdf = pd.read_pickle(prev_file)

dfx['key'] = dfx['primary_zone'] + '_' + dfx['tech_id'].astype(str)
pdf['key'] = pdf['primary_zone'] + '_' + pdf['tech_id'].astype(str)

pdf.set_index('key', inplace=True)



prev_dict = pdf[['value_mw']].to_dict()['value_mw']

prev_carry = pdf[['carryover_mw']].to_dict()['carryover_mw']

dfx['prev_mw'] = dfx['key'].map(prev_dict)
dfx['prev_carryover'] = dfx['key'].map(prev_carry)

dfx.fillna(0.0, inplace=True)

dfx['value_mw'] = dfx['value_mw'] - dfx['prev_mw'] + dfx['prev_carryover']

dfx.head()


Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size,n_sites,carryover_mw,state_name,tech_id,key,prev_mw,prev_carryover
17,AL,biomass_conv_wo_ccs,25.979643,80,0,25.979643,alabama,0,AL_0,68.251397,68.251397
19,AL,biomass_igcc_wo_ccs,10.121534,400,0,10.121534,alabama,2,AL_2,0.0,0.0
21,AL,coal_conv_pul_wo_ccs,-3726.202631,600,9,473.797369,alabama,4,AL_4,10161.33523,561.33523
24,AL,gas_cc_w_ccs,4786.220888,400,41,386.220888,alabama,9,AL_9,12073.939085,73.939085
25,AL,gas_cc_wo_ccs,-608.166223,400,15,191.833777,alabama,8,AL_8,6868.426304,68.426304


In [19]:
dfx['key'] = dfx['primary_zone'] + '_' + dfx['tech_id'].astype(str)
pdf['key'] = pdf['primary_zone'] + '_' + pdf['tech_id'].astype(str)

pdf.set_index('key', inplace=True)

prev_dict = pdf[['value_mw']].to_dict()['value_mw']

prev_carry = pdf[['value_mw']].to_dict()['carryover_mw']

dfx['prev_mw'] = dfx['key'].map(prev_dict)
dfx['prev_carryover'] = dfx['key'].map(prev_carry)

dfx.fillna(nan=0.0, inplace=True)

dfx['value_mw'] = dfx['value_mw'] - dfx['prev_mw'] + df['prev_carryover']


## FINISH TESTING

## Build CERF config

In [24]:
expansion_plan_dict = {}

for row in dfx.itertuples():
    
    sn = row.state_name
    ti = row.tech_id
    ns = row.n_sites
    
    if sn in expansion_plan_dict:
        expansion_plan_dict[sn][ti] = {'tech_name': row.cerf_name, 'n_sites': row.n_sites}
    
    else:
        expansion_plan_dict[sn] = {ti: {'tech_name': row.cerf_name, 'n_sites': row.n_sites}}



settings_dict = {'run_year': target_yr, 'output_directory': 'null', 'radomize': True, 'seed_value': 0}


lmp_zones_dict = {'lmp_zone_raster_file': 'null',  'lmp_zone_raster_nodata_value': 255, 'lmp_hourly_data_file': 'null'}


infrastructure_dict = {'substation_file': 'null', 
                       'transmission_costs_file': 'null',
                       'pipeline_file': 'null',
                       'output_rasterized_file': False,
                       'output_dist_file': False,
                       'output_alloc_file': False,
                       'output_cost_file': False}


yaml_dict = {'settings': settings_dict, 
             'technology': technology_dict,
             'expansion_plan': expansion_plan_dict,
             'lmp_zones': lmp_zones_dict,
             'infrastructure': infrastructure_dict
            }



out_yml = f'/Users/d3y010/repos/github/cerf/cerf/data/config_{target_yr}.yml'


with open(out_yml, 'w') as out:
    
    for k in yaml_dict.keys():
        
        out.write(f'\n{k}:\n')
        
        if k == 'settings':
            
            for i in yaml_dict[k].keys():
                
                out.write(f'    {i}: {yaml_dict[k][i]}\n')
                
        elif k == 'technology':
            
            for i in yaml_dict[k].keys():
                
                out.write(f'    {i}:\n')
                
                for x in yaml_dict[k][i].keys():
                    
                    out.write(f'        {x}: {yaml_dict[k][i][x]}\n')
                    
        elif k == 'expansion_plan':
            
            for i in yaml_dict[k].keys():
                
                out.write(f'    {i}:\n')
                
                for x in yaml_dict[k][i].keys():
                    
                    out.write(f'        {x}:\n')
                    
                    for j in yaml_dict[k][i][x].keys():
                        
                        out.write(f'            {j}: {yaml_dict[k][i][x][j]}\n')
                        
        elif k == 'lmp_zones':
            
            for i in yaml_dict[k].keys():
                
                out.write(f'    {i}: {yaml_dict[k][i]}\n')
        
        elif k == 'infrastructure':
            
            for i in yaml_dict[k].keys():
                
                out.write(f'    {i}: {yaml_dict[k][i]}\n')   
