In [1]:
import os

import numpy as np
import pandas as pd

import cerf


In [2]:
def check_idx(variable, idx, delim):
    """Split expansion variable by delimiter.  Return 'none' as string
    if the requested subcategory is not present.  Otherwise, return the
    name of the requested.

    :param variable:                The variable string to parse.

    :param idx:                     The index number to parse.

    :param delim:                   The variable string delimiter.

    :return:                        'none' if index not present, else
                                    the name.

    """
    try:
        return variable.split(delim)[idx]

    except IndexError:
        return 'none'
    
    

    

In [20]:
f = '/Users/d3y010/projects/cerf/runs/GCAM_CERF_reporting_2019.04.11/GCAM_output_ReEDS_detailed.csv'
tech_file = '/Users/d3y010/projects/cerf/runs/GCAM_CERF_reporting_2019.04.11/GCAM_CERF_tech_assumptions_Ref.csv'

scenario = 'GCAMUSA_Reference'
target_yr = 2010



## need source

variable_cost_esc_rate: -0.00104311614063357
unit_size: 1350
discount_rate: 0.05
carbon_esc_rate: 0.0
carbon_tax: 0.0
buffer_in_km: 5
require_pipelines: False
suitability_raster_file: null


## Process technologies

In [58]:
gcam_to_cerf_techcats = {'Capacity Factor': 'capacity_factor', 
                         'Variable OM': 'variable_om', 
                         'Heat Rate': 'heat_rate', 
                         'Fuel Price': 'fuel_price',
                         'Fuel Price Escalation': 'fuel_esc_rate', 
                         'Fuel CO2 Content': 'fuel_co2_content', 
                         'Full Name': 'tech_name',
                         'Lifetime': 'lifetime', 
                         'Carbon Capture Rate': 'carbon_capture_rate'}

df = pd.read_csv(tech_file, usecols=['Scenario', 'Region-Origin', 'Year', 'Variable', 'Value', 'Unit'])

df.rename(columns={'Scenario': 'scenario', 
                   'Region-Origin': 'primary_zone', 
                   'Year': 'yr', 
                   'Variable': 'variable', 
                   'Value': 'value', 
                   'Unit': 'unit'}, inplace=True)

# extract target scenario and year
df = df.loc[(df['scenario'] == scenario) & (df['yr'] == target_yr)]

# create a catetory variable field
df['category'] = df['variable'].apply(lambda x: check_idx(x, 0, '|'))

# split out technology aggregated class (Biomass, Gas, etc.)
df['technology'] = df['variable'].apply(lambda x: check_idx(x, 1, '|'))

# exclude aggregated category that does not have specific technology breakouts
df = df.loc[df['technology'] != 'none']

df['subtech'] = df['variable'].apply(lambda x: check_idx(x, 2, '|'))
df['storage'] = df['variable'].apply(lambda x: check_idx(x, 3, '|'))

df['cerf_name'] = (df['technology'] + '_' + df['subtech'] + '_' + df['storage']).str.replace(' ', '_').str.lower().str.replace('_none', '')

tech_name_to_id = {v: k for k, v in enumerate(df.cerf_name.unique())}

df['cerf_tech_id'] = df['cerf_name'].map(tech_name_to_id)

df['cerf_category'] = df['category'].map(gcam_to_cerf_techcats)

df = df.loc[~df['cerf_category'].isna()].copy()

df.head()




Unnamed: 0,scenario,primary_zone,yr,variable,unit,value,category,technology,subtech,storage,cerf_name,cerf_tech_id,cerf_category
0,GCAMUSA_Reference,AK,2010,Capacity Factor|Biomass|Conv|wo CCS,,,Capacity Factor,Biomass,Conv,wo CCS,biomass_conv_wo_ccs,0,capacity_factor
9,GCAMUSA_Reference,AL,2010,Capacity Factor|Biomass|Conv|wo CCS,,0.609,Capacity Factor,Biomass,Conv,wo CCS,biomass_conv_wo_ccs,0,capacity_factor
18,GCAMUSA_Reference,AR,2010,Capacity Factor|Biomass|Conv|wo CCS,,0.609,Capacity Factor,Biomass,Conv,wo CCS,biomass_conv_wo_ccs,0,capacity_factor
27,GCAMUSA_Reference,AZ,2010,Capacity Factor|Biomass|Conv|wo CCS,,0.609,Capacity Factor,Biomass,Conv,wo CCS,biomass_conv_wo_ccs,0,capacity_factor
36,GCAMUSA_Reference,CA,2010,Capacity Factor|Biomass|Conv|wo CCS,,0.609,Capacity Factor,Biomass,Conv,wo CCS,biomass_conv_wo_ccs,0,capacity_factor


In [59]:
df.cerf_category.unique()

array(['capacity_factor', 'variable_om', 'heat_rate', 'fuel_price',
       'fuel_esc_rate', 'fuel_co2_content', 'tech_name', 'lifetime',
       'carbon_capture_rate'], dtype=object)

In [60]:
tech_id_to_name = {tech_name_to_id[k]: k for k in tech_name_to_id.keys()}


In [61]:
tech_id_to_name = {tech_name_to_id[k]: k for k in tech_name_to_id.keys()}


dfx = df[['cerf_name', 'cerf_category', 'cerf_tech_id', 'value']].copy()

dfx = dfx.loc[dfx['cerf_category'] != 'tech_name']

# dfx.fillna(0, inplace=True)

dfx['value'] = dfx['value'].astype(np.float64)

# dfx = dfx.loc[dfx['value'] > 0].copy()

dfx.head()


Unnamed: 0,cerf_name,cerf_category,cerf_tech_id,value
0,biomass_conv_wo_ccs,capacity_factor,0,
9,biomass_conv_wo_ccs,capacity_factor,0,0.609
18,biomass_conv_wo_ccs,capacity_factor,0,0.609
27,biomass_conv_wo_ccs,capacity_factor,0,0.609
36,biomass_conv_wo_ccs,capacity_factor,0,0.609


In [62]:
tech_dict = {}

for i in df.cerf_tech_id.unique():

    tech_dict[i] = {}
    
    for j in df.cerf_category.unique():
        
        if j == 'tech_name':               
            tech_dict[i][j] = tech_id_to_name[i]
            
        else:
        
            dfv = dfx.loc[(dfx['cerf_tech_id'] == i) & (dfx['cerf_category'] == j)].copy()          

            tech_dict[i][j] = dfv.value.median()



  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

## Process expansion plan

In [63]:
df = pd.read_csv(f, usecols=['Scenario', 'Region-Origin', 'Year', 'Variable', 'Value'])

# rename columns
df.columns = ['scenario', 'primary_zone', 'yr', 'variable', 'value_gw']

# convert any NA values in input to 0
df.fillna(0, inplace=True)

# convert any NA values in input to 0
df.fillna(0, inplace=True)

# extract target scenario and year
df = df.loc[(df['scenario'] == scenario) & (df['yr'] == target_yr)]

# create a catetory variable field
df['category'] = df['variable'].apply(lambda x: check_idx(x, 0, '|'))

# get only electricity capacity values
df = df.loc[(df['category'] == 'Electricity Capacity')]

# split out technology aggregated class (Biomass, Gas, etc.)
df['technology'] = df['variable'].apply(lambda x: check_idx(x, 1, '|'))

# exclude aggregated category that does not have specific technology breakouts
df = df.loc[df['technology'] != 'none']

df['subtech'] = df['variable'].apply(lambda x: check_idx(x, 2, '|'))
df['storage'] = df['variable'].apply(lambda x: check_idx(x, 3, '|'))

df.drop(['variable', 'category'], axis=1, inplace=True)

# convert GW to MW
df['value_mw'] = df['value_gw'] * 1000
df.drop('value_gw', axis=1, inplace=True)

# df['cerf_name'] = (df['technology'] + '_' + df['subtech'] + '_' + df['storage']).str.replace(' ', '_').str.lower().str.replace('_none', '')



# set cerf_name default
df['cerf_name'] = 'none'

# coal conventional pulverized without ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'Conv Pul') &
                           (df['storage'].isin(['wo CCS'])),
                           'coal_conv_pul_wo_ccs', df['cerf_name'])

# coal conventional pulverized with ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'Conv Pul') &
                           (df['storage'].isin(['w CCS'])),
                           'coal_conv_pul_w_ccs', df['cerf_name'])

# coal igcc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'wo CCS'),
                           'coal_igcc_wo_ccs', df['cerf_name'])
# coal igcc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Coal') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'w CCS'),
                           'coal_igcc_w_ccs', df['cerf_name'])

# gas ct with ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'].isin(['CT', 'ST'])) &
                           (df['storage'].isin(['w CCS'])),
                           'gas_cc_w_ccs', df['cerf_name'])

# gas ct without ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'].isin(['CT', 'ST'])) &
                           (df['storage'].isin(['wo CCS'])),
                           'gas_cc_wo_ccs', df['cerf_name'])

# gas cc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['w CCS'])),
                           'gas_cc_wo_ccs', df['cerf_name'])

# gas cc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Gas') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['wo CCS'])),
                           'gas_cc_w_ccs', df['cerf_name'])


# oil cc
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CC') &
                           (df['storage'].isin(['wo CCS', 'w CCS'])),
                           'oil_cc', df['cerf_name'])

# oil ct without ccs
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CT') &
                           (df['storage'].isin(['wo CCS'])),
                           'oil_ct_wo_ccs', df['cerf_name'])

# oil ct with ccs
df['cerf_name'] = np.where((df['technology'] == 'Oil') &
                           (df['subtech'] == 'CT') &
                           (df['storage'].isin(['w CCS'])),
                           'oil_ct_w_ccs', df['cerf_name'])

# biomass conventional without ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'Conv') &
                           (df['storage'].isin(['wo CCS'])),
                           'biomass_conv_wo_ccs', df['cerf_name'])

# biomass conventional with ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'Conv') &
                           (df['storage'].isin(['w CCS'])),
                           'biomass_conv_w_ccs', df['cerf_name'])

# biomass igcc without ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'wo CCS'),
                           'biomass_igcc_wo_ccs', df['cerf_name'])

# biomass igcc with ccs
df['cerf_name'] = np.where((df['technology'] == 'Biomass') &
                           (df['subtech'] == 'IGCC') &
                           (df['storage'] == 'w CCS'),
                           'biomass_igcc_w_ccs', df['cerf_name'])
# nuclear gen II
df['cerf_name'] = np.where((df['technology'] == 'Nuclear') &
                           (df['subtech'].isin(['Gen II'])),
                           'nuclear_gen_ii', df['cerf_name'])

# nuclear gen III
df['cerf_name'] = np.where((df['technology'] == 'Nuclear') &
                           (df['subtech'].isin(['Gen III'])),
                           'nuclear_gen_iii', df['cerf_name'])

# solar csp
df['cerf_name'] = np.where((df['technology'] == 'Solar') &
                           (df['subtech'].isin(['CSP'])) &
                           (df['storage'] != 'Dist') &
                           (df['storage'] != 'Non Dist'),
                           'solar_csp', df['cerf_name'])

# solar pv all
df['cerf_name'] = np.where((df['technology'] == 'Solar') &
                           (df['subtech'].isin(['PV'])) &
                           (df['storage'] != 'Dist') &
                           (df['storage'] != 'Non Dist'),
                           'solar_pv_non_dist', df['cerf_name'])

# wind onshore
df['cerf_name'] = np.where((df['technology'] == 'Wind') &
                           (df['subtech'] == 'Onshore'),
                           'wind_onshore', df['cerf_name'])

# drop none names
df.drop(['technology', 'subtech', 'storage', 'yr'], axis=1, inplace=True)
df = df.loc[df['cerf_name'] != 'none']


In [64]:
df.cerf_name.unique()

array(['biomass_conv_w_ccs', 'biomass_conv_wo_ccs', 'biomass_igcc_w_ccs',
       'biomass_igcc_wo_ccs', 'coal_conv_pul_w_ccs',
       'coal_conv_pul_wo_ccs', 'coal_igcc_w_ccs', 'coal_igcc_wo_ccs',
       'gas_cc_wo_ccs', 'gas_cc_w_ccs', 'nuclear_gen_ii',
       'nuclear_gen_iii', 'oil_ct_wo_ccs', 'solar_csp',
       'solar_pv_non_dist', 'wind_onshore'], dtype=object)

In [65]:
# get techs that have tech info
valid_techs = [tech_dict[i]['tech_name'] for i in tech_dict.keys()]

In [66]:
dfx = df.loc[df['cerf_name'].isin(valid_techs)]


In [67]:
dfx = dfx.groupby(['primary_zone', 'cerf_name']).sum()

dfx.reset_index(inplace=True)

dfx

Unnamed: 0,primary_zone,cerf_name,value_mw
0,AK,biomass_conv_w_ccs,0.000000
1,AK,biomass_conv_wo_ccs,0.000000
2,AK,biomass_igcc_w_ccs,0.000000
3,AK,biomass_igcc_wo_ccs,0.000000
4,AK,coal_conv_pul_w_ccs,0.000000
...,...,...,...
811,WY,nuclear_gen_iii,0.000000
812,WY,oil_ct_wo_ccs,105.084263
813,WY,solar_csp,10.425800
814,WY,solar_pv_non_dist,0.000000


In [68]:
tech_name_to_id

{'biomass_conv_wo_ccs': 0,
 'biomass_conv_w_ccs': 1,
 'biomass_igcc_wo_ccs': 2,
 'biomass_igcc_w_ccs': 3,
 'coal_conv_pul_wo_ccs': 4,
 'coal_conv_pul_w_ccs': 5,
 'coal_igcc_wo_ccs': 6,
 'coal_igcc_w_ccs': 7,
 'gas_cc_wo_ccs': 8,
 'gas_cc_w_ccs': 9,
 'gas_ct_wo_ccs': 10,
 'geothermal': 11,
 'hydro': 12,
 'nuclear_gen_ii': 13,
 'nuclear_gen_iii': 14,
 'oil_ct_wo_ccs': 15,
 'solar_csp': 16,
 'solar_pv_non_dist': 17,
 'wind_onshore': 18}

In [71]:
# only keep techs with something to site
total_cap_by_tech = dfx.groupby('cerf_name')['value_mw'].sum().to_dict()
techs_with_cap = [i for i in total_cap_by_tech.keys() if total_cap_by_tech[i] > 0]

dfx = dfx.loc[dfx['cerf_name'].isin(techs_with_cap)].copy()

dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw
1,AK,biomass_conv_wo_ccs,0.0
5,AK,coal_conv_pul_wo_ccs,90.89833
8,AK,gas_cc_w_ccs,801.072582
9,AK,gas_cc_wo_ccs,15.131252
10,AK,nuclear_gen_ii,0.0


In [72]:
techs_with_cap

['biomass_conv_wo_ccs',
 'coal_conv_pul_wo_ccs',
 'gas_cc_w_ccs',
 'gas_cc_wo_ccs',
 'nuclear_gen_ii',
 'oil_ct_wo_ccs',
 'solar_csp',
 'solar_pv_non_dist',
 'wind_onshore']

In [73]:
unit_size = {'biomass_conv_wo_ccs': 80,
             'coal_conv_pul_wo_ccs': 600,
             'gas_cc_w_ccs': 400,
             'gas_cc_wo_ccs': 400,
             'nuclear_gen_ii': 1350,
             'oil_ct_wo_ccs': 400,
             'solar_csp': 100,
             'solar_pv_non_dist': 80,
             'wind_onshore': 80}

tech_dict = {i}

{0: {'capacity_factor': 0.609,
  'variable_om': 10.0064557779212,
  'heat_rate': 12827.1,
  'fuel_price': 0.0,
  'fuel_esc_rate': 0.0,
  'fuel_co2_content': 0.3036,
  'tech_name': 'biomass_conv_wo_ccs',
  'lifetime': 60.0,
  'carbon_capture_rate': nan},
 1: {'capacity_factor': nan,
  'variable_om': 13.3634602969658,
  'heat_rate': 17408.2,
  'fuel_price': 0.0,
  'fuel_esc_rate': 0.0,
  'fuel_co2_content': 0.3036,
  'tech_name': 'biomass_conv_w_ccs',
  'lifetime': nan,
  'carbon_capture_rate': nan},
 2: {'capacity_factor': nan,
  'variable_om': 15.0096836668819,
  'heat_rate': 10662.5,
  'fuel_price': 0.0,
  'fuel_esc_rate': 0.0,
  'fuel_co2_content': 0.3036,
  'tech_name': 'biomass_igcc_wo_ccs',
  'lifetime': nan,
  'carbon_capture_rate': nan},
 3: {'capacity_factor': nan,
  'variable_om': 17.9470626210458,
  'heat_rate': 13022.9,
  'fuel_price': 0.0,
  'fuel_esc_rate': 0.0,
  'fuel_co2_content': 0.3036,
  'tech_name': 'biomass_igcc_w_ccs',
  'lifetime': nan,
  'carbon_capture_rate': n

In [16]:
# tech_dict = cerf.load_sample_config(2010).get('technology')

# tech_dict


In [17]:
tech_to_unitsize = {tech_dict[k]['tech_name']: tech_dict[k]['unit_size'] for k in tech_dict.keys()}

tech_to_unitsize


KeyError: 'unit_size'

In [10]:
dfx.cerf_name.unique()

array(['biomass', 'biomass_igcc_with_ccs', 'biomass_igcc_wo_ccs',
       'coal_conv', 'coal_igcc_with_ccs', 'coal_igcc_wo_ccs', 'gas_cc',
       'gas_turbine', 'nuclear', 'oil_turbine', 'solar_csp',
       'wind_onshore'], dtype=object)

In [11]:
dfx = dfx.loc[dfx['cerf_name'].isin(tech_to_unitsize.keys())].copy()


In [12]:
dfx['unit_size'] = dfx['cerf_name'].map(tech_to_unitsize)


In [13]:
dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size
0,AK,biomass,0.0,80
8,AK,nuclear,0.0,1350
12,AL,biomass,68.251397,80
20,AL,nuclear,5002.960845,1350
24,AR,biomass,14.238793,80


In [14]:
dfx['n_sites'] = np.floor(dfx['value_mw'] / dfx['unit_size']).astype(int)


In [15]:
dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size,n_sites
0,AK,biomass,0.0,80,0
8,AK,nuclear,0.0,1350,0
12,AL,biomass,68.251397,80,0
20,AL,nuclear,5002.960845,1350,3
24,AR,biomass,14.238793,80,0


In [16]:
dfx['carryover_mw'] = dfx['value_mw'] - (dfx['n_sites'] * dfx['unit_size'])


In [17]:
dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size,n_sites,carryover_mw
0,AK,biomass,0.0,80,0,0.0
8,AK,nuclear,0.0,1350,0,0.0
12,AL,biomass,68.251397,80,0,68.251397
20,AL,nuclear,5002.960845,1350,3,952.960845
24,AR,biomass,14.238793,80,0,14.238793


In [24]:
states_dict = cerf.get_state_abbrev_to_name()

dfx['state_name'] = dfx['primary_zone'].map(states_dict)

# keep only CONUS states
dfx = dfx.loc[~dfx['state_name'].isna()].copy()


In [25]:
dfx.head()

Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size,n_sites,carryover_mw,state_name
12,AL,biomass,68.251397,80,0,68.251397,alabama
20,AL,nuclear,5002.960845,1350,3,952.960845,alabama
24,AR,biomass,14.238793,80,0,14.238793,arkansas
32,AR,nuclear,1980.918253,1350,1,630.918253,arkansas
36,AZ,biomass,26.61795,80,0,26.61795,arizona


In [26]:
dfd = dfx[['']]


In [27]:
dfd.head()

Unnamed: 0,primary_zone,cerf_name,value_mw,unit_size,n_sites,carryover_mw,state_name
12,AL,biomass,68.251397,80,0,68.251397,alabama
20,AL,nuclear,5002.960845,1350,3,952.960845,alabama
24,AR,biomass,14.238793,80,0,14.238793,arkansas
32,AR,nuclear,1980.918253,1350,1,630.918253,arkansas
36,AZ,biomass,26.61795,80,0,26.61795,arizona


In [None]:
d