### Generating Eastern Hydro V3

In [1]:
import json
import pytz
import pandas as pd
import requests
import numpy as np
from collections import Counter, defaultdict
from timezonefinder import TimezoneFinder

from powersimdata.input.grid import Grid
from powersimdata.scenario.scenario import Scenario
from prereise.gather.helpers import trim_eia_form_923
from prereise.gather.hydrodata.eia.helpers import scale_profile

In [2]:
# Note that using current version of the grid won't be able to reproduce the eastern_hydro_v3 profile. 
# The purpose of this notebook is to illustrate the methodology we used to generate this profile.
eastern = Grid(['Eastern'])

Reading bus.csv
Reading plant.csv
Reading gencost.csv
Reading branch.csv
Reading dcline.csv
Reading sub.csv
Reading bus2sub.csv
Reading zone.csv


### Generate eastern pumped storage hydro profiles

In [None]:
# This step takes 30 sec to finish
# Generate hourly profile for pumped storage hydro based on a deterministic model described in 'hps_plants_eastern.xlsx'
eastern_hps = pd.read_excel(io='../../data/hps_plants_eastern.xlsx',sheet_name = 'all_plantIDs',header = 0)
time_index = pd.date_range(start='2016-01-01 00:00:00', end='2016-12-31 23:00:00', freq='H')
eastern_hydro_v3_hps = pd.DataFrame(index = time_index, columns = sorted(eastern_hps['PlantIDs']))

utc = pytz.utc
tf = TimezoneFinder()

for plantid in eastern_hydro_v3_hps.columns:
    lat = eastern.plant.loc[plantid,'lat']
    lon = eastern.plant.loc[plantid,'lon']
    capacity = eastern.plant.loc[plantid,'Pmax']
    tz_target = pytz.timezone(tf.certain_timezone_at(lat=lat, lng=lon))
    for time_ind in time_index:
        time_utc = utc.localize(time_ind)
        time_local = time_utc.astimezone(tz_target)
        # weekday, 0:Monday, 1:Tuesday, 2:Wednesday, 3:Thursday, 4:Friday
        if time_local.weekday() <= 4:
            if time_local.hour in {11,18}:
                eastern_hydro_v3_hps.loc[time_ind,plantid] = capacity*0.5
            if 11 < time_local.hour < 18:
                eastern_hydro_v3_hps.loc[time_ind,plantid] = capacity
eastern_hydro_v3_hps.fillna(0,inplace = True)

In [6]:
# Total HPS generation during the year turns out to be 30301.6GWh based on current approach, which is 50% higher than
# the reported number in EIA 923, i.e. 19884GWh. Hence, we decided to scale the current HPS profile down by 35%
eastern_hydro_v3_hps = eastern_hydro_v3_hps.apply(lambda x: x*0.65)

In [7]:
eastern_hydro_v3_hps.sum().sum()

20397404.72770007

### Generate eastern conventional hydro profiles
#### 1. Generate a mapping between each conventional hydro generator and BAs   

In [8]:
eastern_hyc_id_list = set(eastern.plant[eastern.plant['type'] == 'hydro'].index) - set(eastern_hps['PlantIDs'])
eastern_hyc = eastern.plant.loc[sorted(eastern_hyc_id_list)][['Pmax','lat','lon','zone_name']].copy()
eastern_hyc.loc[:,'County'] = None
eastern_hyc.loc[:,'BA'] = None

In [16]:
# It takes around 10 min to finish a pass
# api-endpoint 
URL = "https://geo.fcc.gov/api/census/block/find"
# defining a params dict for the parameters to be sent to the API
eastern_hyc_no_county_match = []
for index,row in eastern_hyc.iterrows():
    print(index)
    PARAMS = {'latitude':row['lat'], 'longitude':row['lon'], 'format':'json', 'showall': True} 
    # sending get request and saving the response as response object 
    r = requests.get(url = URL, params = PARAMS).json()
    try:
        county_name = r['County']['name']+'__'+r['State']['code']
        eastern_hyc.loc[index,'County'] = county_name
    except:
        eastern_hyc_no_county_match.append(index) 

5
6
7
8
9
10
12
13
14
15
16
17
18
21
22
23
24
25
26
27
28
29
30
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
64
65
66
67
68
69
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
104
105
106
107
108
109
110
111
112
117
119
120
121
122
123
124
126
127
128
129
130
132
133
134
135
136
137
138
139
144
145
146
147
148
149
150
151
152
153
154
155
156
158
159
160
161
162
163
164
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
186
187
188
189
190
191
192
193
194
195
196
197
198
206
207
208
209
210
211
219
220
221
222
223
224
225
226
227
228
229
230
231
237
238
239
240
241
253
259
260
261
262
263
264
265
266
267
268
269
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
294
295
296
297
298
299
300
301
302
303
305
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
333
334
335
339
340
341
342
343
344
345
346
347
348
349
353
354
355
356
357
3

6996
6997
6998
6999
7000
7001
7002
7003
7004
7005
7006
7007
7017
7018
7019
7020
7021
7022
7023
7024
7032
7033
7034
7043
7044
7045
7046
7047
7050
7051
7052
7053
7054
7055
7056
7061
7062
7063
7064
7065
7066
7073
7074
7075
7076
7077
7078
7079
7080
7081
7082
7083
7084
7085
7095
7096
7097
7098
7099
7100
7101
7102
7103
7104
7105
7106
7107
7108
7109
7118
7119
7120
7121
7122
7123
7124
7125
7126
7138
7139
7140
7141
7142
7154
7155
7156
7157
7158
7159
7160
7161
7162
7163
7164
7165
7166
7167
7168
7169
7170
7171
7172
7173
7174
7195
7196
7197
7198
7199
7200
7207
7208
7209
7210
7211
7212
7222
7223
7235
7236
7237
7238
7239
7241
7242
7243
7244
7252
7253
7254
7255
7256
7257
7258
7259
7260
7264
7265
7277
7278
7279
7280
7281
7282
7283
7284
7285
7292
7293
7294
7295
7296
7297
7309
7310
7311
7322
7323
7325
7326
7327
7335
7336
7344
7345
7346
7347
7348
7349
7350
7351
7352
7365
7366
7367
7372
7373
7376
7377
7378
7379
7380
7381
7382
7383
7386
7387
7388
7389
7390
7391
7392
7399
7400
7401
7402
7403
7404
7405
7406


In [17]:
eastern_hyc_no_county_match

[]

In [20]:
data = json.load(open('../../../data/ba_to_county.txt'))
# df = pd.DataFrame(data['groups']['#cc3333']['paths'])
ba_county_list = {}
for val in data['groups'].values():
    ba_county_list[val['label']] = set(val['paths'])
    
# eastern_hyc = pd.read_csv('eastern_hyc_to_county_raw.csv',index_col=0)

for index,row in eastern_hyc.iterrows():
    for BA,clist in ba_county_list.items():
        try:
            county = row['County'].replace(' ','_')
            county = county.replace('.','')
            county = county.replace('-','')
            county = county.replace('\'','_')
            if row['County'] == 'LaSalle__IL':
                county = 'La_Salle__IL'
            if row['County'] == 'Lac Qui Parle__MN':
                county = 'Lac_qui_Parle__MN'
            if row['County'] == 'Baltimore__MD':
                county = 'Baltimore_County__MD'
            if row['County'] == 'District of Columbia__DC':
                county = 'Washington__DC'
            if row['County'] == 'St. Louis City__MO':
                county = 'St_Louis_Co__MO'
            if county in clist:
                eastern_hyc.loc[index,'BA'] = BA
                break
        except:
            continue
            
eastern_hyc_no_BA_match = list(eastern_hyc[eastern_hyc['BA'].isna()].index)

# Fix mismatch county names in Virginia Mountains
for ind in eastern_hyc_no_BA_match:
    if eastern_hyc.loc[ind,'zone_name'] == 'Virginia Mountains':
        eastern_hyc.loc[ind,'BA'] = 'PJM'

eastern_hyc_no_BA_match = list(eastern_hyc[eastern_hyc['BA'].isna()].index)

# Assign the rest no-ba-match buses to SWPP
for ind in eastern_hyc_no_BA_match:
    eastern_hyc.loc[ind,'BA'] = 'SWPP'

In [21]:
eastern_hyc_no_BA_match = list(eastern_hyc[eastern_hyc['BA'].isna()].index)
eastern_hyc_no_BA_match

[]

In [22]:
eastern_hyc.BA.unique()

array(['ISONE', 'NYISO', 'PJM', 'Carolina', 'TVA_LGEE', 'SOCO', 'AEC',
       'MISO', 'SWPP'], dtype=object)

In [23]:
eastern_hyc.to_csv('eastern_hyc_to_BA.csv')

#### 2. Decompose 2016 total hydro profiles of ISONE, NYISO, PJM, SWPP into plant level profiles in the corresponding region

In [24]:
# Load total profiles of ISONE, NYISO, PJM and SWPP
isone_hydro = pd.read_csv('./prepared_timeseries/ne_iso_2016_hydro.csv', index_col = 0)
nyiso_hydro = pd.read_csv('./prepared_timeseries/ny_iso_2016_hydro.csv', index_col = 0)
pjm_hydro = pd.read_csv('./prepared_timeseries/pjm_hydro_2016.csv', index_col = 0)
swpp_hydro = pd.read_csv('./prepared_timeseries/southwest_power_pool_2016_hydro.csv', index_col = 0)

In [110]:
def get_hourly_plant_level_profile(plant_df,total_profile):
    total_hydro_capacity = plant_df['Pmax'].sum()
    res_profile = pd.DataFrame(columns = plant_df.index)

    for plantid in res_profile.columns:
        if total_hydro_capacity == 0:
            factor = 0
        else:
            factor = plant_df.loc[plantid]['Pmax']/total_hydro_capacity
        plant_profile = [val*factor for val in total_profile]
        res_profile[plantid] = plant_profile.copy()
    return res_profile

In [28]:
hydro_v3_isone = get_hourly_plant_level_profile(eastern_hyc[eastern_hyc['BA'] == 'ISONE'],list(isone_hydro['hydro']))

In [29]:
hydro_v3_nyiso = get_hourly_plant_level_profile(eastern_hyc[eastern_hyc['BA'] == 'NYISO'],list(nyiso_hydro['GenMWh']))

In [30]:
hydro_v3_pjm = get_hourly_plant_level_profile(eastern_hyc[eastern_hyc['BA'] == 'PJM'],list(pjm_hydro['hydro']))

In [31]:
hydro_v3_swpp = get_hourly_plant_level_profile(eastern_hyc[eastern_hyc['BA'] == 'SWPP'],list(swpp_hydro['hydro']))

In [32]:
hydro_v3_isone.index = eastern_hydro_v3_hps.index
hydro_v3_nyiso.index = eastern_hydro_v3_hps.index
hydro_v3_pjm.index = eastern_hydro_v3_hps.index
hydro_v3_swpp.index = eastern_hydro_v3_hps.index

#### 3. For the hydro plants in the rest of the area, using the same methodology as we did in CA, i.e. scale the hourly net demand profile based on the monthly total net generation of conventional hydro reported in EIA 923 in each state, then decompose into plant level profile based on the corresponding plant capacities.

In [33]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}
us_abbrev_state = {val:key for key,val in us_state_abbrev.items()}

In [34]:
eastern_state_list = set(us_state_abbrev.values()) - {'DC','WA','OR','CA','NV','ID','UT','AZ','WY','CO','AK','HI'}
eastern_state_abbrev_to_loadzone = {}
for state in eastern_state_list:
    if us_abbrev_state[state] in eastern.zone2id:
        eastern_state_abbrev_to_loadzone[state] = [us_abbrev_state[state]]
eastern_state_abbrev_to_loadzone['FL'] = ['Florida Panhandle','Florida North','Florida South']
eastern_state_abbrev_to_loadzone['GA'] = ['Georgia North','Georgia South']
eastern_state_abbrev_to_loadzone['IL'] = ['Chicago North Illinois','Illinois Downstate']
eastern_state_abbrev_to_loadzone['MI'] = ['Michigan Northern','Michigan Southern']
eastern_state_abbrev_to_loadzone['MN'] = ['Minnesota Northern','Minnesota Southern']
eastern_state_abbrev_to_loadzone['MO'] = ['Missouri East','Missouri West']
eastern_state_abbrev_to_loadzone['MT'] = ['Montana Eastern']
eastern_state_abbrev_to_loadzone['NM'] = ['New Mexico Eastern']
eastern_state_abbrev_to_loadzone['NY'] = ['New York City','Upstate New York']
eastern_state_abbrev_to_loadzone['OH'] = ['Ohio River','Ohio Lake Erie']
eastern_state_abbrev_to_loadzone['PA'] = ['Pennsylvania Eastern','Pennsylvania Western']
eastern_state_abbrev_to_loadzone['VA'] = ['Virginia Mountains','Virginia Tidewater']
eastern_state_abbrev_to_loadzone['NC'].append('Western North Carolina')
eastern_state_abbrev_to_loadzone['TX'] = ['East Texas','Texas Panhandle']

In [35]:
eastern_loadzone_to_state_abbrev = {}
for key,val in eastern_state_abbrev_to_loadzone.items():
    for lz in val:
        eastern_loadzone_to_state_abbrev[lz] = key

In [38]:
state_ba_fraction = defaultdict(lambda: defaultdict(float))
ba_name = {'ISONE','NYISO','SWPP','PJM'}
for index,row in eastern_hyc.iterrows():
    if row['BA'] in ba_name:
        state_ba_fraction[eastern_loadzone_to_state_abbrev[row['zone_name']]][row['BA']] += row['Pmax']
    state_ba_fraction[eastern_loadzone_to_state_abbrev[row['zone_name']]]['total'] += row['Pmax']

In [39]:
state_ba_fraction

defaultdict(<function __main__.<lambda>()>,
            {'ME': defaultdict(float,
                         {'ISONE': 714.7999999999985,
                          'total': 714.7999999999985}),
             'NH': defaultdict(float,
                         {'ISONE': 424.8070000000001,
                          'total': 424.8070000000001}),
             'VT': defaultdict(float,
                         {'ISONE': 327.4110000000001,
                          'total': 327.4110000000001}),
             'MA': defaultdict(float,
                         {'ISONE': 268.8959999999998,
                          'total': 268.8959999999998}),
             'RI': defaultdict(float, {'ISONE': 2.8, 'total': 2.8}),
             'CT': defaultdict(float,
                         {'ISONE': 111.3, 'total': 118.5, 'NYISO': 7.2}),
             'NY': defaultdict(float,
                         {'NYISO': 4674.361000000001,
                          'total': 4674.361000000001}),
             'NJ': defaultdict(floa

In [40]:
def get_monthly_net_generation_eastern(state,filename,fuel_type,trim_eia_form_923):
    """Get monthly total net generation for the query fuel type in the query state in 2016 from EIA923
    :param str state: the query state
    :param str filename: name of the reference file
    :param str fuel_type: the query type of fuel
    :return: (*list*) EIA_net_generation -- monthly net generation of the query fuel type in the query 
        state in a list of length 12
    :raise ValueError: if state is invalid.
    :raise ValueError: if fuel_type is invalid.
    """
    
    state_name = ['ME', 'PA', 'IN', 'MI', 'IA', 'SC', 'MT', 'NJ', 'CT', 'LA', 'IL', 'MO', \
                  'MS', 'VT', 'KS', 'MD', 'WI', 'AR', 'TX', 'SD', 'WV', 'AL', 'FL', 'NC', \
                  'RI', 'NE', 'MN', 'VA', 'KY', 'MA', 'OH', 'NM', 'NH', 'GA', 'ND', 'TN', \
                  'OK', 'NY', 'DE']
    if state not in state_name:
        print("%s is incorrect. Possible states are: %s" %
              (state, state_name))
        raise ValueError('Invalid state')
    
    all_fuel_type = {'solar':{'SUN'},
                     'coal':{'COL'},
                     'dfo':{'DFO'},
                     'geothermal':{'GEO'},
                     'hydro':{'HYC'}, # Hydroelectric Conventional only, Hydroelectric Pumped Storage is handled separately
                     'ng':{'NG'},
                     'nuclear':{'NUC'},
                     'wind':{'WND'},
                     }
    if fuel_type not in all_fuel_type:
        print("%s is incorrect. Possible fuel types are: %s" %
              (fuel_type, list(all_fuel_type.keys())))
        raise ValueError('Invalid fuel_type')
    
    # Get trimmed EIA form 923 with only necessary columns
    plant_generation = trim_eia_form_923(filename)
    
    # Filter by state and fuel type
    net_generation_by_plant = plant_generation[(plant_generation['Plant State'] == state) &
                                               (plant_generation['AER\nFuel Type Code']. \
                                               isin(all_fuel_type[fuel_type]))].copy()
    
    # Drop unnecessary columns, plant ID, etc..
    net_generation = net_generation_by_plant.drop(net_generation_by_plant.columns[[0,1,2,3,4]], axis=1)
    net_generation = net_generation.replace('.',0)
    
    # Get monthly total net generation by summing up across plants with all positive values 
    # Note that negative ones are included in actual demand
    EIA_net_generation = list(net_generation.apply(lambda x: x[x>0].sum()).values)
    
    # If there is no such generator in the query state, the function will return a list of 0.0 instead of nan
    EIA_net_generation = list(np.nan_to_num(EIA_net_generation))
    
    return EIA_net_generation

In [41]:
eia_923_filename = 'EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx'

In [42]:
# Observe from state_ba_fraction, there is no such state that overlaps with PJM and SWPP simultaneously
eastern_monthly_hyc_rest = {}
for state,ba in state_ba_fraction.items():
    if len(ba) == 1:
        eastern_monthly_hyc_rest[state] = get_monthly_net_generation_eastern(state,eia_923_filename,'hydro',trim_eia_form_923)
    elif 'PJM' in ba:
        total_state_profile = get_monthly_net_generation_eastern(state,eia_923_filename,'hydro',trim_eia_form_923)
        frac = 1-(ba['PJM']/ba['total'])
        if frac > 0:
            eastern_monthly_hyc_rest[state] = [val*frac for val in total_state_profile]
    elif 'SWPP' in ba:       
        total_state_profile = get_monthly_net_generation_eastern(state,eia_923_filename,'hydro',trim_eia_form_923)
        frac = 1-(ba['SWPP']/ba['total'])
        if frac > 0:
            eastern_monthly_hyc_rest[state] = [val*frac for val in total_state_profile]

In [43]:
eastern_monthly_hyc_rest

{'NC': [897550.942832946,
  687377.8201150205,
  470757.2085256591,
  197692.52731305058,
  289090.372128626,
  205855.94850074418,
  226350.48146545666,
  279943.9568683796,
  184243.43749774765,
  267199.82125436416,
  118818.55137037068,
  143869.9881562513],
 'SC': [650400.0149999999,
  291852.80500000005,
  244716.38299999994,
  158837.48400000003,
  185628.43200000006,
  113248.274,
  115419.744,
  103064.988,
  97275.33300000003,
  105693.49399999998,
  76075.158,
  84202.88999999998],
 'GA': [603116.597,
  353964.99,
  299980.1340000001,
  217673.269,
  184329.93500000003,
  187193.28,
  165236.813,
  202991.78900000002,
  161282.09000000003,
  171315.29,
  162702.82399999996,
  184747.989],
 'FL': [35240.888,
  25956.167,
  20033.068,
  12362.677,
  15191.995,
  10095.299,
  9613.356000000002,
  10596.578,
  7641.9349999999995,
  10878.378,
  6880.423000000001,
  10060.236],
 'AL': [1787712.6620000002,
  1583810.1050000002,
  1106115.4479999999,
  569458.6699999999,
  283491.9

In [108]:
# For Montana, we only have 5 hyc generators (1 plant) in Eastern, which we found the corresponding real plant in EIA 923
# eastern_monthly_hyc_rest['MT'] = [45626.74, 41389.969, 56672.792, 72565.174, 109094.59, 81250.23, \
#                                  59307.364, 40236.559, 39401.824, 46421.753, 84258.771, 88366.23]
eastern_monthly_hyc_rest['MT'] = [0]*12

In [45]:
eastern_hyc[eastern_hyc['zone_name'] == 'Montana Eastern']

Unnamed: 0_level_0,Pmax,lat,lon,zone_name,County,BA
plant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10378,0.0,48.0166,-106.419,Montana Eastern,McCone__MT,MISO
10379,0.0,48.0166,-106.419,Montana Eastern,McCone__MT,MISO
10380,0.0,48.0166,-106.419,Montana Eastern,McCone__MT,MISO
10381,0.0,48.0166,-106.419,Montana Eastern,McCone__MT,MISO
10382,0.0,48.0166,-106.419,Montana Eastern,McCone__MT,MISO


In [46]:
# For Eastern Texas, we only have 5 hyc generators (2 plants) in Eastern, which we found the corresponding real plants in EIA 923
eastern_monthly_hyc_rest['TX'] = [8430, 8091, 9172, 16705, 23493, 26282, 5259, 7536, 7229, 2376, 5503, 2665]

In [47]:
eastern_hyc[eastern_hyc['zone_name'] == 'East Texas']

Unnamed: 0_level_0,Pmax,lat,lon,zone_name,County,BA
plant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9204,0.286,31.0424,-94.0865,East Texas,Jasper__TX,MISO
9205,2.29,31.0424,-94.0865,East Texas,Jasper__TX,MISO
9206,2.29,31.0424,-94.0865,East Texas,Jasper__TX,MISO
9207,14.883,31.0424,-94.0865,East Texas,Jasper__TX,MISO
9208,14.883,31.0424,-94.0865,East Texas,Jasper__TX,MISO
9209,23.184,31.1976,-93.5861,East Texas,Sabine__LA,MISO
9210,23.184,31.1976,-93.5861,East Texas,Sabine__LA,MISO


#### Get net demand for each state to define the hourly shape of HYC in the rest of states not covered by the four BAs.

In [50]:
from powersimdata.utility import transfer_data as td
from powersimdata.input.input_data import InputData

In [61]:
ssh_client = td.setup_server_connection()
input_data = InputData(ssh_client=ssh_client)
scenario_info = {"interconnect": "Eastern", "base_solar": "v4.2", "base_wind": "v5.3", "base_demand": "v6"}
solar = input_data.get_data(scenario_info, "solar")
wind = input_data.get_data(scenario_info, "wind")
demand = input_data.get_data(scenario_info, "demand")

--> Loading solar
--> Loading wind
--> Loading demand


In [58]:
wind.columns = [int(col) for col in wind.columns]
solar.columns = [int(col) for col in solar.columns]
demand.columns = [int(col) for col in demand.columns]

def get_net_demand_profile_eastern(state):
    """Get the net demand profile of a specific state based on Western basecase scenario 2016
    :param str state: the query state
    :param powersimdata.scenario.scenario.Scenario s: scenario instance
    :return: (*list*) netdemand -- net demand profile of the query state in a list of length 8784
    :raise ValueError: if state is invalid.
    """

    state_name ={'ME': ['Maine'],
                 'IN': ['Indiana'],
                 'IA': ['Iowa'],
                 'SC': ['South Carolina'],
                 'NJ': ['New Jersey'],
                 'CT': ['Connecticut'],
                 'LA': ['Louisiana'],
                 'MS': ['Mississippi'],
                 'VT': ['Vermont'],
                 'KS': ['Kansas'],
                 'MD': ['Maryland'],
                 'WI': ['Wisconsin'],
                 'AR': ['Arkansas'],
                 'SD': ['South Dakota'],
                 'WV': ['West Virginia'],
                 'AL': ['Alabama'],
                 'NC': ['North Carolina', 'Western North Carolina'],
                 'RI': ['Rhode Island'],
                 'NE': ['Nebraska'],
                 'KY': ['Kentucky'],
                 'MA': ['Massachusetts'],
                 'NH': ['New Hampshire'],
                 'ND': ['North Dakota'],
                 'TN': ['Tennessee'],
                 'OK': ['Oklahoma'],
                 'DE': ['Delaware'],
                 'FL': ['Florida Panhandle', 'Florida North', 'Florida South'],
                 'GA': ['Georgia North', 'Georgia South'],
                 'IL': ['Chicago North Illinois', 'Illinois Downstate'],
                 'MI': ['Michigan Northern', 'Michigan Southern'],
                 'MN': ['Minnesota Northern', 'Minnesota Southern'],
                 'MO': ['Missouri East', 'Missouri West'],
                 'MT': ['Montana Eastern'],
                 'NM': ['New Mexico Eastern'],
                 'NY': ['New York City', 'Upstate New York'],
                 'OH': ['Ohio River', 'Ohio Lake Erie'],
                 'PA': ['Pennsylvania Eastern', 'Pennsylvania Western'],
                 'VA': ['Virginia Mountains', 'Virginia Tidewater'],
                 'TX': ['East Texas', 'Texas Panhandle']}             
    if state not in state_name:
        print("%s is incorrect. Possible states are: %s" %
              (state, list(state_name.keys())))
        raise ValueError('Invalid state')
        
    # load Western basecase scenario 2016 and the corresponding profiles
#     wind = s.state.get_wind()
#     solar = s.state.get_solar()
#     demand = s.state.get_demand()
#     western = Grid(['Western'])
#     plant = western.plant
    plant = eastern.plant
                  
    wind_plant_id_list = list(plant[(plant.type == 'wind') & (plant.zone_name.isin(state_name[state]))].index)
    solar_plant_id_list = list(plant[(plant.type == 'solar') & (plant.zone_name.isin(state_name[state]))].index)
    windsum = wind[wind_plant_id_list].sum(axis = 1)
    solarsum = solar[solar_plant_id_list].sum(axis = 1)
    
    zone_id_list = [eastern.zone2id[zonename] for zonename in state_name[state]]
    demandsum = demand[zone_id_list].sum(axis = 1)
    netdemand = [demandsum[i] - windsum[i] - solarsum[i] for i in demandsum.index]
    netdemand = [nd if nd >= 0 else 0 for nd in netdemand]
    
    return netdemand

In [59]:
eastern_net_demand_state_rest = {}
for state in eastern_monthly_hyc_rest:
    eastern_net_demand_state_rest[state] = get_net_demand_profile_eastern(state)

In [72]:
# Scale hourly net demand profile based on monthly net generation to get the HYC hourly total profile of the corresponding state
eastern_hyc_hourly_total_state_rest = {}
for state in eastern_net_demand_state_rest:
    eastern_hyc_hourly_total_state_rest[state] = scale_profile(pd.Series(eastern_net_demand_state_rest[state], index=demand.index), eastern_monthly_hyc_rest[state])

In [111]:
# Decompose HYC hourly total profile into plant level profiles proportional to plant capacities
# Two HYC generators [9209,9210] in Louisiana are placed in East Texas loadzone, put them back to LA when generating plant level profiles.
hydro_v3_rest_state = {}
for state in eastern_hyc_hourly_total_state_rest:
    plantlist = list(eastern_hyc[(eastern_hyc['zone_name'].apply(lambda x: eastern_loadzone_to_state_abbrev[x]) == state) & (~eastern_hyc['BA'].isin({'ISONE','NYISO','SWPP','PJM'}))].index)
    if state == 'TX':
        plantlist.remove(9209)
        plantlist.remove(9210)
    if state == 'LA':
        plantlist.append(9209)
        plantlist.append(9210)
    plant_df = eastern_hyc.loc[plantlist].copy()
    hydro_v3_rest_state[state] = get_hourly_plant_level_profile(plant_df,eastern_hyc_hourly_total_state_rest[state])

In [99]:
sum(eastern_hyc_hourly_total_state_rest['MT'])

764591.9959999998

In [93]:
plant_df.index

Int64Index([10378, 10379, 10380, 10381, 10382], dtype='int64', name='plant_id')

In [74]:
# Generate final eastern hydro v3
eastern_hydro_v3 = pd.concat(list(hydro_v3_rest_state.values()),axis = 1)

In [75]:
eastern_hydro_v3.index = eastern_hydro_v3_hps.index
eastern_hydro_v3 = pd.concat([eastern_hydro_v3, eastern_hydro_v3_hps],axis = 1)

In [76]:
eastern_hydro_v3 = pd.concat([eastern_hydro_v3, hydro_v3_isone, hydro_v3_nyiso, hydro_v3_pjm, hydro_v3_swpp],axis = 1)

In [77]:
eastern_hydro_v3 = eastern_hydro_v3[sorted(eastern_hydro_v3.columns)]

In [78]:
eastern_hydro_v3.to_csv('eastern_hydro_v3.csv')

### Sanity Check

In [79]:
(eastern_hydro_v3 < 0).sum().sum()

0

In [80]:
eastern_hydro_v3.sum().sum()

113447046.41682649

In [81]:
EIA923 = trim_eia_form_923(eia_923_filename)

In [82]:
net_generation_by_plant = EIA923[(EIA923['Plant State'].isin(eastern_state_abbrev_to_loadzone)) & (EIA923['AER\nFuel Type Code'].isin({'HYC','HPS'}))].copy()
net_generation_by_plant = net_generation_by_plant.drop(net_generation_by_plant.columns[[0,1,2,3,4]], axis=1).replace('.',0)
sum(list(net_generation_by_plant.apply(lambda x: x[x>0].sum()).values))

103173825.93

In [113]:
ck1 = 0
ck2 = 0
for val in eastern_monthly_hyc_rest.values():
    ck1 += sum(val)
for val in hydro_v3_rest_state.values():
    ck2 += val.sum().sum()
print(ck1,ck2)

38975320.81739512 38975320.81739514


In [84]:
eastern_hydro_v3_hps.sum().sum()

20397404.72770007

In [85]:
hydro_v3_isone.sum().sum() + hydro_v3_nyiso.sum().sum() + hydro_v3_pjm.sum().sum() + hydro_v3_swpp.sum().sum()

54074320.87173128

In [86]:
39739893.901992 + 19696047.204984646 + 54074320.87173126

113510261.97870791

In [87]:
# Compare 2016 total conventional hydro generation using our method with external profiles, this is just a sanity check
# We directly use external profiles in ISONE, NYISO, PJM and SWPP for conventional hydro in those regions
sum_total = []
ba_check = 'PJM'
for state,ba in state_ba_fraction.items():
    if ba_check in ba:
        print(state)
        monthly_total = get_monthly_net_generation_eastern(state,eia_923_filename,'hydro',trim_eia_form_923)
        if ba['total'] != ba[ba_check]:
            sum_total.append(sum(monthly_total)*ba[ba_check]/ba['total'])
        else:
            sum_total.append(sum(monthly_total))
sum_total

NJ
PA
MD
VA
NC
KY
WV
OH
MI
IL


[9478.000000000002,
 2374519.0,
 1392187.0,
 1471462.0,
 448434.9439713835,
 1601630.483650435,
 1638247.0000000002,
 500319.99999999994,
 55323.96201082353,
 76943.87198347521]

In [88]:
sum(sum_total)

9568546.261616118

In [89]:
hydro_v3_pjm.sum().sum()

11899842.099999998