-
Notifications
You must be signed in to change notification settings - Fork 28
/
add_crosswalk.py
executable file
·445 lines (379 loc) · 19.5 KB
/
add_crosswalk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
#!/usr/bin/env python3
import argparse
import json
import sys
import geopandas as gpd
import pandas as pd
from numpy import unique
from rasterstats import zonal_stats
from utils.fim_enums import FIM_exit_codes
from utils.shared_functions import getDriver
from utils.shared_variables import FIM_ID
# TODO - Feb 17, 2023 - We want to explore using FR methodology as branch zero
def add_crosswalk(
input_catchments_fileName,
input_flows_fileName,
input_srcbase_fileName,
output_catchments_fileName,
output_flows_fileName,
output_src_fileName,
output_src_json_fileName,
output_crosswalk_fileName,
output_hydro_table_fileName,
input_huc_fileName,
input_nwmflows_fileName,
input_nwmcatras_fileName,
mannings_n,
input_nwmcat_fileName,
extent,
small_segments_filename,
min_catchment_area,
min_stream_length,
calibration_mode=False,
):
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
min_catchment_area = float(min_catchment_area) # 0.25#
min_stream_length = float(min_stream_length) # 0.5#
input_catchments = input_catchments.dissolve(by='HydroID').reset_index()
## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")
# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')
input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
input_nwmflows = input_nwmflows.set_index('feature_id')
# Get stream midpoint
stream_midpoint = []
hydroID = []
for i, lineString in enumerate(input_flows.geometry):
hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]
input_flows_midpoint = gpd.GeoDataFrame(
{'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
)
input_flows_midpoint = input_flows_midpoint.set_index('HydroID')
# Create crosswalk
crosswalk = gpd.sjoin_nearest(
input_flows_midpoint, input_nwmflows, how='left', distance_col='distance'
).reset_index()
crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})
crosswalk.loc[crosswalk['distance'] > 100.0, 'feature_id'] = pd.NA
crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')
if crosswalk.empty:
print("No relevant streams within HUC boundaries.")
sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')
if output_catchments.empty:
print("No valid catchments remain.")
sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')
# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')
output_flows['ManningN'] = mannings_n
if output_flows.NextDownID.dtype != 'int':
output_flows.NextDownID = output_flows.NextDownID.astype(int)
# Adjust short model reach rating curves
print('Adjusting model reach rating curves')
sml_segs = pd.DataFrame()
# replace small segment geometry with neighboring stream
for stream_index in output_flows.index:
if (
output_flows["areasqkm"][stream_index] < min_catchment_area
and output_flows["LengthKm"][stream_index] < min_stream_length
and output_flows["LakeID"][stream_index] < 0
):
short_id = output_flows['HydroID'][stream_index]
to_node = output_flows['To_Node'][stream_index]
from_node = output_flows['From_Node'][stream_index]
# multiple upstream segments
if len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) > 1:
try:
# drainage area would be better than stream order but we would need to calculate
max_order = max(output_flows.loc[output_flows['NextDownID'] == short_id]['order_'])
except Exception as e:
print(
f"short_id: {short_id} cannot calculate max stream order for "
f"multiple upstream segments scenario. \n Exception: \n {repr(e)} \n"
)
if (
len(
output_flows.loc[
(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)
]['HydroID']
)
== 1
):
update_id = output_flows.loc[
(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)
]['HydroID'].item()
else:
# Get the first one
# (same stream order, without drainage area info, hard to know which is the main channel)
update_id = output_flows.loc[
(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)
]['HydroID'].values[0]
# single upstream segments
elif len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) == 1:
update_id = output_flows.loc[output_flows.To_Node == from_node]['HydroID'].item()
# no upstream segments; multiple downstream segments
elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) > 1:
try:
max_order = max(
output_flows.loc[output_flows.From_Node == to_node]['order_']
) # drainage area would be better than stream order but we would need to calculate
except Exception as e:
print(
f"To Node {to_node} cannot calculate max stream order for no upstream segments; "
"multiple downstream segments scenario. "
f"Exception \n {repr(e)} \n"
)
if (
len(
output_flows.loc[
(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)
]['HydroID']
)
== 1
):
update_id = output_flows.loc[
(output_flows.From_Node == to_node) & (output_flows['order_'] == max_order)
]['HydroID'].item()
# output_flows has a higher order than the max_order
elif output_flows.loc[(output_flows.From_Node == to_node), 'order_'].max() > max_order:
update_id = output_flows.loc[
(output_flows.From_Node == to_node)
& (
output_flows['order_']
== output_flows.loc[(output_flows.From_Node == to_node), 'order_'].max()
)
]['HydroID'].values[0]
# Get the first one
# Same stream order, without drainage area info it is hard to know which is the main channel.
else:
if max_order in output_flows.loc[output_flows.From_Node == to_node, 'order_'].values:
update_id = output_flows.loc[
(output_flows.From_Node == to_node) & (output_flows['order_'] == max_order)
]['HydroID'].values[0]
else:
update_id = output_flows.loc[
(output_flows.From_Node == to_node)
& (
output_flows['order_']
== output_flows.loc[output_flows.From_Node == to_node, 'order_'].max()
)
]['HydroID'].values[0]
# no upstream segments; single downstream segment
elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) == 1:
update_id = output_flows.loc[output_flows.From_Node == to_node]['HydroID'].item()
else:
update_id = output_flows[output_flows.HydroID == short_id]['HydroID'].iloc[0]
output_order = output_flows.loc[output_flows.HydroID == short_id]['order_']
if len(output_order) == 1:
str_order = output_order.item()
else:
str_order = output_order.max()
sml_segs = pd.concat(
[
sml_segs,
pd.DataFrame(
{'short_id': [short_id], 'update_id': [update_id], 'str_order': [str_order]}
),
],
ignore_index=True,
)
print(
f"Number of short reaches [areasqkm < {min_catchment_area} and LengthKm < {min_stream_length}] = "
f"{len(sml_segs)}"
)
# calculate src_full
input_src_base = pd.read_csv(input_srcbase_fileName, dtype=object)
if input_src_base.CatchId.dtype != 'int':
input_src_base.CatchId = input_src_base.CatchId.astype(int)
input_src_base = input_src_base.merge(
output_flows[['ManningN', 'HydroID', 'NextDownID', 'order_']], left_on='CatchId', right_on='HydroID'
)
input_src_base = input_src_base.rename(columns=lambda x: x.strip(" "))
input_src_base = input_src_base.apply(pd.to_numeric, **{'errors': 'coerce'})
input_src_base['TopWidth (m)'] = input_src_base['SurfaceArea (m2)'] / input_src_base['LENGTHKM'] / 1000
input_src_base['WettedPerimeter (m)'] = input_src_base['BedArea (m2)'] / input_src_base['LENGTHKM'] / 1000
input_src_base['WetArea (m2)'] = input_src_base['Volume (m3)'] / input_src_base['LENGTHKM'] / 1000
input_src_base['HydraulicRadius (m)'] = (
input_src_base['WetArea (m2)'] / input_src_base['WettedPerimeter (m)']
)
input_src_base['HydraulicRadius (m)'].fillna(0, inplace=True)
input_src_base['Discharge (m3s-1)'] = (
input_src_base['WetArea (m2)']
* pow(input_src_base['HydraulicRadius (m)'], 2.0 / 3)
* pow(input_src_base['SLOPE'], 0.5)
/ input_src_base['ManningN']
)
# set nans to 0
input_src_base.loc[input_src_base['Stage'] == 0, ['Discharge (m3s-1)']] = 0
input_src_base['Bathymetry_source'] = pd.NA
output_src = input_src_base.drop(columns=['CatchId']).copy()
if output_src.HydroID.dtype != 'int':
output_src.HydroID = output_src.HydroID.astype(int)
# update rating curves
if len(sml_segs) > 0:
sml_segs.to_csv(small_segments_filename, index=False)
print("Update rating curves for short reaches.")
for index, segment in sml_segs.iterrows():
short_id = segment[0]
update_id = segment[1]
new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']]
for src_index, src_stage in new_values.iterrows():
output_src.loc[
(output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
['Discharge (m3s-1)'],
] = src_stage[1]
output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
output_crosswalk = output_src[['HydroID', 'feature_id']]
output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)
# make hydroTable
output_hydro_table = output_src.loc[
:,
[
'HydroID',
'feature_id',
'NextDownID',
'order_',
'Number of Cells',
'SurfaceArea (m2)',
'BedArea (m2)',
'TopWidth (m)',
'LENGTHKM',
'AREASQKM',
'WettedPerimeter (m)',
'HydraulicRadius (m)',
'WetArea (m2)',
'Volume (m3)',
'SLOPE',
'ManningN',
'Stage',
'Discharge (m3s-1)',
],
]
output_hydro_table.rename(columns={'Stage': 'stage', 'Discharge (m3s-1)': 'discharge_cms'}, inplace=True)
# Set placeholder variables to be replaced in post-processing (as needed).
# Create here to ensure consistent column vars. These variables represent the original unmodified values
output_hydro_table['default_discharge_cms'] = output_src['Discharge (m3s-1)']
output_hydro_table['default_Volume (m3)'] = output_src['Volume (m3)']
output_hydro_table['default_WetArea (m2)'] = output_src['WetArea (m2)']
output_hydro_table['default_HydraulicRadius (m)'] = output_src['HydraulicRadius (m)']
output_hydro_table['default_ManningN'] = output_src['ManningN']
# Placeholder vars for BARC
output_hydro_table['Bathymetry_source'] = pd.NA
# Placeholder vars for subdivision routine
output_hydro_table['subdiv_applied'] = False
output_hydro_table['overbank_n'] = pd.NA
output_hydro_table['channel_n'] = pd.NA
output_hydro_table['subdiv_discharge_cms'] = pd.NA
# Placeholder vars for the calibration routine
output_hydro_table['calb_applied'] = False
output_hydro_table['last_updated'] = pd.NA
output_hydro_table['submitter'] = pd.NA
output_hydro_table['obs_source'] = pd.NA
output_hydro_table['precalb_discharge_cms'] = pd.NA
output_hydro_table['calb_coef_usgs'] = pd.NA
output_hydro_table['calb_coef_ras2fim'] = pd.NA
output_hydro_table['calb_coef_spatial'] = pd.NA
output_hydro_table['calb_coef_final'] = pd.NA
if output_hydro_table.HydroID.dtype != 'str':
output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str)
output_hydro_table[FIM_ID] = output_hydro_table.loc[:, 'HydroID'].apply(lambda x: str(x)[0:4])
if input_huc[FIM_ID].dtype != 'str':
input_huc[FIM_ID] = input_huc[FIM_ID].astype(str)
output_hydro_table = output_hydro_table.merge(input_huc.loc[:, [FIM_ID, 'HUC8']], how='left', on=FIM_ID)
if output_flows.HydroID.dtype != 'str':
output_flows.HydroID = output_flows.HydroID.astype(str)
output_hydro_table = output_hydro_table.merge(
output_flows.loc[:, ['HydroID', 'LakeID']], how='left', on='HydroID'
)
output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int)
output_hydro_table = output_hydro_table.rename(columns={'HUC8': 'HUC'})
if output_hydro_table.HUC.dtype != 'str':
output_hydro_table.HUC = output_hydro_table.HUC.astype(str)
output_hydro_table = output_hydro_table.drop(columns=FIM_ID)
if output_hydro_table.feature_id.dtype != 'int':
output_hydro_table.feature_id = output_hydro_table.feature_id.astype(int)
if output_hydro_table.feature_id.dtype != 'str':
output_hydro_table.feature_id = output_hydro_table.feature_id.astype(str)
# make src json
output_src_json = dict()
hydroID_list = unique(output_src['HydroID'])
for hid in hydroID_list:
indices_of_hid = output_src['HydroID'] == hid
stage_list = output_src['Stage'][indices_of_hid].astype(float)
q_list = output_src['Discharge (m3s-1)'][indices_of_hid].astype(float)
stage_list = stage_list.tolist()
q_list = q_list.tolist()
output_src_json[str(hid)] = {'q_list': q_list, 'stage_list': stage_list}
# write out
output_catchments.to_file(
output_catchments_fileName, driver=getDriver(output_catchments_fileName), index=False
)
output_flows.to_file(output_flows_fileName, driver=getDriver(output_flows_fileName), index=False)
output_src.to_csv(output_src_fileName, index=False)
output_crosswalk.to_csv(output_crosswalk_fileName, index=False)
output_hydro_table.to_csv(output_hydro_table_fileName, index=False)
with open(output_src_json_fileName, 'w') as f:
json.dump(output_src_json, f, sort_keys=True, indent=2)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves"
)
parser.add_argument("-d", "--input-catchments-fileName", help="DEM derived catchments", required=True)
parser.add_argument("-a", "--input-flows-fileName", help="DEM derived streams", required=True)
parser.add_argument(
"-s", "--input-srcbase-fileName", help="Base synthetic rating curve table", required=True
)
parser.add_argument(
"-l", "--output-catchments-fileName", help="Subset crosswalked catchments", required=True
)
parser.add_argument("-f", "--output-flows-fileName", help="Subset crosswalked streams", required=True)
parser.add_argument(
"-r", "--output-src-fileName", help="Output crosswalked synthetic rating curve table", required=True
)
parser.add_argument(
"-j", "--output-src-json-fileName", help="Output synthetic rating curve json", required=True
)
parser.add_argument("-x", "--output-crosswalk-fileName", help="Crosswalk table", required=True)
parser.add_argument("-t", "--output-hydro-table-fileName", help="Hydrotable", required=True)
parser.add_argument("-w", "--input-huc-fileName", help="HUC8 boundary", required=True)
parser.add_argument("-b", "--input-nwmflows-fileName", help="Subest NWM burnlines", required=True)
parser.add_argument("-y", "--input-nwmcatras-fileName", help="NWM catchment raster", required=False)
parser.add_argument(
"-m",
"--mannings-n",
help="Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.",
required=True,
)
parser.add_argument("-z", "--input-nwmcat-fileName", help="NWM catchment polygon", required=True)
parser.add_argument("-p", "--extent", help="GMS only for now", default="GMS", required=False)
parser.add_argument(
"-k", "--small-segments-filename", help="output list of short segments", required=True
)
parser.add_argument(
"-c", "--calibration-mode", help="Mannings calibration flag", required=False, action="store_true"
)
parser.add_argument("-e", "--min-catchment-area", help="Minimum catchment area", required=True)
parser.add_argument("-g", "--min-stream-length", help="Minimum stream length", required=True)
args = vars(parser.parse_args())
add_crosswalk(**args)