Skip to content

Commit

Permalink
Refactor NHDPlus HR preprocessing workflow
Browse files Browse the repository at this point in the history
Refactor NHDPlus HR preprocessing workflow. 

- Consolidate NHD streams, NWM catchments, and headwaters MS and FR layers with mainstem column.
- HUC8 intersections are included in the input headwaters layer.
- clip_vectors_to_wbd.py removes incoming stream segment from the selected layers.

This resolves #238.
  • Loading branch information
BrianAvant committed Apr 30, 2021
1 parent 199ab6e commit 9518cbe
Show file tree
Hide file tree
Showing 14 changed files with 539 additions and 437 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
<br/><br/>
## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371)

Refactor NHDPlus HR preprocessing workflow. Resolves issue #238

## Changes
- Consolidate NHD streams, NWM catchments, and headwaters MS and FR layers with `mainstem` column.
- HUC8 intersections are included in the input headwaters layer.
- `clip_vectors_to_wbd.py` removes incoming stream segment from the selected layers.

<br/><br/>
## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367)

Expand Down
15 changes: 5 additions & 10 deletions fim_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,11 @@ logFile=$outputRunDataDir/logs/summary.log

## Define inputs
export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg
export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg
export input_NWM_Catchments_fr=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg
export input_NWM_Catchments_ms=$inputDataDir/nwm_hydrofabric/nwm_catchments_ms.gpkg
export input_NWM_Flows_fr=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg
export input_NWM_Flows_ms=$inputDataDir/nwm_hydrofabric/nwm_flows_ms.gpkg
export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg
export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted.gpkg
export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted.gpkg
export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr.gpkg
export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms.gpkg
export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg
export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg
export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg
export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg
export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg

## Input handling ##
$srcDir/check_huc_inputs.py -u "$hucList"
Expand Down
1 change: 1 addition & 0 deletions src/add_crosswalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
elif extent == 'MS':
## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc)
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1]
input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'})
if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat=input_nwmcat.set_index('feature_id')
Expand Down
237 changes: 140 additions & 97 deletions src/adjust_headwater_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,140 +3,183 @@
import geopandas as gpd
import pandas as pd
import numpy as np
from os.path import splitext
from tqdm import tqdm
import argparse
import pygeos
from shapely.geometry import Point,LineString
from shapely.ops import split
from shapely.wkb import dumps, loads
from utils.shared_variables import PREP_PROJECTION
from utils.shared_functions import getDriver
import warnings
warnings.simplefilter("ignore")

def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id):

# identify true headwater segments
if nhd_streams['headwaters_id'].dtype=='int':
nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy()
if headwaters[headwater_id].dtype != 'int': headwaters[headwater_id] = headwaters[headwater_id].astype(int)
else:
nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id.notna()) & (nhd_streams.downstream_of_headwater == False),:].copy()
def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id):

# Identify true headwater segments
nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy()
nhd_streams_adj = nhd_streams_adj.explode()
nhd_streams_adj = nhd_streams_adj.reset_index(drop=True)

headwater_limited = headwaters.merge(nhd_streams_adj["headwaters_id"],left_on=headwater_id, right_on="headwaters_id",how='right')
if nwm_headwaters["site_id"].dtype != 'int': nwm_headwaters["site_id"] = nwm_headwaters["site_id"].astype(int)
headwater_limited = nwm_headwaters.merge(nhd_streams_adj[["headwaters_id","mainstem"]],left_on="site_id", right_on="headwaters_id",how='right')
headwater_limited = headwater_limited.drop(columns=['headwaters_id'])

nws_lid_limited = nws_lids.merge(nhd_streams[["nws_lid"]],left_on="site_id", right_on="nws_lid",how='right')
nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!='']
nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid'])

# Check for issues in nws_lid layer
if len(nws_lid_limited) < len(nws_lids):
missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id))
print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}")

# Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments
headwater_pts = headwater_limited.append(nws_lid_limited)
headwater_pts = headwater_pts.reset_index(drop=True)

if headwater_pts is not None:

headwaterstreams = []
referencedpoints = []
snapped_ahps = []
nws_lid = []
for index, point in headwater_pts.iterrows():

# Convert headwaterpoint geometries to WKB representation
wkb_points = dumps(point.geometry)

# Create pygeos headwaterpoint geometries from WKB representation
pointbin_geom = pygeos.io.from_wkb(wkb_points)

if point.pt_type == 'nwm_headwater':
# Closest segment to headwater
closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]]
else:
# Closest segment to ahps site
closest_stream = nhd_streams.loc[nhd_streams["nws_lid"]==point[headwater_id]]

try: # Seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is
closest_stream =closest_stream.explode()
except:
pass

try:
wkb_closest_stream = dumps(closest_stream.geometry[0])
except:
wkb_closest_stream = dumps(closest_stream.geometry[0][0])

streambin_geom = pygeos.io.from_wkb(wkb_closest_stream)

# Linear reference headwater to closest stream segment
pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom)
referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline)

# Convert geometries to wkb representation
bin_referencedpoint = pygeos.io.to_wkb(referencedpoint)

# Convert to shapely geometries
shply_referencedpoint = loads(bin_referencedpoint)
shply_linestring = loads(wkb_closest_stream)
headpoint = Point(shply_referencedpoint.coords)

if point.pt_type == 'nwm_headwater':
cumulative_line = []
relativedistlst = []

# Collect all nhd stream segment linestring verticies
for point in zip(*shply_linestring.coords.xy):
cumulative_line = cumulative_line + [point]
relativedist = shply_linestring.project(Point(point))
relativedistlst = relativedistlst + [relativedist]

# Add linear referenced headwater point to closest nhd stream segment
if not headpoint in cumulative_line:
cumulative_line = cumulative_line + [headpoint]
relativedist = shply_linestring.project(headpoint)
relativedistlst = relativedistlst + [relativedist]

# Sort by relative line distance to place headwater point in linestring
sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist')
shply_linestring = LineString(sortline.geom.tolist())
referencedpoints = referencedpoints + [headpoint]

# Split the new linestring at the new headwater point
try:
line1,line2 = split(shply_linestring, headpoint)
headwaterstreams = headwaterstreams + [LineString(line1)]
nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1)
except:
line1 = split(shply_linestring, headpoint)
headwaterstreams = headwaterstreams + [LineString(line1[0])]
nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0])

try:
del cumulative_line, relativedistlst
except:
print (f"issue deleting adjusted stream variables for huc {huc}")

else:
snapped_ahps = snapped_ahps + [headpoint]
nws_lid = nws_lid + [point[headwater_id]]

nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater'])

headwaterstreams = []
referencedpoints = []

for index, point in headwater_limited.iterrows():

# convert headwaterpoint geometries to WKB representation
wkb_points = dumps(point.geometry)

# create pygeos headwaterpoint geometries from WKB representation
pointbin_geom = pygeos.io.from_wkb(wkb_points)

# Closest segment to headwater
closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]]

try: # seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is
closest_stream =closest_stream.explode()
except:
pass
try:
wkb_closest_stream = dumps(closest_stream.geometry[0])
del nhd_streams_adj, headwater_limited, referencedpoints, headwaterstreams
except:
wkb_closest_stream = dumps(closest_stream.geometry[0][0])

streambin_geom = pygeos.io.from_wkb(wkb_closest_stream)

# Linear reference headwater to closest stream segment
pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom)
referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline)

# convert geometries to wkb representation
bin_referencedpoint = pygeos.io.to_wkb(referencedpoint)

# convert to shapely geometries
shply_referencedpoint = loads(bin_referencedpoint)
shply_linestring = loads(wkb_closest_stream)
headpoint = Point(shply_referencedpoint.coords)
cumulative_line = []
relativedistlst = []

# collect all nhd stream segment linestring verticies
for point in zip(*shply_linestring.coords.xy):
cumulative_line = cumulative_line + [point]
relativedist = shply_linestring.project(Point(point))
relativedistlst = relativedistlst + [relativedist]

# add linear referenced headwater point to closest nhd stream segment
if not headpoint in cumulative_line:
cumulative_line = cumulative_line + [headpoint]
relativedist = shply_linestring.project(headpoint)
relativedistlst = relativedistlst + [relativedist]

# sort by relative line distance to place headwater point in linestring
sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist')
shply_linestring = LineString(sortline.geom.tolist())
referencedpoints = referencedpoints + [headpoint]

# split the new linestring at the new headwater point
try:
line1,line2 = split(shply_linestring, headpoint)
headwaterstreams = headwaterstreams + [LineString(line1)]
nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1)
except:
line1 = split(shply_linestring, headpoint)
headwaterstreams = headwaterstreams + [LineString(line1[0])]
nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0])
print (f"issue deleting adjusted stream variables for huc {huc}")

# Create snapped ahps sites
if len(snapped_ahps) > 0:
snapped_ahps_points = gpd.GeoDataFrame({'pt_type': 'nws_lid', headwater_id: nws_lid, 'mainstem': True,
'geometry': snapped_ahps},geometry='geometry',crs=PREP_PROJECTION)

nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater'])
# Identify ajusted nhd headwaters
nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:]
nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode()

try:
del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst
except:
print ('issue deleting adjusted stream variables for huc ' + str(huc))
hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object)
for index,lineString in enumerate(nhd_headwater_streams_adj.geometry):
hw_point = [point for point in zip(*lineString.coords.xy)][-1]
hw_points[index] = Point(*hw_point)

## identify ajusted nhd headwaters
# print('Identify NHD headwater points',flush=True)
nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:]
nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode()

hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object)
for index,lineString in enumerate(nhd_headwater_streams_adj.geometry):
hw_point = [point for point in zip(*lineString.coords.xy)][-1]
hw_points[index] = Point(*hw_point)
nhd_headwater_points_adj = gpd.GeoDataFrame({'pt_type': 'NHDPlusID', headwater_id: nhd_headwater_streams_adj['NHDPlusID'],
'mainstem': False, 'geometry': hw_points},geometry='geometry',crs=PREP_PROJECTION)

nhd_headwater_points_adj = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams_adj['NHDPlusID'],
'geometry' : hw_points},geometry='geometry',crs=PREP_PROJECTION)
nhd_headwater_points_adj = nhd_headwater_points_adj.reset_index(drop=True)

del nhd_headwater_streams_adj
del nhd_headwater_streams_adj

try:
combined_pts = snapped_ahps_points.append(nhd_headwater_points_adj)
except:
combined_pts = nhd_headwater_points_adj.copy()

return(nhd_streams, nhd_headwater_points_adj)
return nhd_streams, combined_pts

if __name__ == '__main__':

parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points')
parser.add_argument('-f','--huc',help='huc number',required=True)
parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True)
parser.add_argument('-p','--headwaters',help='Headwater points layer',required=True,type=str)
parser.add_argument('-p','--nwm-headwaters',help='Headwater points layer',required=True,type=str)
parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None)
parser.add_argument('-s','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None)
parser.add_argument('-a','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None)
parser.add_argument('-g','--headwater-points-fileName',help='Output headwater points layer name',required=False,type=str,default=None)
parser.add_argument('-i','--headwater-id',help='Output headwaters points',required=True)
parser.add_argument('-b','--nws-lids',help='NWS lid points',required=True)
parser.add_argument('-i','--headwater-id',help='Headwater id column name',required=True)

args = vars(parser.parse_args())

adj_streams_gdf,adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,headwaters,headwater_id)
adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id)

if subset_nhd_streams_fileName is not None:
adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False)
adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']))

if headwater_points_fileName is not None:
headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName']),index=False)
headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName']))

if adj_headwater_points_fileName is not None:
adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName']),index=False)
adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName']))
18 changes: 9 additions & 9 deletions src/aggregate_fim_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ def aggregate_fim_outputs(args):

## aggregate rasters
# aggregate file paths
rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_unprj.tif')
catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_unprj.tif')
rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_prepprj.tif')
catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_prepprj.tif')

if huc6 not in huc_list:

Expand Down Expand Up @@ -155,28 +155,28 @@ def aggregate_fim_outputs(args):
shutil.copy(catchment_filename, catchment_mosaic)

## reproject rasters
reproject_raster(rem_mosaic)
reproject_raster(rem_mosaic,VIZ_PROJECTION)
os.remove(rem_mosaic)

reproject_raster(catchment_mosaic)
reproject_raster(catchment_mosaic,VIZ_PROJECTION)
os.remove(catchment_mosaic)


def reproject_raster(raster_name):
def reproject_raster(raster_name,reprojection):

with rasterio.open(raster_name) as src:
transform, width, height = calculate_default_transform(
src.crs, VIZ_PROJECTION, src.width, src.height, *src.bounds)
src.crs, reprojection, src.width, src.height, *src.bounds)
kwargs = src.meta.copy()
kwargs.update({
'crs': VIZ_PROJECTION,
'crs': reprojection,
'transform': transform,
'width': width,
'height': height,
'compress': 'lzw'
})

raster_proj_rename = os.path.split(raster_name)[1].replace('_unprj.tif', '.tif')
raster_proj_rename = os.path.split(raster_name)[1].replace('_prepprj.tif', '.tif')
raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename)

with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst:
Expand All @@ -187,7 +187,7 @@ def reproject_raster(raster_name):
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=VIZ_PROJECTION,
dst_crs=reprojection,
resampling=Resampling.nearest)
del src, dst

Expand Down
Loading

0 comments on commit 9518cbe

Please sign in to comment.