Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1pt] PR: Clip ocean mask from DEM and extend outlet streams #1121

Merged
merged 12 commits into from
May 6, 2024
133 changes: 109 additions & 24 deletions data/wbd/clip_vectors_to_wbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import geopandas as gpd
import pandas as pd
import rasterio as rio
from shapely.geometry import MultiPolygon, Polygon
from shapely.geometry import LineString, MultiPolygon, Point, Polygon
from shapely.ops import nearest_points

from utils.shared_functions import getDriver


# from utils.shared_variables import DEFAULT_FIM_PROJECTION_CRS
gpd.options.io_engine = "pyogrio"


def subset_vector_layers(
Expand Down Expand Up @@ -40,6 +41,52 @@ def subset_vector_layers(
subset_levee_protected_areas,
huc_CRS,
):
def extend_outlet_streams(streams, wbd_buffered, wbd):
"""
Extend outlet streams to nearest buffered WBD boundary
"""

wbd['geometry'] = wbd.geometry.boundary
wbd = gpd.GeoDataFrame(data=wbd, geometry='geometry')

wbd_buffered["linegeom"] = wbd_buffered.geometry

# Select only the streams that are outlets
levelpath_outlets = streams[streams['to'] == 0]

# Select only the streams that don't intersect the WBD boundary line
levelpath_outlets = levelpath_outlets[~levelpath_outlets.intersects(wbd['geometry'].iloc[0])]

levelpath_outlets['nearest_point'] = None
levelpath_outlets['last'] = None

for index, row in levelpath_outlets.iterrows():
coords = [(coords) for coords in list(row['geometry'].coords)]
last_coord = coords[-1]
levelpath_outlets.at[index, 'last'] = Point(last_coord)

wbd_buffered['geometry'] = wbd_buffered.geometry.boundary
wbd_buffered = gpd.GeoDataFrame(data=wbd_buffered, geometry='geometry')

for index, row in levelpath_outlets.iterrows():
levelpath_geom = row['last']
nearest_point = nearest_points(levelpath_geom, wbd_buffered)

levelpath_outlets.at[index, 'nearest_point'] = nearest_point[1]['geometry'].iloc[0]

levelpath_outlets.at[index, 'geometry'] = LineString(
list(row['geometry'].coords) + list([levelpath_outlets.at[index, 'nearest_point'].coords[0]])
)

levelpath_outlets = gpd.GeoDataFrame(data=levelpath_outlets, geometry='geometry')
levelpath_outlets = levelpath_outlets.drop(columns=['last', 'nearest_point'])

# Replace the streams in the original file with the extended streams
streams = streams[~streams['ID'].isin(levelpath_outlets['ID'])]
streams = pd.concat([streams, levelpath_outlets], ignore_index=True)

return streams

print(f"Getting Cell Size for {hucCode}", flush=True)
with rio.open(dem_filename) as dem_raster:
dem_cellsize = max(dem_raster.res)
Expand All @@ -53,39 +100,62 @@ def subset_vector_layers(
wbd_buffer.geometry = wbd_buffer.geometry.buffer(wbd_buffer_distance, resolution=32)
wbd_buffer = gpd.clip(wbd_buffer, dem_domain)

# Clip ocean water polygon for future masking ocean areas (where applicable)
landsea = gpd.read_file(landsea, mask=wbd_buffer, engine="fiona")
if not landsea.empty:
print(f"Create landsea gpkg for {hucCode}", flush=True)
landsea.to_file(
subset_landsea, driver=getDriver(subset_landsea), index=False, crs=huc_CRS, engine="fiona"
)

# Exclude landsea area from WBD and wbd_buffer
wbd = wbd.overlay(landsea, how='difference')
wbd.to_file(
wbd_filename,
layer='WBDHU8',
driver=getDriver(wbd_filename),
index=False,
crs=huc_CRS,
engine="fiona",
)

wbd_buffer = wbd_buffer.overlay(landsea, how='difference')

del landsea

# Make the streams buffer smaller than the wbd_buffer so streams don't reach the edge of the DEM
wbd_streams_buffer = wbd_buffer.copy()
wbd_streams_buffer.geometry = wbd_streams_buffer.geometry.buffer(-8 * dem_cellsize, resolution=32)

wbd_buffer = wbd_buffer[['geometry']]
wbd_streams_buffer = wbd_streams_buffer[['geometry']]
wbd_buffer.to_file(wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False, crs=huc_CRS)
wbd_buffer.to_file(
wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False, crs=huc_CRS, engine="fiona"
)
wbd_streams_buffer.to_file(
wbd_streams_buffer_filename, driver=getDriver(wbd_streams_buffer_filename), index=False, crs=huc_CRS
wbd_streams_buffer_filename,
driver=getDriver(wbd_streams_buffer_filename),
index=False,
crs=huc_CRS,
engine="fiona",
)

# Clip ocean water polygon for future masking ocean areas (where applicable)
landsea = gpd.read_file(landsea, mask=wbd_buffer)
if not landsea.empty:
print(f"Create landsea gpkg for {hucCode}", flush=True)
landsea.to_file(subset_landsea, driver=getDriver(subset_landsea), index=False, crs=huc_CRS)
del landsea

# Clip levee-protected areas polygons for future masking ocean areas (where applicable)
print(f"Subsetting Levee Protected Areas for {hucCode}", flush=True)
levee_protected_areas = gpd.read_file(levee_protected_areas, mask=wbd_buffer)
levee_protected_areas = gpd.read_file(levee_protected_areas, mask=wbd_buffer, engine="fiona")
if not levee_protected_areas.empty:
levee_protected_areas.to_file(
subset_levee_protected_areas,
driver=getDriver(subset_levee_protected_areas),
index=False,
crs=huc_CRS,
engine="fiona",
)
del levee_protected_areas

# Find intersecting lakes and writeout
print(f"Subsetting NWM Lakes for {hucCode}", flush=True)
nwm_lakes = gpd.read_file(nwm_lakes, mask=wbd_buffer)
nwm_lakes = gpd.read_file(nwm_lakes, mask=wbd_buffer, engine="fiona")
nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0]

if not nwm_lakes.empty:
Expand All @@ -97,34 +167,43 @@ def subset_vector_layers(
# Loop through the filled polygons and insert the new geometry
for i in range(len(nwm_lakes_fill_holes.geoms)):
nwm_lakes.loc[i, 'geometry'] = nwm_lakes_fill_holes.geoms[i]
nwm_lakes.to_file(subset_nwm_lakes, driver=getDriver(subset_nwm_lakes), index=False, crs=huc_CRS)
nwm_lakes.to_file(
subset_nwm_lakes, driver=getDriver(subset_nwm_lakes), index=False, crs=huc_CRS, engine="fiona"
)
del nwm_lakes

# Find intersecting levee lines
print(f"Subsetting NLD levee lines for {hucCode}", flush=True)
nld_lines = gpd.read_file(nld_lines, mask=wbd_buffer)
nld_lines = gpd.read_file(nld_lines, mask=wbd_buffer, engine="fiona")
if not nld_lines.empty:
nld_lines.to_file(subset_nld_lines, driver=getDriver(subset_nld_lines), index=False, crs=huc_CRS)
nld_lines.to_file(
subset_nld_lines, driver=getDriver(subset_nld_lines), index=False, crs=huc_CRS, engine="fiona"
)
del nld_lines

# Preprocessed levee lines for burning
nld_lines_preprocessed = gpd.read_file(nld_lines_preprocessed, mask=wbd_buffer)
nld_lines_preprocessed = gpd.read_file(nld_lines_preprocessed, mask=wbd_buffer, engine="fiona")
if not nld_lines_preprocessed.empty:
nld_lines_preprocessed.to_file(
subset_nld_lines_preprocessed,
driver=getDriver(subset_nld_lines_preprocessed),
index=False,
crs=huc_CRS,
engine="fiona",
)
del nld_lines_preprocessed

# Subset NWM headwaters
print(f"Subsetting NWM Headwater Points for {hucCode}", flush=True)
nwm_headwaters = gpd.read_file(nwm_headwaters, mask=wbd_streams_buffer)
nwm_headwaters = gpd.read_file(nwm_headwaters, mask=wbd_streams_buffer, engine="fiona")

if len(nwm_headwaters) > 0:
nwm_headwaters.to_file(
subset_nwm_headwaters, driver=getDriver(subset_nwm_headwaters), index=False, crs=huc_CRS
subset_nwm_headwaters,
driver=getDriver(subset_nwm_headwaters),
index=False,
crs=huc_CRS,
engine="fiona",
)
else:
print("No headwater point(s) within HUC " + str(hucCode) + " boundaries.")
Expand All @@ -134,11 +213,15 @@ def subset_vector_layers(

# Find intersecting nwm_catchments
print(f"Subsetting NWM Catchments for {hucCode}", flush=True)
nwm_catchments = gpd.read_file(nwm_catchments, mask=wbd_buffer)
nwm_catchments = gpd.read_file(nwm_catchments, mask=wbd_buffer, engine="fiona")

if len(nwm_catchments) > 0:
nwm_catchments.to_file(
subset_nwm_catchments, driver=getDriver(subset_nwm_catchments), index=False, crs=huc_CRS
subset_nwm_catchments,
driver=getDriver(subset_nwm_catchments),
index=False,
crs=huc_CRS,
engine="fiona",
)
else:
print("No NWM catchments within HUC " + str(hucCode) + " boundaries.")
Expand All @@ -149,11 +232,13 @@ def subset_vector_layers(
# Subset nwm streams
print(f"Subsetting NWM Streams for {hucCode}", flush=True)

nwm_streams = gpd.read_file(nwm_streams, mask=wbd_buffer)
nwm_streams = gpd.read_file(nwm_streams, mask=wbd_buffer, engine="fiona")

# NWM can have duplicate records, but appear to always be identical duplicates
nwm_streams.drop_duplicates(subset="ID", keep="first", inplace=True)

nwm_streams = extend_outlet_streams(nwm_streams, wbd_buffer, wbd)

nwm_streams_outlets = nwm_streams[~nwm_streams['to'].isin(nwm_streams['ID'])]
nwm_streams_nonoutlets = nwm_streams[nwm_streams['to'].isin(nwm_streams['ID'])]

Expand All @@ -177,7 +262,7 @@ def subset_vector_layers(
nwm_streams = pd.concat([nwm_streams_nonoutlets, nwm_streams_outlets])

nwm_streams.to_file(
subset_nwm_streams, driver=getDriver(subset_nwm_streams), index=False, crs=huc_CRS
subset_nwm_streams, driver=getDriver(subset_nwm_streams), index=False, crs=huc_CRS, engine="fiona"
)
else:
print("No NWM stream segments within HUC " + str(hucCode) + " boundaries.")
Expand Down Expand Up @@ -230,7 +315,7 @@ def subset_vector_layers(
'-lps', '--subset-levee-protected-areas', help='Levee-protected areas subset', required=True
)

parser.add_argument('-crs', '--huc-crs', help='HUC crs', required=True)
parser.add_argument('-crs', '--huc-CRS', help='HUC crs', required=True)

args = vars(parser.parse_args())

Expand Down
10 changes: 5 additions & 5 deletions data/wbd/generate_pre_clip_fim_huc8.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@

Usage:
generate_pre_clip_fim_huc8.py
-n /data/inputs/pre_clip_huc8/24_3_20
-u /data/inputs/huc_lists/included_huc8.lst
-n /data/inputs/pre_clip_huc8/24_04_23
-u /data/inputs/huc_lists/included_huc8_withAlaska.lst
-j 6
-o

Notes:
If running this script to generate new data, modify the pre_clip_huc_dir variable in
src/bash_variables.env to the corresponding outputs_dir argument after running and testing this script.
The newly generated data should be created in a new folder using the format <year_month_day>
(i.e. September 26, 2023 would be 23_9_26)
(i.e. September 26, 2023 would be 23_09_26)
'''

srcDir = os.getenv('srcDir')
Expand Down Expand Up @@ -428,7 +428,7 @@ def huc_level_clip_vectors_to_wbd(args):
usage='''
./generate_pre_clip_fim_huc8.py
-n /data/inputs/pre_clip_huc8/24_3_20
-u /data/inputs/huc_lists/included_huc8.lst
-u /data/inputs/huc_lists/included_huc8_withAlaska.lst
-j 6
-o
''',
Expand All @@ -438,7 +438,7 @@ def huc_level_clip_vectors_to_wbd(args):
'-n',
'--outputs_dir',
help='Directory to output all of the HUC level .gpkg files. Use the format: '
'<year_month_day> (i.e. September 26, 2023 would be 2023_9_26)',
'<year_month_day> (i.e. September 26, 2023 would be 23_09_26)',
)
parser.add_argument('-u', '--huc_list', help='List of HUCs to genereate pre-clipped vectors for.')
parser.add_argument(
Expand Down
12 changes: 12 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.4.x.x - 2024-04-15 - [PR#1121](https://github.com/NOAA-OWP/inundation-mapping/pull/1121)

Some NWM streams, particularly in coastal areas, fail to reach the edge of the DEM resulting in reverse flow. This issue was resolved by clipping the ocean mask from the buffered WBD and DEM, and any remaining streams that didn't have outlets reaching the edge of the buffered WBD boundary were extended by snapping the end to the nearest point on the buffered WBD.

### Changes

- `data/wbd/clip_vectors_to_wbd.py`: Clips `landsea` ocean mask from the buffered WBD and adds a function to extend outlet streams to the buffered WBD

<br/><br/>



## v4.4.15.0 - 2024-04-17 - [PR#1081](https://github.com/NOAA-OWP/inundation-mapping/pull/1081)

Expand Down Expand Up @@ -86,6 +97,7 @@ The "black" packages is also be upgraded from 23.7.0 to 24.3.

<br/><br/>


## v4.4.13.1 - 2024-03-11 - [PR#1086](https://github.com/NOAA-OWP/inundation-mapping/pull/1086)

Fixes bug where levee-protected areas were not being masked from branch 0 DEMs.
Expand Down