Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1pt] PR: Clip ocean mask from DEM and extend outlet streams #1121

Merged
merged 12 commits into from
May 6, 2024
177 changes: 144 additions & 33 deletions data/wbd/clip_vectors_to_wbd.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,83 @@
#!/usr/bin/env python3

import argparse
import logging
import sys

import geopandas as gpd
import pandas as pd
import rasterio as rio
from shapely.geometry import MultiPolygon, Polygon
from shapely.geometry import LineString, MultiPolygon, Point, Polygon
from shapely.ops import nearest_points

from utils.shared_functions import getDriver


# from utils.shared_variables import DEFAULT_FIM_PROJECTION_CRS
gpd.options.io_engine = "pyogrio"


def extend_outlet_streams(streams, wbd_buffered, wbd):
    """
    Extend outlet streams to the nearest point on the buffered WBD boundary.

    Outlet stream segments (rows whose 'to' attribute is 0) that do not
    already intersect the WBD boundary line are extended by appending one
    final vertex at the nearest point on the buffered WBD boundary.

    Parameters
    ----------
    streams : gpd.GeoDataFrame
        NWM stream segments; must contain 'ID', 'to' and 'geometry' columns.
    wbd_buffered : gpd.GeoDataFrame
        Buffered WBD polygon(s); its boundary is the extension target.
    wbd : gpd.GeoDataFrame
        Unbuffered WBD polygon(s); its boundary decides which outlets
        already reach the watershed edge.

    Returns
    -------
    gpd.GeoDataFrame
        Copy of `streams` with the extended outlet geometries substituted in.
    """

    # Work on copies so the caller's GeoDataFrames are not mutated in place
    # (the original implementation replaced the caller's polygon geometry
    # with its boundary as a side effect).
    wbd = wbd.copy()
    wbd_buffered = wbd_buffered.copy()

    # Convert the WBD polygon(s) to their boundary line.
    wbd['geometry'] = wbd.geometry.boundary
    wbd = gpd.GeoDataFrame(data=wbd, geometry='geometry')

    # Select only the streams that are outlets
    levelpath_outlets = streams[streams['to'] == 0]

    # Keep only the outlets that don't already reach the WBD boundary line.
    # NOTE(review): assumes the WBD layer has a single (first) feature — confirm.
    levelpath_outlets = levelpath_outlets[~levelpath_outlets.intersects(wbd['geometry'].iloc[0])]

    # Explode multipart lines; index_parts=False keeps (possibly duplicated)
    # original index labels.
    levelpath_outlets = levelpath_outlets.explode(index_parts=False)

    # Record the last vertex of each (exploded) outlet part. Using .apply
    # assigns per row, which stays correct even when explode() produced
    # duplicate index labels (a positional .at[] write would clobber all
    # rows sharing a label with the same point).
    levelpath_outlets['last'] = levelpath_outlets.geometry.apply(lambda geom: Point(geom.coords[-1]))

    # Convert the buffered WBD polygon(s) to their boundary line.
    wbd_buffered['geometry'] = wbd_buffered.geometry.boundary
    wbd_buffered = gpd.GeoDataFrame(data=wbd_buffered, geometry='geometry')

    for index, row in levelpath_outlets.iterrows():
        # Nearest point on the buffered boundary to this outlet's last vertex.
        nearest_point = nearest_points(row['last'], wbd_buffered)[1]['geometry'].iloc[0]

        # Defensive: collapse to a single geometry if a Series came back.
        if isinstance(nearest_point, pd.Series):
            nearest_point = nearest_point.iloc[-1]

        # Append the nearest boundary point as the new final vertex.
        levelpath_outlets.at[index, 'geometry'] = LineString(
            list(row['geometry'].coords) + [nearest_point.coords[0]]
        )

    levelpath_outlets = gpd.GeoDataFrame(data=levelpath_outlets, geometry='geometry')
    levelpath_outlets = levelpath_outlets.drop(columns=['last'])

    # Replace the streams in the original file with the extended streams
    streams = streams[~streams['ID'].isin(levelpath_outlets['ID'])]
    streams = pd.concat([streams, levelpath_outlets], ignore_index=True)

    return streams


def subset_vector_layers(
Expand Down Expand Up @@ -40,52 +106,80 @@ def subset_vector_layers(
subset_levee_protected_areas,
huc_CRS,
):
print(f"Getting Cell Size for {hucCode}", flush=True)

# print(f"Getting Cell Size for {hucCode}", flush=True)
with rio.open(dem_filename) as dem_raster:
dem_cellsize = max(dem_raster.res)

wbd = gpd.read_file(wbd_filename, engine="pyogrio", use_arrow=True)
dem_domain = gpd.read_file(dem_domain, engine="pyogrio", use_arrow=True)

# Get wbd buffer
print(f"Create wbd buffer for {hucCode}", flush=True)
logging.info(f"Create wbd buffer for {hucCode}")
wbd_buffer = wbd.copy()
wbd_buffer.geometry = wbd_buffer.geometry.buffer(wbd_buffer_distance, resolution=32)
wbd_buffer = gpd.clip(wbd_buffer, dem_domain)

# Clip ocean water polygon for future masking ocean areas (where applicable)
logging.info(f"Clip ocean water polygon for {hucCode}")
landsea = gpd.read_file(landsea, mask=wbd_buffer, engine="fiona")
if not landsea.empty:
# print(f"Create landsea gpkg for {hucCode}", flush=True)
landsea.to_file(
subset_landsea, driver=getDriver(subset_landsea), index=False, crs=huc_CRS, engine="fiona"
)

# Exclude landsea area from WBD and wbd_buffer
wbd = wbd.overlay(landsea, how='difference')
wbd.to_file(
wbd_filename,
layer='WBDHU8',
driver=getDriver(wbd_filename),
index=False,
crs=huc_CRS,
engine="fiona",
)

wbd_buffer = wbd_buffer.overlay(landsea, how='difference')

del landsea

# Make the streams buffer smaller than the wbd_buffer so streams don't reach the edge of the DEM
logging.info(f"Create stream buffer for {hucCode}")
wbd_streams_buffer = wbd_buffer.copy()
wbd_streams_buffer.geometry = wbd_streams_buffer.geometry.buffer(-8 * dem_cellsize, resolution=32)

wbd_buffer = wbd_buffer[['geometry']]
wbd_streams_buffer = wbd_streams_buffer[['geometry']]
wbd_buffer.to_file(wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False, crs=huc_CRS)
wbd_buffer.to_file(
wbd_buffer_filename, driver=getDriver(wbd_buffer_filename), index=False, crs=huc_CRS, engine="fiona"
)
wbd_streams_buffer.to_file(
wbd_streams_buffer_filename, driver=getDriver(wbd_streams_buffer_filename), index=False, crs=huc_CRS
wbd_streams_buffer_filename,
driver=getDriver(wbd_streams_buffer_filename),
index=False,
crs=huc_CRS,
engine="fiona",
)

# Clip ocean water polygon for future masking ocean areas (where applicable)
landsea = gpd.read_file(landsea, mask=wbd_buffer)
if not landsea.empty:
print(f"Create landsea gpkg for {hucCode}", flush=True)
landsea.to_file(subset_landsea, driver=getDriver(subset_landsea), index=False, crs=huc_CRS)
del landsea

# Clip levee-protected areas polygons for future masking ocean areas (where applicable)
print(f"Subsetting Levee Protected Areas for {hucCode}", flush=True)
levee_protected_areas = gpd.read_file(levee_protected_areas, mask=wbd_buffer)
# print(f"Subsetting Levee Protected Areas for {hucCode}", flush=True)
logging.info(f"Clip levee-protected areas for {hucCode}")
levee_protected_areas = gpd.read_file(levee_protected_areas, mask=wbd_buffer, engine="fiona")
if not levee_protected_areas.empty:
levee_protected_areas.to_file(
subset_levee_protected_areas,
driver=getDriver(subset_levee_protected_areas),
index=False,
crs=huc_CRS,
engine="fiona",
)
del levee_protected_areas

# Find intersecting lakes and writeout
print(f"Subsetting NWM Lakes for {hucCode}", flush=True)
nwm_lakes = gpd.read_file(nwm_lakes, mask=wbd_buffer)
# print(f"Subsetting NWM Lakes for {hucCode}", flush=True)
logging.info(f"Subsetting NWM Lakes for {hucCode}")
nwm_lakes = gpd.read_file(nwm_lakes, mask=wbd_buffer, engine="fiona")
nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0]

if not nwm_lakes.empty:
Expand All @@ -97,63 +191,79 @@ def subset_vector_layers(
# Loop through the filled polygons and insert the new geometry
for i in range(len(nwm_lakes_fill_holes.geoms)):
nwm_lakes.loc[i, 'geometry'] = nwm_lakes_fill_holes.geoms[i]
nwm_lakes.to_file(subset_nwm_lakes, driver=getDriver(subset_nwm_lakes), index=False, crs=huc_CRS)
nwm_lakes.to_file(
subset_nwm_lakes, driver=getDriver(subset_nwm_lakes), index=False, crs=huc_CRS, engine="fiona"
)
del nwm_lakes

# Find intersecting levee lines
print(f"Subsetting NLD levee lines for {hucCode}", flush=True)
nld_lines = gpd.read_file(nld_lines, mask=wbd_buffer)
logging.info(f"Subsetting NLD levee lines for {hucCode}")
nld_lines = gpd.read_file(nld_lines, mask=wbd_buffer, engine="fiona")
if not nld_lines.empty:
nld_lines.to_file(subset_nld_lines, driver=getDriver(subset_nld_lines), index=False, crs=huc_CRS)
nld_lines.to_file(
subset_nld_lines, driver=getDriver(subset_nld_lines), index=False, crs=huc_CRS, engine="fiona"
)
del nld_lines

# Preprocessed levee lines for burning
nld_lines_preprocessed = gpd.read_file(nld_lines_preprocessed, mask=wbd_buffer)
nld_lines_preprocessed = gpd.read_file(nld_lines_preprocessed, mask=wbd_buffer, engine="fiona")
if not nld_lines_preprocessed.empty:
nld_lines_preprocessed.to_file(
subset_nld_lines_preprocessed,
driver=getDriver(subset_nld_lines_preprocessed),
index=False,
crs=huc_CRS,
engine="fiona",
)
del nld_lines_preprocessed

# Subset NWM headwaters
print(f"Subsetting NWM Headwater Points for {hucCode}", flush=True)
nwm_headwaters = gpd.read_file(nwm_headwaters, mask=wbd_streams_buffer)
logging.info(f"Subsetting NWM Headwater Points for {hucCode}")
nwm_headwaters = gpd.read_file(nwm_headwaters, mask=wbd_streams_buffer, engine="fiona")

if len(nwm_headwaters) > 0:
nwm_headwaters.to_file(
subset_nwm_headwaters, driver=getDriver(subset_nwm_headwaters), index=False, crs=huc_CRS
subset_nwm_headwaters,
driver=getDriver(subset_nwm_headwaters),
index=False,
crs=huc_CRS,
engine="fiona",
)
else:
print("No headwater point(s) within HUC " + str(hucCode) + " boundaries.")
logging.info("No headwater point(s) within HUC " + str(hucCode) + " boundaries.")
sys.exit(0)

del nwm_headwaters

# Find intersecting nwm_catchments
print(f"Subsetting NWM Catchments for {hucCode}", flush=True)
nwm_catchments = gpd.read_file(nwm_catchments, mask=wbd_buffer)
# print(f"Subsetting NWM Catchments for {hucCode}", flush=True)
nwm_catchments = gpd.read_file(nwm_catchments, mask=wbd_buffer, engine="fiona")

if len(nwm_catchments) > 0:
nwm_catchments.to_file(
subset_nwm_catchments, driver=getDriver(subset_nwm_catchments), index=False, crs=huc_CRS
subset_nwm_catchments,
driver=getDriver(subset_nwm_catchments),
index=False,
crs=huc_CRS,
engine="fiona",
)
else:
print("No NWM catchments within HUC " + str(hucCode) + " boundaries.")
logging.info("No NWM catchments within HUC " + str(hucCode) + " boundaries.")
sys.exit(0)

del nwm_catchments

# Subset nwm streams
print(f"Subsetting NWM Streams for {hucCode}", flush=True)

nwm_streams = gpd.read_file(nwm_streams, mask=wbd_buffer)
logging.info(f"Subsetting NWM Streams for {hucCode}")
nwm_streams = gpd.read_file(nwm_streams, mask=wbd_buffer, engine="fiona")

# NWM can have duplicate records, but appear to always be identical duplicates
nwm_streams.drop_duplicates(subset="ID", keep="first", inplace=True)

nwm_streams = extend_outlet_streams(nwm_streams, wbd_buffer, wbd)

nwm_streams_outlets = nwm_streams[~nwm_streams['to'].isin(nwm_streams['ID'])]
nwm_streams_nonoutlets = nwm_streams[nwm_streams['to'].isin(nwm_streams['ID'])]

Expand All @@ -177,10 +287,11 @@ def subset_vector_layers(
nwm_streams = pd.concat([nwm_streams_nonoutlets, nwm_streams_outlets])

nwm_streams.to_file(
subset_nwm_streams, driver=getDriver(subset_nwm_streams), index=False, crs=huc_CRS
subset_nwm_streams, driver=getDriver(subset_nwm_streams), index=False, crs=huc_CRS, engine="fiona"
)
else:
print("No NWM stream segments within HUC " + str(hucCode) + " boundaries.")
logging.info("No NWM stream segments within HUC " + str(hucCode) + " boundaries.")
sys.exit(0)
del nwm_streams

Expand Down Expand Up @@ -230,7 +341,7 @@ def subset_vector_layers(
'-lps', '--subset-levee-protected-areas', help='Levee-protected areas subset', required=True
)

parser.add_argument('-crs', '--huc-crs', help='HUC crs', required=True)
parser.add_argument('-crs', '--huc-CRS', help='HUC crs', required=True)

args = vars(parser.parse_args())

Expand Down
Loading
Loading