Merge pull request #79 from Deltares/rivers
Rivers
DirkEilander committed Nov 19, 2021
2 parents 63443f1 + 34b79b8 commit b51e0ff
Showing 14 changed files with 386 additions and 29 deletions.
1 change: 1 addition & 0 deletions docs/api/api_methods.rst
@@ -251,6 +251,7 @@ visit the `pyflwdir docs. <https://deltares.github.io/pyflwdir/latest/>`_
flw.gauge_map
flw.outlet_map
flw.clip_basins
flw.floodplain_elevation


General GIS methods
10 changes: 10 additions & 0 deletions docs/api/api_workflows.rst
@@ -23,6 +23,16 @@ Basin mask
workflows.basin_mask.get_basin_geometry
workflows.basin_mask.parse_region

River bathymetry
================

.. autosummary::
:toctree: ../_generated

workflows.rivers.river_width
workflows.rivers.river_depth


Forcing
=======

11 changes: 8 additions & 3 deletions docs/changelog.rst
@@ -5,20 +5,25 @@ All notable changes to this project will be documented in this page.
The format is based on `Keep a Changelog`_, and this project adheres to
`Semantic Versioning`_.

Unreleased
----------
v0.4.4 (19 November 2021)
-------------------------

Added
^^^^^
- flw.d8_from_dem to derive a flow direction raster from a DEM
- flw.reproject_hydrography_like to reproject flow direction raster data
- flw.floodplain_elevation method which returns floodplain classification and hydrologically adjusted elevation
- raster.flipud method to flip data along y-axis
- raster.area_grid to get the raster cell areas [m2]
- raster.density_grid to convert the values to [unit/m2]
- gis_utils.spread2d method wrapping its pyflwdir equivalent
- gis_utils.spread2d method (wrapping its pyflwdir equivalent) to spread values on a raster
- gis_utils.nearest and gis_utils.nearest_merge methods to merge GeoDataFrame based on proximity
- river_width to estimate a segment average river width based on a river mask raster
- river_depth to get segment average river depth estimates based on bankfull discharge (requires pyflwdir v0.5.2)

Changed
^^^^^^^
- bumped hydromt-artifacts version to v0.0.6
- In model API build and update functions, if any write_* are called in the ini file (opt),
the final self.write() call is skipped. This enables passing custom arguments to the write_
functions without double writing files or customizing the order in which write_ functions
are called.
4 changes: 2 additions & 2 deletions envs/hydromt-dev.yml
@@ -17,7 +17,7 @@ dependencies:
- entrypoints
- flit>=3.2
- gdal>=3.1
- geopandas
- geopandas>=0.10
- jupyter # to run examples
- netcdf4
- matplotlib # to run examples
@@ -28,7 +28,7 @@ dependencies:
- pandas
- pcraster # TODO make optional
- pip
- pyflwdir>=0.5.0
- pyflwdir>=0.5.3
- pygeos>=0.8
- pytest # tests
- pytest-cov # tests
4 changes: 2 additions & 2 deletions envs/hydromt-min.yml
@@ -14,13 +14,13 @@ dependencies:
- entrypoints
- flit>=3.2
- gdal>=3.1
- geopandas
- geopandas>=0.10
- netcdf4
- numba
- numpy
- openpyxl
- pandas
- pyflwdir>=0.5.0
- pyflwdir>=0.5.3
- pygeos>=0.8
- python>=3.8
- rasterio
4 changes: 2 additions & 2 deletions envs/hydromt-test.yml
@@ -14,14 +14,14 @@ dependencies:
- dask
- entrypoints
- gdal>=3.1
- geopandas
- geopandas>=0.10
- netcdf4
- numba
- numpy
- openpyxl
- pandas
- pcraster # optional
- pyflwdir>=0.5.0
- pyflwdir>=0.5.3
- pygeos>=0.8
- pytest # tests
- pytest-cov # tests
2 changes: 1 addition & 1 deletion hydromt/__init__.py
@@ -1,7 +1,7 @@
"""HydroMT: Build and analyze models like a data-wizard!"""

# version number without 'v' at start
__version__ = "0.4.4.dev"
__version__ = "0.4.4"

import geopandas as gpd
import warnings
2 changes: 1 addition & 1 deletion hydromt/data_adapter.py
@@ -36,7 +36,7 @@ class DataCatalog(object):
# root URL and version with data artifacts
# url = f"{_url}/download/{_version}/<filename>"
_url = r"https://github.com/DirkEilander/hydromt-artifacts/releases"
_version = "v0.0.5" # latest version
_version = "v0.0.6" # latest version

def __init__(self, data_libs=None, logger=logger, **artifact_keys):
"""Catalog of DataAdapter sources to easily read from different files
85 changes: 85 additions & 0 deletions hydromt/flw.py
@@ -604,3 +604,88 @@ def clip_basins(ds, flwdir, xy, flwdir_name="flwdir", **stream_kwargs):
# clip data
ds.coords["mask"] = da_basins
return ds.raster.clip_mask(da_basins)


def floodplain_elevation(
ds_model: xr.Dataset,
adjust_river_d8: bool = False,
connectivity=4,
logger=logger,
**kwargs,
) -> xr.Dataset:
"""Returns a binary floodplain classification and hydrologically adjusted elevation.
Parameters
----------
ds_model : xr.Dataset
Model dataset containing flow directions ("flwdir") [-], elevation ("elevtn") [m+REF]
variables, and optionally upstream area ("uparea") [km2] and river mask ("rivmsk") [-] variables.
connectivity: {4, 8}
D4 or D8 flow connectivity.
adjust_river_d8 : bool
If True and `connectivity=4`, additionally condition river cells to D8.
Requires an additional river mask ("rivmsk") [-] variable in `ds_model`.
**kwargs:
Additional keyword arguments are passed to :py:meth:`pyflwdir.FlwdirRaster.floodplains`
Returns
-------
xr.Dataset
Dataset with binary floodplain classification ('fldpln') [-] and
hydrologically adjusted elevation ('elevtn') [m+REF]
"""
# check data variables.
dvars_model = ["flwdir", "elevtn"]
if not np.all([v in ds_model for v in dvars_model]):
raise ValueError(f"One or more variables missing from ds_model: {dvars_model}")

# get flow directions for entire domain and for rivers
flwdir = flwdir_from_da(ds_model["flwdir"], mask=False)

logger.info(f"Condition elevation to D{connectivity} flow directions.")
# get D8 conditioned elevation
nodata = ds_model["elevtn"].raster.nodata
elv = flwdir.dem_adjust(ds_model["elevtn"].values)
# get D4 conditioned elevation (based on D8 conditioned!)
if connectivity == 4:
# derive D4 flow directions with forced pits at original locations
d4 = pyflwdir.dem.fill_depressions(
elevtn=elv,
nodata=nodata,
connectivity=connectivity,
idxs_pit=flwdir.idxs_pit,
)[1]
# condition the DEM to the new D4 flow dirs
flwdir_d4 = pyflwdir.from_array(
d4, ftype="d8", transform=flwdir.transform, latlon=flwdir.latlon
)
elv = flwdir_d4.dem_adjust(elv)
# condition river cells to D8
if adjust_river_d8 and "rivmsk" in ds_model:
rivmsk = ds_model["rivmsk"] == 1
flwdir_river = flwdir_from_da(ds_model["flwdir"], mask=rivmsk)
elv = flwdir_river.dem_adjust(elv)
# assert np.all((elv2 - flwdir_river.downstream(elv2))>=0)
elif adjust_river_d8:
logger.warning('Provide "rivmsk" variable to condition river cells in D8.')
# assert np.all((elv2 - flwdir_d4.downstream(elv2))>=0)

# get binary floodplain map from D8 values
uparea = ds_model["uparea"].values if "uparea" in ds_model else None
if uparea is None:
logger.warning(
'"uparea" variable calculated on the fly, '
"provide the variable for better floodplain classification."
)
fldpln = flwdir.floodplains(elevtn=elv, uparea=uparea, **kwargs)

# save to dataset
dims = ds_model.raster.dims
ds_out = xr.Dataset(coords=ds_model.raster.coords)
ds_out["elevtn"] = xr.Variable(dims, elv)
ds_out["elevtn"].raster.set_nodata(nodata)
ds_out["fldpln"] = xr.Variable(dims, fldpln)
ds_out["fldpln"].raster.set_nodata(-1)

ds_out.raster.set_crs(ds_model.raster.crs)
return ds_out
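
A minimal usage sketch of the new flw.floodplain_elevation method (not part of the diff above); the file name "staticmaps.nc" is assumed, and ds_model must provide at least the "flwdir" and "elevtn" variables:

import xarray as xr
from hydromt import flw

# hypothetical model dataset with "flwdir", "elevtn" and, optionally, "uparea" and "rivmsk"
ds_model = xr.open_dataset("staticmaps.nc")
ds_out = flw.floodplain_elevation(
    ds_model,
    connectivity=4,        # condition the elevation to D4 flow directions
    adjust_river_d8=True,  # additionally condition river ("rivmsk") cells to D8
)
fldpln = ds_out["fldpln"]  # binary floodplain classification [-]
elevtn = ds_out["elevtn"]  # hydrologically adjusted elevation [m+REF]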
115 changes: 109 additions & 6 deletions hydromt/gis_utils.py
@@ -12,13 +12,16 @@
import geopandas as gpd
from shapely.geometry.base import BaseGeometry
from shapely.geometry import box
import pygeos
import tempfile
import logging
from pyflwdir import core_conversion, core_d8, core_ldd
from pyflwdir import gis_utils as gis
from typing import Optional
from typing import Optional, Tuple

__all__ = ["spread2d", "nearest", "nearest_merge"]

__all__ = ["spread2d"]
logger = logging.getLogger(__name__)

_R = 6371e3 # Radius of earth in m. Use 3956e3 for miles
XATTRS = {
@@ -94,6 +97,106 @@
## GEOM functions


def nearest_merge(
gdf1: gpd.GeoDataFrame,
gdf2: gpd.GeoDataFrame,
columns: Optional[list] = None,
max_dist: Optional[float] = None,
overwrite: bool = False,
inplace: bool = False,
logger=logger,
) -> gpd.GeoDataFrame:
"""Merge attributes of gdf2 with the nearest feature of gdf1, optionally bounded by
a maximum distance `max_dist`. Unless `overwrite = True`, gdf2 values are only
merged where gdf1 has missing values.
Parameters
----------
gdf1, gdf2: geopandas.GeoDataFrame
Source `gdf1` and destination `gdf2` geometries.
columns : list of str, optional
Names of columns in `gdf2` to merge, by default None
max_dist : float, optional
Maximum distance threshold for merge, by default None, i.e.: no threshold.
overwrite : bool, optional
If False (default) gdf2 values are only merged where gdf1 has missing values,
i.e. NaN values for existing columns or missing columns.
Returns
-------
gpd.GeoDataFrame
Merged GeoDataFrames
"""
idx_nn, dst = nearest(gdf1, gdf2)
if not inplace:
gdf1 = gdf1.copy()
valid = dst < max_dist if max_dist is not None else np.ones_like(idx_nn, dtype=bool)
columns = gdf2.columns if columns is None else columns
gdf1["distance_right"] = dst
gdf1["index_right"] = -1
gdf1.loc[valid, "index_right"] = idx_nn[valid]
skip = ["geometry"]
for col in columns:
if col in skip or col not in gdf2:
if col not in gdf2:
logger.warning(f"Column {col} not found in gdf2 and skipped.")
continue
new_vals = gdf2.loc[idx_nn[valid], col].values
if col in gdf1 and not overwrite:
old_vals = gdf1.loc[valid, col]
replace = np.logical_or(old_vals.isnull(), old_vals.eq(""))
new_vals = np.where(replace, new_vals, old_vals)
gdf1.loc[valid, col] = new_vals
return gdf1
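
A minimal usage sketch of nearest_merge (not part of the diff above); the point coordinates and the "qbankfull" column are illustrative only:

import geopandas as gpd
from shapely.geometry import Point
from hydromt.gis_utils import nearest_merge

gdf_riv = gpd.GeoDataFrame(
    geometry=[Point(4.35, 52.00), Point(4.40, 52.10)], crs="EPSG:4326"
)
gdf_obs = gpd.GeoDataFrame(
    {"qbankfull": [120.0, 480.0]},
    geometry=[Point(4.36, 52.01), Point(4.50, 52.30)],
    crs="EPSG:4326",
)
# attach "qbankfull" from the nearest observation within 5 km;
# "index_right" and "distance_right" columns are added as well
gdf_out = nearest_merge(gdf_riv, gdf_obs, columns=["qbankfull"], max_dist=5000)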


def nearest(
gdf1: gpd.GeoDataFrame, gdf2: gpd.GeoDataFrame
) -> Tuple[np.ndarray, np.ndarray]:
"""Return the index of and distance [m] to the nearest geometry
in `gdf2` for each geometry of `gdf1`. For Line geometries in `gdf1` the nearest
geometry is based on the line center point and for polygons on their representative point.
Mixed geometry types are not yet supported.
Note: since geopandas v0.10.0, GeoDataFrame has a very similar `sjoin_nearest` method,
which could be used instead.
Parameters
----------
gdf1, gdf2: geopandas.GeoDataFrame
Source `gdf1` and destination `gdf2` geometries.
Returns
-------
index: ndarray
index of nearest `gdf2` geometry
dst: ndarray of float
distance to the nearest `gdf2` geometry
"""
if np.all(gdf1.type == "Point"):
pnts = gdf1.geometry.copy()
elif np.all(np.isin(gdf1.type, ["LineString", "MultiLineString"])):
pnts = gdf1.geometry.interpolate(0.5, normalized=True) # mid point
elif np.all(np.isin(gdf1.type, ["Polygon", "MultiPolygon"])):
pnts = gdf1.geometry.representative_point() # inside polygon
else:
raise NotImplementedError("Mixed geometry dataframes are not yet supported.")
if gdf1.crs != gdf2.crs:
pnts = pnts.to_crs(gdf2.crs)
# find nearest using pygeos
# NOTE: requires geopandas 0.10
other = pygeos.from_shapely(pnts.geometry.values)
idx = gdf2.sindex.nearest(other, return_all=False)[1]
# get distance in meters
gdf2_nearest = gdf2.iloc[idx]
if gdf2_nearest.crs.is_geographic:
pnts = pnts.to_crs(3857) # web mercator
gdf2_nearest = gdf2_nearest.to_crs(3857)
dst = gdf2_nearest.distance(pnts, align=False).values
return gdf2.index.values[idx], dst
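
A minimal sketch of nearest with line geometries (not part of the diff above; coordinates are illustrative); the nearest feature is searched from each line's mid point and the distance is returned in meters:

import geopandas as gpd
from shapely.geometry import LineString, Point
from hydromt.gis_utils import nearest

gdf_riv = gpd.GeoDataFrame(
    geometry=[LineString([(4.30, 52.00), (4.40, 52.05)])], crs="EPSG:4326"
)
gdf_obs = gpd.GeoDataFrame(
    geometry=[Point(4.36, 52.02), Point(4.80, 52.40)], crs="EPSG:4326"
)
idx, dst = nearest(gdf_riv, gdf_obs)  # index into gdf_obs and distance [m]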


def filter_gdf(gdf, geom=None, bbox=None, predicate="intersects"):
"""Filter GeoDataFrame geometries based on geometry mask or bounding box."""
gtypes = (gpd.GeoDataFrame, gpd.GeoSeries, BaseGeometry)
@@ -273,9 +376,9 @@ def cellres(lat, xres=1.0, yres=1.0):

def spread2d(
da_obs: xr.DataArray,
nodata: Optional[float] = None,
da_mask: Optional[xr.DataArray] = None,
da_friction: Optional[xr.DataArray] = None,
nodata: Optional[float] = None,
) -> xr.Dataset:
"""Returns values of `da_obs` spreaded to cells with `nodata` value within `da_mask`,
powered by :py:meth:`pyflwdir.gis_utils.spread2d`
Expand All @@ -285,14 +388,14 @@ def spread2d(
da_obs : xarray.DataArray
Input raster with observation values and background/nodata values which are
filled by the spreading algorithm.
nodata : float, optional
Nodata or background value. Must be finite numeric value. If not given the
raster nodata value is used.
da_mask : xarray.DataArray, optional
Mask of cells to fill with the spreading algorithm, by default None
da_friction : xarray.DataArray, optional
Friction values used by the spreading algorithm to calcuate the friction
distance, by default None
nodata : float, optional
Nodata or background value. Must be finite numeric value. If not given the
raster nodata value is used.
Returns
-------
1 change: 1 addition & 0 deletions hydromt/workflows/__init__.py
@@ -4,3 +4,4 @@
from .basemaps import *
from .basin_mask import *
from .forcing import *
from .rivers import *