From 163dee6a920aa8e6576a6d652ac450b121d5d8de Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 14:26:38 +0800 Subject: [PATCH 01/20] use xugrid in mesh methods #420 --- hydromt/models/model_mesh.py | 105 ++++++++------------ hydromt/workflows/mesh.py | 187 +++++++++++++++++++++++++++++++++-- tests/test_model.py | 9 +- 3 files changed, 226 insertions(+), 75 deletions(-) diff --git a/hydromt/models/model_mesh.py b/hydromt/models/model_mesh.py index 0037a98b8..a423f6750 100644 --- a/hydromt/models/model_mesh.py +++ b/hydromt/models/model_mesh.py @@ -6,7 +6,6 @@ from typing import Dict, List, Optional, Union import geopandas as gpd -import numpy as np import pandas as pd import xarray as xr import xugrid as xu @@ -39,8 +38,7 @@ def setup_mesh2d_from_rasterdataset( grid_name: Optional[str] = "mesh2d", variables: Optional[list] = None, fill_method: Optional[str] = None, - resampling_method: Optional[str] = "mean", - all_touched: Optional[bool] = True, + resampling_method: Optional[Union[str, List]] = "centroid", rename: Optional[Dict] = None, ) -> List[str]: """HYDROMT CORE METHOD: Add data variable(s) from ``raster_fn`` to 2D ``grid_name`` in mesh object. @@ -64,13 +62,14 @@ def setup_mesh2d_from_rasterdataset( fill_method : str, optional If specified, fills no data values using fill_nodata method. Available methods are {'linear', 'nearest', 'cubic', 'rio_idw'}. - resampling_method: str, optional + resampling_method: str, list, optional Method to sample from raster data to mesh. By default mean. Options include - {'count', 'min', 'max', 'sum', 'mean', 'std', 'median', 'q##'}. - all_touched : bool, optional - If True, all pixels touched by geometries will used to define the sample. - If False, only pixels whose center is within the geometry or that are - selected by Bresenham's line algorithm will be used. By default True. 
+ {"centroid", "barycentric", "mean", "harmonic_mean", "geometric_mean", "sum", + "minimum", "maximum", "mode", "median", "max_overlap"}. If centroid, will use + :py:meth:`xugrid.CentroidLocatorRegridder` method. If barycentric, will use + :py:meth:`xugrid.BarycentricInterpolator` method. If any other, will use + :py:meth:`xugrid.OverlapRegridder` method. + Can provide a list corresponding to ``variables``. rename: dict, optional Dictionary to rename variable names in raster_fn before adding to mesh {'name_in_raster_fn': 'name_in_mesh'}. By default empty. @@ -80,7 +79,6 @@ def setup_mesh2d_from_rasterdataset( list List of variables added to mesh. """ # noqa: E501 - rename = rename or {} self.logger.info(f"Preparing mesh data from raster source {raster_fn}") # Check if grid name in self.mesh if grid_name not in self.mesh_names: @@ -89,28 +87,20 @@ def setup_mesh2d_from_rasterdataset( ds = self.data_catalog.get_rasterdataset( raster_fn, bbox=self.bounds[grid_name], buffer=2, variables=variables ) - if isinstance(ds, xr.DataArray): - ds = ds.to_dataset() - - if fill_method is not None: - ds = ds.raster.interpolate_na(method=fill_method) - - # Convert mesh grid as geodataframe for sampling - # Reprojection happens to gdf inside of zonal_stats method - ds_sample = ds.raster.zonal_stats( - gdf=self.mesh_gdf[grid_name], - stats=resampling_method, - all_touched=all_touched, + + uds_sample = workflows.mesh2d_from_rasterdataset( + ds=ds, + mesh2d=self.mesh_grids[grid_name], + variables=variables, + fill_method=fill_method, + resampling_method=resampling_method, + rename=rename, + logger=self.logger, ) - # Rename variables - rm_dict = {f"{var}_{resampling_method}": var for var in ds.data_vars} - ds_sample = ds_sample.rename(rm_dict).rename(rename) - # Convert to UgridDataset - uds_sample = xu.UgridDataset(ds_sample, grids=self.mesh_grids[grid_name]) self.set_mesh(uds_sample, grid_name=grid_name, overwrite_grid=False) - return list(ds_sample.data_vars.keys()) + return 
list(uds_sample.data_vars.keys()) def setup_mesh2d_from_raster_reclass( self, @@ -119,9 +109,8 @@ def setup_mesh2d_from_raster_reclass( reclass_variables: list, grid_name: Optional[str] = "mesh2d", variable: Optional[str] = None, - fill_nodata: Optional[str] = None, - resampling_method: Optional[Union[str, list]] = "mean", - all_touched: Optional[bool] = True, + fill_method: Optional[str] = None, + resampling_method: Optional[Union[str, list]] = "centroid", rename: Optional[Dict] = None, **kwargs, ) -> List[str]: @@ -151,19 +140,18 @@ def setup_mesh2d_from_raster_reclass( variable : str, optional Name of the raster dataset variable to use. This is only required when reading datasets with multiple variables. By default, None. - fill_nodata : str, optional - If specified, fills nodata values in `raster_fn` using the `fill_nodata` + fill_method : str, optional + If specified, fills nodata values in `raster_fn` using the `fill_method` method before reclassifying. Available methods are {'linear', 'nearest', 'cubic', 'rio_idw'}. resampling_method : str or list, optional - Method to sample from raster data to the mesh. Can be a list per variable - in `reclass_variables` or a single method for all. By default, 'mean' is - used for all `reclass_variables`. Options include {'count', 'min', 'max', - 'sum', 'mean', 'std', 'median', 'q##'}. - all_touched : bool, optional - If True, all pixels touched by geometries will be used to define the sample. - If False, only pixels whose center is within the geometry or that are - selected by Bresenham's line algorithm will be used. By default, True. + Method to sample from raster data to mesh. By default centroid. Options include + {"centroid", "barycentric", "mean", "harmonic_mean", "geometric_mean", "sum", + "minimum", "maximum", "mode", "median", "max_overlap"}. If centroid, will use + :py:meth:`xugrid.CentroidLocatorRegridder` method. If barycentric, will use + :py:meth:`xugrid.BarycentricInterpolator` method.
If any other, will use + :py:meth:`xugrid.OverlapRegridder` method. + Can provide a list corresponding to ``reclass_variables``. rename : dict, optional Dictionary to rename variable names in `reclass_variables` before adding them to the mesh. The dictionary should have the form @@ -182,7 +170,6 @@ def setup_mesh2d_from_raster_reclass( ValueError If `raster_fn` is not a single variable raster. """ # noqa: E501 - rename = rename or {} self.logger.info( f"Preparing mesh data by reclassifying the data in {raster_fn} " f"based on {reclass_table_fn}." @@ -207,34 +194,20 @@ def setup_mesh2d_from_raster_reclass( reclass_table_fn, variables=reclass_variables ) - if fill_nodata is not None: - da = da.raster.interpolate_na(method=fill_nodata) - - # Mapping function - ds_vars = da.raster.reclassify(reclass_table=df_vars, method="exact") - - # Convert mesh grid as geodataframe for sampling - # Reprojection happens to gdf inside of zonal_stats method - ds_sample = ds_vars.raster.zonal_stats( - gdf=self.mesh_gdf[grid_name], - stats=np.unique(np.atleast_1d(resampling_method)), - all_touched=all_touched, + uds_sample = workflows.mesh2d_from_raster_reclass( + da=da, + df_vars=df_vars, + mesh2d=self.mesh_grids[grid_name], + reclass_variables=reclass_variables, + fill_method=fill_method, + resampling_method=resampling_method, + rename=rename, + logger=self.logger, ) - # Rename variables - if isinstance(resampling_method, str): - resampling_method = np.repeat(resampling_method, len(reclass_variables)) - rm_dict = { - f"{var}_{mtd}": var - for var, mtd in zip(reclass_variables, resampling_method) - } - ds_sample = ds_sample.rename(rm_dict).rename(rename) - ds_sample = ds_sample[reclass_variables] - # Convert to UgridDataset - uds_sample = xu.UgridDataset(ds_sample, grids=self.mesh_grids[grid_name]) self.set_mesh(uds_sample, grid_name=grid_name, overwrite_grid=False) - return list(ds_sample.data_vars.keys()) + return list(uds_sample.data_vars.keys()) @property def mesh(self) -> 
Union[xu.UgridDataArray, xu.UgridDataset]: diff --git a/hydromt/workflows/mesh.py b/hydromt/workflows/mesh.py index 74bdcda3b..42e908554 100644 --- a/hydromt/workflows/mesh.py +++ b/hydromt/workflows/mesh.py @@ -1,8 +1,11 @@ """Implementation for mesh based workflows.""" import logging -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Union import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr import xugrid as xu from pyproj import CRS from shapely.geometry import box @@ -16,6 +19,8 @@ __all__ = [ "create_mesh2d", + "mesh2d_from_rasterdataset", + "mesh2d_from_raster_reclass", "rename_mesh", ] @@ -187,14 +192,184 @@ def create_mesh2d( raise IndexError(err) mesh2d = subset - # Reproject to user crs option if needed - if mesh2d.ugrid.grid.crs != crs: - logger.info(f"Reprojecting mesh to crs {crs}") - mesh2d.ugrid.grid.to_crs(crs) - return mesh2d +def mesh2d_from_rasterdataset( + ds: Union[xr.DataArray, xr.Dataset], + mesh2d: Union[xu.UgridDataArray, xu.Ugrid2d], + variables: Optional[List] = None, + fill_method: Optional[str] = None, + resampling_method: Optional[str] = "centroid", + rename: Optional[Dict] = None, + logger: logging.Logger = logger, +) -> xu.UgridDataset: + """ + Resamples data in ds to mesh2d. + + Raster data is interpolated to the mesh using the ``resampling_method``. + + Parameters + ---------- + ds: xr.DataArray, xr.Dataset + Raster xarray data object. + mesh2d: xu.UgridDataArray, xu.Ugrid2d + Mesh2d grid to resample to. + variables: list, optional + List of variables to resample. By default all variables in ds. + fill_method : str, optional + If specified, fills no data values using fill_nodata method. + Available methods are {'linear', 'nearest', 'cubic', 'rio_idw'}. + resampling_method: str, list, optional + Method to sample from raster data to mesh. By default centroid.
Options include + {"centroid", "barycentric", "mean", "harmonic_mean", "geometric_mean", "sum", + "minimum", "maximum", "mode", "median", "max_overlap"}. If centroid, will use + :py:meth:`xugrid.CentroidLocatorRegridder` method. If barycentric, will use + :py:meth:`xugrid.BarycentricInterpolator` method. If any other, will use + :py:meth:`xugrid.OverlapRegridder` method. + Can provide a list corresponding to ``variables``. + rename: dict, optional + Dictionary to rename variable names in ds + {'name_ds': 'name_in_uds_out'}. By default empty. + + Returns + ------- + uds_out: xu.UgridDataset + Resampled data on mesh2d. + """ + rename = rename or {} + if isinstance(ds, xr.DataArray): + ds = ds.to_dataset() + if variables is not None: + ds = ds[variables] + + if fill_method is not None: + ds = ds.raster.interpolate_na(method=fill_method) + + # check resampling method + resampling_method = np.atleast_1d(resampling_method) + if len(resampling_method) == 1: + resampling_method = np.repeat(resampling_method, len(ds.data_vars)) + # one reproject method per variable + elif len(resampling_method) != len(variables): + raise ValueError( + f"resampling_method should have length 1 or {len(ds.data_vars)}" + ) + + # Prepare regridder + regridder = dict() + # Get one variable name in ds to simplify to da + var = [v for v in ds.data_vars][0] + uda = xu.UgridDataArray.from_structured( + ds[var].rename({ds.raster.x_dim: "x", ds.raster.y_dim: "y"}) + ) + uda.ugrid.set_crs(ds.raster.crs) + for method in np.unique(resampling_method): + logger.info(f"Preparing regridder for {method} method") + if method == "centroid": + regridder[method] = xu.CentroidLocatorRegridder(uda, mesh2d) + elif method == "barycentric": + regridder[method] = xu.BarycentricInterpolator(uda, mesh2d) + else: + regridder[method] = xu.OverlapRegridder(uda, mesh2d, method=method) + + # Convert ds to xugrid + for i, var in enumerate(ds.data_vars): + logger.info(f"Resampling {var} to mesh2d using {resampling_method[i]} 
method") + uda = xu.UgridDataArray.from_structured( + ds[var].rename({ds.raster.x_dim: "x", ds.raster.y_dim: "y"}) + ) + uda.ugrid.set_crs(ds.raster.crs) + # Interpolate + method = resampling_method[i] + # Interpolate + uda_out = regridder[method].regrid(uda) + # Add to uds_out + if i == 0: + uds_out = uda_out.to_dataset() + else: + uds_out[var] = uda_out + + # Rename variables + if rename is not None: + uds_out = uds_out.rename(rename) + + return uds_out + + +def mesh2d_from_raster_reclass( + da: xr.DataArray, + df_vars: pd.DataFrame, + mesh2d: Union[xu.UgridDataArray, xu.Ugrid2d], + reclass_variables: list, + fill_method: Optional[str] = None, + resampling_method: Optional[Union[str, list]] = "centroid", + rename: Optional[Dict] = None, + logger: logging.Logger = logger, +) -> List[str]: + """Resample data to ``mesh2d`` grid by reclassifying the data in ``da`` based on ``df_vars``. + + The reclassified raster data + are subsequently interpolated to the mesh using `resampling_method`. + + Parameters + ---------- + da : xr.DataArray + Raster xarray DataArray object. + df_vars : pd.DataFrame + Tabular pandas dataframe object for the reclassification table of `da`. + mesh2d: xu.UgridDataArray, xu.Ugrid2d + Mesh2d grid to resample to. + reclass_variables : list + List of reclass_variables from the df_vars table to add to the + mesh. The index column should match values in da. + fill_method : str, optional + If specified, fills nodata values in `da` using the `fill_method` + method before reclassifying. Available methods are + {'linear', 'nearest', 'cubic', 'rio_idw'}. + resampling_method: str, list, optional + Method to sample from raster data to mesh. By default centroid. Options include + {"centroid", "barycentric", "mean", "harmonic_mean", "geometric_mean", "sum", + "minimum", "maximum", "mode", "median", "max_overlap"}. If centroid, will use + :py:meth:`xugrid.CentroidLocatorRegridder` method.
If barycentric, will use + :py:meth:`xugrid.BarycentricInterpolator` method. If any other, will use + :py:meth:`xugrid.OverlapRegridder` method. + Can provide a list corresponding to ``reclass_variables``. + rename : dict, optional + Dictionary to rename variable names in `reclass_variables` before adding + them to the mesh. The dictionary should have the form + {'name_in_reclass_table': 'name_in_uds_out'}. By default, an empty dictionary. + + Returns + ------- + uds_out : xu.UgridDataset + Resampled data on mesh2d. + + See Also + -------- + mesh2d_from_rasterdataset + """ # noqa: E501 + rename = rename or {} + + if fill_method is not None: + da = da.raster.interpolate_na(method=fill_method) + + # Mapping function + ds_vars = da.raster.reclassify(reclass_table=df_vars, method="exact") + + uds_out = mesh2d_from_rasterdataset( + ds_vars, + mesh2d, + variables=reclass_variables, + fill_method=None, + resampling_method=resampling_method, + rename=rename, + logger=logger, + ) + + return uds_out + + def rename_mesh(mesh: Union[xu.UgridDataArray, xu.UgridDataset], name: str): """ Rename all grid variables in mesh according to UGRID conventions. 
diff --git a/tests/test_model.py b/tests/test_model.py index f9c77a102..50576f2a8 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -712,13 +712,16 @@ def test_meshmodel_setup(griduda, world): region = {"mesh": griduda} mod1 = MeshModel(data_libs=["artifact_data", dc_param_fn]) mod1.setup_mesh2d(region, grid_name="mesh2d") - mod1.setup_mesh2d_from_rasterdataset("vito", grid_name="mesh2d") + mod1.setup_mesh2d_from_rasterdataset( + "vito", grid_name="mesh2d", resampling_method="mode" + ) assert "vito" in mod1.mesh.data_vars mod1.setup_mesh2d_from_raster_reclass( raster_fn="vito", reclass_table_fn="vito_mapping", - reclass_variables=["roughness_manning"], - resampling_method="mean", + reclass_variables=["landuse", "roughness_manning"], + resampling_method=["mode", "centroid"], grid_name="mesh2d", ) assert "roughness_manning" in mod1.mesh.data_vars + assert np.all(mod1.mesh["landuse"].values == mod1.mesh["vito"].values) From 5ab1e2390492d263a7b9a31251ecf93ea85dd0c2 Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 14:30:29 +0800 Subject: [PATCH 02/20] update changelog --- docs/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index a766ba00d..4b8352536 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,6 +35,7 @@ Changed - possibility to ``load`` the data in the model read_ functions for netcdf files (default for read_grid in r+ mode). (PR #460) - Internal model components (e.g. `Models._maps`, `GridModel._grid``) are now initialized with None and should not be accessed directly, call the corresponding model property (e.g. `Model.maps`, `GridModel.grid`) instead. (PR #473) +- ``setup_mesh2d_from_rasterdataset`` and ``setup_mesh2d_from_raster_reclass`` now use xugrid Regridder methods. (PR #535) - Use the Model.data_catalog to read the model region if defined by a geom or grid. 
(PR #479) Fixed From aa4a320f59ad6e14ec0984f033b6e3aaf2f9d18a Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 14:45:29 +0800 Subject: [PATCH 03/20] Add conda info to github docs and tests workflows --- .github/workflows/docs.yml | 5 +++++ .github/workflows/tests.yml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1e8fdc2d7..0bcf02b0e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -68,6 +68,11 @@ jobs: - name: Install hydromt run: pip install . + - name: Conda info + run: | + conda info + conda list + - name: test docs if: ${{ github.event_name == 'pull_request' }} run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7821c0ecf..953893ff4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -61,6 +61,11 @@ jobs: run: mamba env update -n hydromt -f environment.yml if: steps.cache.outputs.cache-hit != 'true' + - name: Conda info + run: | + conda info + conda list + - name: Test run: python -m pytest --verbose --cov=hydromt --cov-report xml From c3aaac8688d695376b705edee41260deaade3532 Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 15:30:32 +0800 Subject: [PATCH 04/20] temporary xarray pin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 870ad68d3..2e47ceb76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "scipy", # scientific utilities "tomli", # parsing toml files "tomli-w", # writing toml files - "xarray", # ndim data + "xarray<=2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT "zarr", # zarr From 8a2c96d1e3295c4a6b25862703566b42fea7a753 Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 15:33:14 +0800 Subject: 
[PATCH 05/20] remove skip on xugrid tests --- tests/test_model.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_model.py b/tests/test_model.py index 50576f2a8..60a0e6e5e 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -67,7 +67,6 @@ def test_load(): # test both with and without xugrid -@pytest.mark.parametrize("has_xugrid", [hydromt._compat.HAS_XUGRID, False]) def test_global_models(mocker, has_xugrid): mocker.patch("hydromt._compat.HAS_XUGRID", has_xugrid) keys = list(model_plugins.LOCAL_EPS.keys()) @@ -611,7 +610,6 @@ def test_networkmodel(network_model, tmpdir): _ = network_model.network -@pytest.mark.skipif(not hasattr(hydromt, "MeshModel"), reason="Xugrid not installed.") def test_meshmodel(mesh_model, tmpdir): MeshModel = MODELS.load("mesh_model") assert "mesh" in mesh_model.api @@ -628,7 +626,6 @@ def test_meshmodel(mesh_model, tmpdir): assert equal, errors -@pytest.mark.skipif(not hasattr(hydromt, "MeshModel"), reason="Xugrid not installed.") def test_setup_mesh(tmpdir, griduda): MeshModel = MODELS.load("mesh_model") # Initialize model @@ -699,7 +696,6 @@ def test_setup_mesh(tmpdir, griduda): assert np.all(np.round(model.region.total_bounds, 3) == bounds) -@pytest.mark.skipif(not hasattr(hydromt, "MeshModel"), reason="Xugrid not installed.") def test_meshmodel_setup(griduda, world): MeshModel = MODELS.load("mesh_model") dc_param_fn = join(DATADIR, "parameters_data.yml") From 3c35906b3a47bfcbe91a90a97cd3c2aec40631b5 Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 15:44:38 +0800 Subject: [PATCH 06/20] bugfix tests --- tests/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_model.py b/tests/test_model.py index 60a0e6e5e..4e47611be 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -67,6 +67,7 @@ def test_load(): # test both with and without xugrid +@pytest.mark.parametrize("has_xugrid", [hydromt._compat.HAS_XUGRID, False]) def test_global_models(mocker, has_xugrid): 
mocker.patch("hydromt._compat.HAS_XUGRID", has_xugrid) keys = list(model_plugins.LOCAL_EPS.keys()) From 1bb116ece8411ecb383f0a92dc10fc0ae3c2db9e Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 15:47:47 +0800 Subject: [PATCH 07/20] strict < in xarray pin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2e47ceb76..8d28f0d9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "scipy", # scientific utilities "tomli", # parsing toml files "tomli-w", # writing toml files - "xarray<=2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release + "xarray<2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT "zarr", # zarr From 4dbc49cce8111a9d5c1022b5da30db2f62ad2e8f Mon Sep 17 00:00:00 2001 From: hboisgon Date: Fri, 29 Sep 2023 17:13:54 +0800 Subject: [PATCH 08/20] update api docs for the workflows --- docs/api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 66ec0347a..1bab5e5af 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -398,6 +398,8 @@ Mesh :toctree: _generated workflows.mesh.create_mesh2d + workflows.mesh.mesh2d_from_rasterdataset + workflows.mesh.mesh2d_from_raster_reclass Basin mask From a302a5c3c83efc6cd974969d1d4ace0eea24e252 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 2 Oct 2023 09:23:07 +0200 Subject: [PATCH 09/20] make GHA always save cache, even on failure --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 953893ff4..aa4eebe1e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: - name: Update environment & write to cache run: mamba env update -n hydromt -f environment.yml - if: 
steps.cache.outputs.cache-hit != 'true' + if: always() - name: Conda info run: | From 2c49de700dfe42697624fcfcf6d7912fda7a49b1 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 2 Oct 2023 10:10:28 +0200 Subject: [PATCH 10/20] Revert "make GHA always save cache, even on failure" This reverts commit a302a5c3c83efc6cd974969d1d4ace0eea24e252. --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index aa4eebe1e..953893ff4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: - name: Update environment & write to cache run: mamba env update -n hydromt -f environment.yml - if: always() + if: steps.cache.outputs.cache-hit != 'true' - name: Conda info run: | From f6933352c8eff59c340209199e9a01f7dd1b8c19 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Wed, 4 Oct 2023 16:10:07 +0200 Subject: [PATCH 11/20] Revert "strict < in xarray pin" This reverts commit 1bb116ece8411ecb383f0a92dc10fc0ae3c2db9e. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d28f0d9c..2e47ceb76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "scipy", # scientific utilities "tomli", # parsing toml files "tomli-w", # writing toml files - "xarray<2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release + "xarray<=2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT "zarr", # zarr From 03b9034758a322209a5395ce792de2d1009b776a Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 11:11:11 +0200 Subject: [PATCH 12/20] precompile xugrid regridder in conftest --- tests/conftest.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index c3bceee45..ef62c16f8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -321,3 +321,23 @@ def artifact_data(): datacatalog = DataCatalog() datacatalog.from_predefined_catalogs("artifact_data") return datacatalog + + +@pytest.fixture(scope="session", autouse=True) +def _compile_xugrid_methods() -> None: + da = xr.DataArray( + data=np.random.rand(15, 10), + dims=("y", "x"), + coords={"y": -np.arange(0, 15), "x": np.arange(0, 10)}, + attrs=dict(_FillValue=-9999), + ) + da.raster.set_crs(4326) + uda_src = xu.UgridDataArray.from_structured(da) + uda_src.ugrid.set_crs(da.raster.crs) + uda_dst = xu.UgridDataset.from_geodataframe(da.raster.box) + uda_dst.ugrid.set_crs(da.raster.crs) + # this should trigger the compilation of the numba_celltree methods + # used in xugrid to make sure these don't affect the test timeout resutls + xu.CentroidLocatorRegridder(uda_src, uda_dst).regrid(uda_src) + xu.BarycentricInterpolator(uda_src, uda_dst).regrid(uda_src) + xu.OverlapRegridder(uda_src, uda_dst, method="mode").regrid(uda_src) 
From e3de5175417d472cba7acd11f3d51b34a8597cd1 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 11:35:05 +0200 Subject: [PATCH 13/20] increase timeout on test using xugrid regridding --- tests/conftest.py | 9 ++++----- tests/test_model.py | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b5938b062..5b12de112 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -324,12 +324,12 @@ def artifact_data(): return datacatalog -@pytest.fixture(scope="session", autouse=True) -def _compile_xugrid_methods() -> None: +@pytest.fixture() +def _compile_xugrid_regrid() -> None: da = xr.DataArray( - data=np.random.rand(15, 10), + data=np.random.rand(2, 3), dims=("y", "x"), - coords={"y": -np.arange(0, 15), "x": np.arange(0, 10)}, + coords={"y": -np.arange(0, 2), "x": np.arange(0, 3)}, attrs=dict(_FillValue=-9999), ) da.raster.set_crs(4326) @@ -340,5 +340,4 @@ def _compile_xugrid_methods() -> None: # this should trigger the compilation of the numba_celltree methods # used in xugrid to make sure these don't affect the test timeout resutls xu.CentroidLocatorRegridder(uda_src, uda_dst).regrid(uda_src) - xu.BarycentricInterpolator(uda_src, uda_dst).regrid(uda_src) xu.OverlapRegridder(uda_src, uda_dst, method="mode").regrid(uda_src) diff --git a/tests/test_model.py b/tests/test_model.py index d8ac131ad..74eec2220 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -767,6 +767,9 @@ def test_setup_mesh(tmpdir, griduda): assert np.all(np.round(model.region.total_bounds, 3) == bounds) +# allow for longer timeout to compile xugrid methods +@pytest.mark.timeout(300) +@pytest.mark.usefixtures("_compile_xugrid_regrid") def test_meshmodel_setup(griduda, world): MeshModel = MODELS.load("mesh_model") dc_param_fn = join(DATADIR, "parameters_data.yml") From 5c12eded00e776ae2dbadb6fca89002fe9b330f7 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 11:58:09 +0200 Subject: [PATCH 14/20] try 
testing with numba jit disabled --- pyproject.toml | 4 +++- tests/test_model.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 857940604..222c9230a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "scipy", # scientific utilities "tomli", # parsing toml files "tomli-w", # writing toml files - "xarray<=2023.9.0", # ndim data - temporary max to wait for xugrid bugfix release + "xarray", # ndim data - temporary max to wait for xugrid bugfix release "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT "zarr", # zarr @@ -73,6 +73,7 @@ dev = [ test = [ "pytest>=2.7.3", # testing framework "pytest-cov", # test coverage + "pytest-env", # set env vars in tests "pytest-mock", # mocking "pytest-timeout", # darn hanging tests "xugrid", @@ -138,6 +139,7 @@ exclude = ["docs", "examples", "envs", "tests", "binder", ".github"] [tool.pytest.ini_options] addopts = "--ff --timeout=120 " testpaths = ["tests"] +env = ["NUMBA_DISABLE_JIT=1"] filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning", diff --git a/tests/test_model.py b/tests/test_model.py index 74eec2220..2d8c289f8 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -768,8 +768,8 @@ def test_setup_mesh(tmpdir, griduda): # allow for longer timeout to compile xugrid methods -@pytest.mark.timeout(300) -@pytest.mark.usefixtures("_compile_xugrid_regrid") +# @pytest.mark.timeout(300) +# @pytest.mark.usefixtures("_compile_xugrid_regrid") def test_meshmodel_setup(griduda, world): MeshModel = MODELS.load("mesh_model") dc_param_fn = join(DATADIR, "parameters_data.yml") From 8f1e8a5ccd46c72e10fe8e83c27f3a244eb5cebd Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 13:20:25 +0200 Subject: [PATCH 15/20] test without pytest-env --- .github/workflows/tests.yml | 3 ++- pyproject.toml | 2 -- 2 files changed, 2 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 10b91cbd1..946f1bde9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -72,9 +72,10 @@ jobs: - name: Conda info run: | conda info - conda list + conda list -n hydromt - name: Test run: | export PATH=/usr/share/miniconda3/bin:$PATH + export NUMBA_DISABLE_JIT=1 PYTHONPYCACHEPREFIX=~/pycache mamba run -n hydromt python -m pytest --verbose --cov=hydromt --cov-report xml diff --git a/pyproject.toml b/pyproject.toml index 222c9230a..b29e698aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,6 @@ dev = [ test = [ "pytest>=2.7.3", # testing framework "pytest-cov", # test coverage - "pytest-env", # set env vars in tests "pytest-mock", # mocking "pytest-timeout", # darn hanging tests "xugrid", @@ -139,7 +138,6 @@ exclude = ["docs", "examples", "envs", "tests", "binder", ".github"] [tool.pytest.ini_options] addopts = "--ff --timeout=120 " testpaths = ["tests"] -env = ["NUMBA_DISABLE_JIT=1"] filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning", From 358d662667f32e7a93b63e3cd2684d5eca73f3eb Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 13:26:30 +0200 Subject: [PATCH 16/20] fix conda path --- .github/workflows/tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 946f1bde9..5db56445e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -71,8 +71,9 @@ jobs: - name: Conda info run: | - conda info - conda list -n hydromt + export PATH=/usr/share/miniconda3/bin:$PATH + conda info + conda list -n hydromt - name: Test run: | From 7eeeb34db636330fdcef49053a7cbd8f2e9deee0 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 14:18:01 +0200 Subject: [PATCH 17/20] speedup test --- .github/workflows/test-docker.yml | 2 +- tests/test_basin_mask.py | 49 
++++++++++++++++--------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 26c6e59ca..4955f0529 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -47,7 +47,7 @@ jobs: cache-to: type=gha,mode=max - name: Run Tests - run: docker run --rm hydromt pytest + run: docker run --rm hydromt pytest --env NUMBA_DISABLE_JIT=1 - name: Test Binder integration with repo2docker run: | diff --git a/tests/test_basin_mask.py b/tests/test_basin_mask.py index cda241673..71c713e2e 100644 --- a/tests/test_basin_mask.py +++ b/tests/test_basin_mask.py @@ -136,9 +136,10 @@ def test_check_size(caplog): ) +@pytest.mark.filterwarnings("ignore::UserWarning") def test_basin(caplog): - data_catalog = hydromt.DataCatalog(logger=logger) - ds = data_catalog.get_rasterdataset("merit_hydro") + data_catalog = hydromt.DataCatalog("artifact_data", logger=logger) + ds = data_catalog.get_rasterdataset("merit_hydro_1k") gdf_bas_index = data_catalog.get_geodataframe("merit_hydro_index") bas_index = data_catalog.get_source("merit_hydro_index") @@ -157,15 +158,15 @@ def test_basin(caplog): ) assert gdf_out is None assert gdf_bas.index.size == 1 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 9346337868.28675) + assert np.isclose(gdf_bas.area.sum(), 0.16847222) gdf_bas, gdf_out = get_basin_geometry( ds, kind="subbasin", basin_index=bas_index, xy=[12.2051, 45.8331], strord=4 ) assert gdf_bas.index.size == 1 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 8.277817e09) - assert np.isclose(gdf_out.geometry.x, 12.205417) - assert np.isclose(gdf_out.geometry.y, 45.83375) + assert np.isclose(gdf_bas.area.sum(), 0.001875) + assert np.isclose(gdf_out.geometry.x, 12.17916667) + assert np.isclose(gdf_out.geometry.y, 45.8041666) gdf_bas, gdf_out = get_basin_geometry( ds, @@ -175,9 +176,9 @@ def test_basin(caplog): strord=5, ) assert gdf_bas.index.size == 2 - assert 
np.isclose(gdf_bas.to_crs(3857).area.sum(), 8.446160e09) - assert np.isclose(gdf_out.geometry.x[1], 12.97292) - assert np.isclose(gdf_out.geometry.y[1], 45.69958) + assert np.isclose(gdf_bas.area.sum(), 0.021389) + assert np.isclose(gdf_out.geometry.x[1], 12.970833333333266) + assert np.isclose(gdf_out.geometry.y[1], 45.69583333333334) gdf_bas, gdf_out = get_basin_geometry( ds, @@ -187,9 +188,9 @@ def test_basin(caplog): bounds=gdf_bas.total_bounds, ) assert gdf_bas.index.size == 1 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 8.277817e09) - assert np.isclose(gdf_out.geometry.x, 12.205417) - assert np.isclose(gdf_out.geometry.y, 45.83375) + assert np.isclose(gdf_bas.area.sum(), 0.001875) + assert np.isclose(gdf_out.geometry.x, 12.179167) + assert np.isclose(gdf_out.geometry.y, 45.804167) gdf_bas, gdf_out = get_basin_geometry( ds, @@ -198,8 +199,8 @@ def test_basin(caplog): bbox=[12.6, 45.5, 12.9, 45.7], buffer=1, ) - assert gdf_bas.index.size == 470 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 18433536552.16195) + assert gdf_bas.index.size == 30 + assert np.isclose(gdf_bas.area.sum(), 1.033125) gdf_bas, gdf_out = get_basin_geometry( ds, @@ -209,30 +210,30 @@ def test_basin(caplog): buffer=1, strord=4, ) - assert gdf_bas.index.size == 6 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 18407888488.828384) + assert gdf_bas.index.size == 4 + assert np.isclose(gdf_bas.area.sum(), 1.03104167) gdf_bas, gdf_out = get_basin_geometry( ds, kind="subbasin", basin_index=gdf_bas_index, bbox=[12.2, 46.2, 12.4, 46.3], - strord=8, + strord=6, ) assert gdf_bas.index.size == 1 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 3569393882.735242) - assert np.isclose(gdf_out.geometry.x, 12.300417) + assert np.isclose(gdf_bas.area.sum(), 0.198055) + assert np.isclose(gdf_out.geometry.x, 12.295833) gdf_bas, gdf_out = get_basin_geometry( ds, kind="interbasin", basin_index=gdf_bas_index, bbox=[12.2, 46.2, 12.4, 46.3], - strord=8, + strord=6, ) assert gdf_bas.index.size 
== 1 - assert np.isclose(gdf_bas.to_crs(3857).area.sum(), 307314959.5972775) - assert np.isclose(gdf_out.geometry.x, 12.300417) + assert np.isclose(gdf_bas.area.sum(), 0.0172222) + assert np.isclose(gdf_out.geometry.x, 12.295833) gdf_bas, gdf_out = get_basin_geometry( ds, @@ -241,7 +242,7 @@ def test_basin(caplog): bbox=[12.8, 45.55, 12.9, 45.65], outlets=True, ) - assert gdf_bas.index.size == 180 + assert gdf_bas.index.size == 13 gdf_bas, gdf_out = get_basin_geometry( ds, @@ -250,7 +251,7 @@ def test_basin(caplog): bbox=[12.8, 45.55, 12.9, 45.65], outlets=True, ) - assert gdf_bas.index.size == 180 + assert gdf_bas.index.size == 13 msg = ( 'kind="outlets" has been deprecated, use outlets=True in combination with' From 335bb2b03726f2787b139f273d1f729cfb25fff6 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 14:26:14 +0200 Subject: [PATCH 18/20] fix docker env --- .github/workflows/test-docker.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index 4955f0529..d09c89a25 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -47,8 +47,7 @@ jobs: cache-to: type=gha,mode=max - name: Run Tests - run: docker run --rm hydromt pytest --env NUMBA_DISABLE_JIT=1 - + run: docker run --env NUMBA_DISABLE_JIT=1 --rm hydromt pytest - name: Test Binder integration with repo2docker run: | pip install jupyter-repo2docker From 8652f68b878506dc9b3f818cc3309259c9578ee5 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 14:34:26 +0200 Subject: [PATCH 19/20] cleanup --- tests/conftest.py | 19 ------------------- tests/test_model.py | 3 --- 2 files changed, 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5b12de112..f0f960f86 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -322,22 +322,3 @@ def artifact_data(): datacatalog = DataCatalog() datacatalog.from_predefined_catalogs("artifact_data") return 
datacatalog - - -@pytest.fixture() -def _compile_xugrid_regrid() -> None: - da = xr.DataArray( - data=np.random.rand(2, 3), - dims=("y", "x"), - coords={"y": -np.arange(0, 2), "x": np.arange(0, 3)}, - attrs=dict(_FillValue=-9999), - ) - da.raster.set_crs(4326) - uda_src = xu.UgridDataArray.from_structured(da) - uda_src.ugrid.set_crs(da.raster.crs) - uda_dst = xu.UgridDataset.from_geodataframe(da.raster.box) - uda_dst.ugrid.set_crs(da.raster.crs) - # this should trigger the compilation of the numba_celltree methods - # used in xugrid to make sure these don't affect the test timeout resutls - xu.CentroidLocatorRegridder(uda_src, uda_dst).regrid(uda_src) - xu.OverlapRegridder(uda_src, uda_dst, method="mode").regrid(uda_src) diff --git a/tests/test_model.py b/tests/test_model.py index 2d8c289f8..d8ac131ad 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -767,9 +767,6 @@ def test_setup_mesh(tmpdir, griduda): assert np.all(np.round(model.region.total_bounds, 3) == bounds) -# allow for longer timeout to compile xugrid methods -# @pytest.mark.timeout(300) -# @pytest.mark.usefixtures("_compile_xugrid_regrid") def test_meshmodel_setup(griduda, world): MeshModel = MODELS.load("mesh_model") dc_param_fn = join(DATADIR, "parameters_data.yml") From 143d5520af7a41d5ad05a25e0b0c9c658389a86c Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Fri, 6 Oct 2023 15:57:38 +0200 Subject: [PATCH 20/20] update old comment --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b29e698aa..34cd34be5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "scipy", # scientific utilities "tomli", # parsing toml files "tomli-w", # writing toml files - "xarray", # ndim data - temporary max to wait for xugrid bugfix release + "xarray", # ndim data arrays "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT "zarr", # 
zarr