diff --git a/docs/api/changelog.rst b/docs/api/changelog.rst index 393a4b7a9..431d216a8 100644 --- a/docs/api/changelog.rst +++ b/docs/api/changelog.rst @@ -21,6 +21,10 @@ Added - Added :class:`imod.mf6.Viscosity` package to specify the viscosity of the groundwater flow model. +- Functionality to dump and load MODFLOW 6 simulations to/from zarr and zipstore + formats. See :meth:`imod.mf6.Modflow6Simulation.dump` and + :meth:`imod.mf6.Modflow6Simulation.from_file` for more information. + [1.0.0rc7] - 2025-10-28 ----------------------- diff --git a/docs/api/mf6.rst b/docs/api/mf6.rst index 1bdddf888..41194625f 100644 --- a/docs/api/mf6.rst +++ b/docs/api/mf6.rst @@ -81,7 +81,7 @@ Discretization StructuredDiscretization.get_regrid_methods StructuredDiscretization.regrid_like StructuredDiscretization.from_file - StructuredDiscretization.to_netcdf + StructuredDiscretization.to_file StructuredDiscretization.copy StructuredDiscretization.clip_box StructuredDiscretization.mask @@ -91,7 +91,7 @@ Discretization VerticesDiscretization.get_regrid_methods VerticesDiscretization.regrid_like VerticesDiscretization.from_file - VerticesDiscretization.to_netcdf + VerticesDiscretization.to_file VerticesDiscretization.copy VerticesDiscretization.clip_box VerticesDiscretization.mask @@ -100,7 +100,7 @@ Discretization TimeDiscretization TimeDiscretization.write TimeDiscretization.from_file - TimeDiscretization.to_netcdf + TimeDiscretization.to_file TimeDiscretization.copy TimeDiscretization.clip_box TimeDiscretization.mask @@ -110,7 +110,7 @@ Discretization AdaptiveTimeStepping AdaptiveTimeStepping.write AdaptiveTimeStepping.from_file - AdaptiveTimeStepping.to_netcdf + AdaptiveTimeStepping.to_file AdaptiveTimeStepping.copy AdaptiveTimeStepping.clip_box AdaptiveTimeStepping.mask @@ -128,7 +128,7 @@ Model settings OutputControl.is_budget_output OutputControl.write OutputControl.from_file - OutputControl.to_netcdf + OutputControl.to_file OutputControl.copy OutputControl.clip_box OutputControl.mask @@ -138,7 +138,7 @@ Model settings Solution Solution.write Solution.from_file - Solution.to_netcdf + Solution.to_file Solution.copy Solution.clip_box Solution.mask @@ -159,7 +159,7 @@ Flow Packages ApiPackage ApiPackage.write ApiPackage.from_file - ApiPackage.to_netcdf + ApiPackage.to_file ApiPackage.copy ApiPackage.is_empty ApiPackage.get_regrid_methods @@ -169,7 +169,7 @@ Flow Packages Buoyancy Buoyancy.write Buoyancy.from_file - Buoyancy.to_netcdf + Buoyancy.to_file Buoyancy.copy Buoyancy.is_empty Buoyancy.get_regrid_methods @@ -184,7 +184,7 @@ Flow Packages ConstantHead.clip_box ConstantHead.write ConstantHead.from_file - ConstantHead.to_netcdf + ConstantHead.to_file ConstantHead.copy ConstantHead.is_empty ConstantHead.get_regrid_methods @@ -196,7 +196,7 @@ Flow Packages Drainage.clip_box Drainage.write Drainage.from_file - Drainage.to_netcdf + Drainage.to_file Drainage.copy Drainage.is_empty Drainage.get_regrid_methods @@ -208,7 +208,7 @@ Flow Packages Evapotranspiration.clip_box Evapotranspiration.write Evapotranspiration.from_file - Evapotranspiration.to_netcdf + Evapotranspiration.to_file Evapotranspiration.copy Evapotranspiration.is_empty Evapotranspiration.get_regrid_methods @@ -220,7 +220,7 @@ Flow Packages GeneralHeadBoundary.clip_box GeneralHeadBoundary.write GeneralHeadBoundary.from_file - GeneralHeadBoundary.to_netcdf + GeneralHeadBoundary.to_file GeneralHeadBoundary.copy GeneralHeadBoundary.is_empty GeneralHeadBoundary.get_regrid_methods @@ -232,7 +232,7 @@ Flow Packages HorizontalFlowBarrierHydraulicCharacteristic.snap_to_grid HorizontalFlowBarrierHydraulicCharacteristic.write HorizontalFlowBarrierHydraulicCharacteristic.from_file - HorizontalFlowBarrierHydraulicCharacteristic.to_netcdf + HorizontalFlowBarrierHydraulicCharacteristic.to_file HorizontalFlowBarrierHydraulicCharacteristic.copy HorizontalFlowBarrierHydraulicCharacteristic.is_empty HorizontalFlowBarrierHydraulicCharacteristic.get_regrid_methods @@ -244,7 +244,7 @@ Flow Packages HorizontalFlowBarrierMultiplier.snap_to_grid HorizontalFlowBarrierMultiplier.write HorizontalFlowBarrierMultiplier.from_file - HorizontalFlowBarrierMultiplier.to_netcdf + HorizontalFlowBarrierMultiplier.to_file HorizontalFlowBarrierMultiplier.copy HorizontalFlowBarrierMultiplier.is_empty HorizontalFlowBarrierMultiplier.get_regrid_methods @@ -257,7 +257,7 @@ Flow Packages HorizontalFlowBarrierResistance.snap_to_grid HorizontalFlowBarrierResistance.write HorizontalFlowBarrierResistance.from_file - HorizontalFlowBarrierResistance.to_netcdf + HorizontalFlowBarrierResistance.to_file HorizontalFlowBarrierResistance.copy HorizontalFlowBarrierResistance.is_empty HorizontalFlowBarrierResistance.get_regrid_methods @@ -273,7 +273,7 @@ Flow Packages LayeredWell.clip_box LayeredWell.write LayeredWell.from_file - LayeredWell.to_netcdf + LayeredWell.to_file LayeredWell.copy LayeredWell.is_empty LayeredWell.get_regrid_methods @@ -284,7 +284,7 @@ Flow Packages InitialConditions.clip_box InitialConditions.write InitialConditions.from_file - InitialConditions.to_netcdf + InitialConditions.to_file InitialConditions.copy InitialConditions.is_empty InitialConditions.get_regrid_methods @@ -295,7 +295,7 @@ Flow Packages NodePropertyFlow.clip_box NodePropertyFlow.write NodePropertyFlow.from_file - NodePropertyFlow.to_netcdf + NodePropertyFlow.to_file NodePropertyFlow.copy NodePropertyFlow.is_empty NodePropertyFlow.get_regrid_methods @@ -307,7 +307,7 @@ Flow Packages Recharge.clip_box Recharge.write Recharge.from_file - Recharge.to_netcdf + Recharge.to_file Recharge.copy Recharge.is_empty Recharge.get_regrid_methods @@ -321,7 +321,7 @@ Flow Packages River.clip_box River.write River.from_file - River.to_netcdf + River.to_file River.copy River.is_empty River.get_regrid_methods @@ -333,7 +333,7 @@ Flow Packages SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.snap_to_grid SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.write SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.from_file - SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.to_netcdf + SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.to_file SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.copy SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.is_empty SingleLayerHorizontalFlowBarrierHydraulicCharacteristic.get_regrid_methods @@ -345,7 +345,7 @@ Flow Packages SingleLayerHorizontalFlowBarrierMultiplier.snap_to_grid SingleLayerHorizontalFlowBarrierMultiplier.write SingleLayerHorizontalFlowBarrierMultiplier.from_file - SingleLayerHorizontalFlowBarrierMultiplier.to_netcdf + SingleLayerHorizontalFlowBarrierMultiplier.to_file SingleLayerHorizontalFlowBarrierMultiplier.copy SingleLayerHorizontalFlowBarrierMultiplier.is_empty SingleLayerHorizontalFlowBarrierMultiplier.get_regrid_methods @@ -359,7 +359,7 @@ Flow Packages SingleLayerHorizontalFlowBarrierResistance.snap_to_grid SingleLayerHorizontalFlowBarrierResistance.write SingleLayerHorizontalFlowBarrierResistance.from_file - SingleLayerHorizontalFlowBarrierResistance.to_netcdf + SingleLayerHorizontalFlowBarrierResistance.to_file SingleLayerHorizontalFlowBarrierResistance.copy SingleLayerHorizontalFlowBarrierResistance.is_empty SingleLayerHorizontalFlowBarrierResistance.get_regrid_methods @@ -371,7 +371,7 @@ Flow Packages SpecificStorage.clip_box SpecificStorage.write SpecificStorage.from_file - SpecificStorage.to_netcdf + SpecificStorage.to_file SpecificStorage.copy SpecificStorage.is_empty SpecificStorage.get_regrid_methods @@ -382,7 +382,7 @@ Flow Packages StorageCoefficient.clip_box StorageCoefficient.write StorageCoefficient.from_file - StorageCoefficient.to_netcdf + StorageCoefficient.to_file StorageCoefficient.copy StorageCoefficient.is_empty StorageCoefficient.get_regrid_methods @@ -392,14 +392,14 @@ Flow Packages UnsaturatedZoneFlow.regrid_like UnsaturatedZoneFlow.write UnsaturatedZoneFlow.from_file - UnsaturatedZoneFlow.to_netcdf + UnsaturatedZoneFlow.to_file UnsaturatedZoneFlow.copy UnsaturatedZoneFlow.is_empty UnsaturatedZoneFlow.get_regrid_methods Viscosity Viscosity.write Viscosity.from_file - Viscosity.to_netcdf + Viscosity.to_file Viscosity.copy Viscosity.is_empty Viscosity.get_regrid_methods @@ -415,7 +415,7 @@ Flow Packages Well.clip_box Well.write Well.from_file - Well.to_netcdf + Well.to_file Well.copy Well.is_empty Well.get_regrid_methods @@ -430,56 +430,56 @@ Transport Packages AdvectionCentral AdvectionCentral.write AdvectionCentral.from_file - AdvectionCentral.to_netcdf + AdvectionCentral.to_file AdvectionCentral.copy AdvectionCentral.is_empty AdvectionCentral.get_regrid_methods AdvectionTVD AdvectionTVD.write AdvectionTVD.from_file - AdvectionTVD.to_netcdf + AdvectionTVD.to_file AdvectionTVD.copy AdvectionTVD.is_empty AdvectionTVD.get_regrid_methods AdvectionUpstream AdvectionUpstream.write AdvectionUpstream.from_file - AdvectionUpstream.to_netcdf + AdvectionUpstream.to_file AdvectionUpstream.copy AdvectionUpstream.is_empty AdvectionUpstream.get_regrid_methods ConstantConcentration ConstantConcentration.write ConstantConcentration.from_file - ConstantConcentration.to_netcdf + ConstantConcentration.to_file ConstantConcentration.copy ConstantConcentration.is_empty ConstantConcentration.get_regrid_methods Dispersion Dispersion.write Dispersion.from_file - Dispersion.to_netcdf + Dispersion.to_file Dispersion.copy Dispersion.is_empty Dispersion.get_regrid_methods ImmobileStorageTransfer ImmobileStorageTransfer.write ImmobileStorageTransfer.from_file - ImmobileStorageTransfer.to_netcdf + ImmobileStorageTransfer.to_file ImmobileStorageTransfer.copy ImmobileStorageTransfer.is_empty ImmobileStorageTransfer.get_regrid_methods MobileStorageTransfer MobileStorageTransfer.write MobileStorageTransfer.from_file - MobileStorageTransfer.to_netcdf + MobileStorageTransfer.to_file MobileStorageTransfer.copy MobileStorageTransfer.is_empty MobileStorageTransfer.get_regrid_methods MassSourceLoading MassSourceLoading.write MassSourceLoading.from_file - MassSourceLoading.to_netcdf + MassSourceLoading.to_file MassSourceLoading.copy MassSourceLoading.is_empty MassSourceLoading.get_regrid_methods @@ -487,7 +487,7 @@ Transport Packages SourceSinkMixing.from_flow_model SourceSinkMixing.write SourceSinkMixing.from_file - SourceSinkMixing.to_netcdf + SourceSinkMixing.to_file SourceSinkMixing.copy SourceSinkMixing.is_empty SourceSinkMixing.get_regrid_methods diff --git a/imod/common/interfaces/ipackage.py b/imod/common/interfaces/ipackage.py index 69e1851e0..f2fc343fa 100644 --- a/imod/common/interfaces/ipackage.py +++ b/imod/common/interfaces/ipackage.py @@ -34,8 +34,3 @@ def _is_regridding_supported(self) -> bool: @abstractmethod def _is_grid_agnostic_package(self) -> bool: raise NotImplementedError - - @property - @abc.abstractmethod - def pkg_id(self) -> str: - raise NotImplementedError diff --git a/imod/common/interfaces/ipackagebase.py b/imod/common/interfaces/ipackagebase.py index 658291351..0a9241469 100644 --- a/imod/common/interfaces/ipackagebase.py +++ b/imod/common/interfaces/ipackagebase.py @@ -24,3 +24,8 @@ def dataset(self, value: xr.Dataset) -> None: @abstractmethod def _from_dataset(cls, ds: GridDataset): raise NotImplementedError + + @property + @abstractmethod + def pkg_id(self) -> str: + raise NotImplementedError diff --git a/imod/common/interfaces/ipackageserializer.py b/imod/common/interfaces/ipackageserializer.py new file mode 100644 index 000000000..06dd317af --- /dev/null +++ b/imod/common/interfaces/ipackageserializer.py @@ -0,0 +1,12 @@ +import abc +from pathlib import Path + +from imod.common.interfaces.ipackagebase import IPackageBase + + +class IPackageSerializer(metaclass=abc.ABCMeta): + @abc.abstractmethod + def to_file( + self, pkg: IPackageBase, directory: Path, file_name: str, **kwargs + ) -> Path: + raise NotImplementedError diff --git a/imod/common/serializer/__init__.py b/imod/common/serializer/__init__.py new file mode 100644 index 000000000..ced9b94a5 --- /dev/null +++ b/imod/common/serializer/__init__.py @@ -0,0 +1,21 @@ +from typing import Literal, TypeAlias + +from imod.common.interfaces.ipackageserializer import IPackageSerializer +from imod.common.serializer.netcdfserializer import NetCDFSerializer +from imod.common.serializer.zarrserializer import ZarrSerializer + +EngineType: TypeAlias = Literal["netcdf4", "zarr", "zarr.zip"] + + +def create_package_serializer( + engine: EngineType, mdal_compliant: bool = False, crs: str | None = None +) -> IPackageSerializer: + match engine: + case "netcdf4": + return NetCDFSerializer(mdal_compliant=mdal_compliant, crs=crs) + case "zarr": + return ZarrSerializer(use_zip=False) + case "zarr.zip": + return ZarrSerializer(use_zip=True) + case _: + raise ValueError(f"Unrecognized engine: {engine}") diff --git a/imod/common/serializer/netcdfserializer.py b/imod/common/serializer/netcdfserializer.py new file mode 100644 index 000000000..e157818e6 --- /dev/null +++ b/imod/common/serializer/netcdfserializer.py @@ -0,0 +1,72 @@ +from copy import deepcopy +from pathlib import Path + +import numpy as np +import xugrid as xu + +from imod import util +from imod.common.interfaces.ilinedatapackage import ILineDataPackage +from imod.common.interfaces.ipackagebase import IPackageBase +from imod.common.interfaces.ipackageserializer import IPackageSerializer +from imod.typing import GridDataset +from imod.typing.grid import is_spatial_grid + + +class NetCDFSerializer(IPackageSerializer): + def __init__(self, mdal_compliant: bool = False, crs: str | None = None): + self.mdal_compliant = mdal_compliant + self.crs = crs + + def to_file( + self, pkg: IPackageBase, directory: Path, file_name: str, **kwargs + ) -> Path: + path = directory / f"{file_name}.nc" + + kwargs.update({"encoding": self._netcdf_encoding(pkg)}) + dataset = self._dataset_encoding(pkg) + + # Create encoding dict for float16 variables + for var in dataset.data_vars: + if dataset[var].dtype == np.float16: + kwargs["encoding"][var] = {"dtype": "float32"} + + # Also check coordinates + for coord in dataset.coords: + if dataset[coord].dtype == np.float16: + kwargs["encoding"][coord] = {"dtype": "float32"} + + if isinstance(dataset, xu.UgridDataset): + if self.mdal_compliant: + dataset = dataset.ugrid.to_dataset() + mdal_dataset = util.spatial.mdal_compliant_ugrid2d( + dataset, crs=self.crs + ) + mdal_dataset.to_netcdf(path, **kwargs) + else: + dataset.ugrid.to_netcdf(path, **kwargs) + else: + if is_spatial_grid(dataset): + dataset = util.spatial.gdal_compliant_grid(dataset, crs=self.crs) + dataset.to_netcdf(path, **kwargs) + + return path + + def _dataset_encoding(self, pkg: IPackageBase) -> GridDataset: + if isinstance(pkg, ILineDataPackage): + new: ILineDataPackage = deepcopy(pkg) + new.dataset["geometry"] = new.line_data.to_json() + return new.dataset + + return pkg.dataset + + def _netcdf_encoding(self, pkg: IPackageBase) -> dict: + """ + + The encoding used in the to_netcdf method + Override this to provide custom encoding rules + + """ + if isinstance(pkg, ILineDataPackage): + return {"geometry": {"dtype": "str"}} + + return {} diff --git a/imod/common/serializer/zarrserializer.py b/imod/common/serializer/zarrserializer.py new file mode 100644 index 000000000..58ca34b48 --- /dev/null +++ b/imod/common/serializer/zarrserializer.py @@ -0,0 +1,63 @@ +import shutil +from contextlib import nullcontext +from copy import deepcopy +from pathlib import Path +from typing import TYPE_CHECKING + +import xugrid as xu + +from imod.common.interfaces.ilinedatapackage import ILineDataPackage +from imod.common.interfaces.ipackagebase import IPackageBase +from imod.common.interfaces.ipackageserializer import IPackageSerializer +from imod.typing import GridDataset +from imod.util.imports import MissingOptionalModule + +if TYPE_CHECKING: + import zarr +else: + try: + import zarr + except ImportError: + zarr = MissingOptionalModule("zarr") + + +class ZarrSerializer(IPackageSerializer): + def __init__(self, use_zip: bool = False): + self.use_zip = use_zip + + def to_file( + self, pkg: IPackageBase, directory: Path, file_name: str, **kwargs + ) -> Path: + if self.use_zip: + path = directory / f"{file_name}.zarr.zip" + store = zarr.storage.ZipStore(path, mode="w") + write_context = store + else: + path = directory / f"{file_name}.zarr" + store = path + write_context = nullcontext() + + dataset = self._dataset_encoding(pkg) + + if path.exists(): + # Check if directory (ordinary .zarr, directory) or ZipStore (zip file). + if path.is_dir(): + shutil.rmtree(path) + else: + path.unlink() + + with write_context: + if isinstance(dataset, xu.UgridDataset): + dataset.ugrid.to_zarr(store=store, **kwargs) + else: + dataset.to_zarr(store=store, **kwargs) + + return path + + def _dataset_encoding(self, pkg: IPackageBase) -> GridDataset: + if isinstance(pkg, ILineDataPackage): + new: ILineDataPackage = deepcopy(pkg) + new.dataset["geometry"] = new.line_data.to_json() + return new.dataset + + return pkg.dataset diff --git a/imod/mf6/hfb.py b/imod/mf6/hfb.py index 9d843bec7..a70a1b811 100644 --- a/imod/mf6/hfb.py +++ b/imod/mf6/hfb.py @@ -5,7 +5,7 @@ from copy import deepcopy from enum import Enum from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Self, Tuple +from typing import TYPE_CHECKING, Dict, List, Optional, Self, Tuple import cftime import numpy as np @@ -531,40 +531,6 @@ def _render(self, directory, pkgname, globaltimes, binary): package, first convert to a Modflow6 package by calling pkg.to_mf6_pkg()""" ) - def to_netcdf( - self, *args, mdal_compliant: bool = False, crs: Optional[Any] = None, **kwargs - ): - """ - Write dataset contents to a netCDF file. Custom encoding rules can be - provided on package level by overriding the _netcdf_encoding in the - package - - Parameters - ---------- - *args: - Will be passed on to ``xr.Dataset.to_netcdf`` or - ``xu.UgridDataset.to_netcdf``. - mdal_compliant: bool, optional - Convert data with - :func:`imod.prepare.spatial.mdal_compliant_ugrid2d` to MDAL - compliant unstructured grids. Defaults to False. - crs: Any, optional - Anything accepted by rasterio.crs.CRS.from_user_input - Requires ``rioxarray`` installed. - **kwargs: - Will be passed on to ``xr.Dataset.to_netcdf`` or - ``xu.UgridDataset.to_netcdf``. - - """ - kwargs.update({"encoding": self._netcdf_encoding()}) - - new = deepcopy(self) - new.dataset["geometry"] = new.line_data.to_json() - new.dataset.to_netcdf(*args, **kwargs) - - def _netcdf_encoding(self): - return {"geometry": {"dtype": "str"}} - @classmethod def from_file(cls, path: str | Path, **kwargs) -> Self: """ diff --git a/imod/mf6/model.py b/imod/mf6/model.py index 8f3888abb..ee0de6da6 100644 --- a/imod/mf6/model.py +++ b/imod/mf6/model.py @@ -19,6 +19,7 @@ import imod from imod.common.interfaces.imodel import IModel +from imod.common.serializer import EngineType from imod.common.statusinfo import NestedStatusInfo, StatusInfo, StatusInfoBase from imod.common.utilities.clip import clip_box_dataset from imod.common.utilities.mask import _mask_all_packages @@ -592,7 +593,8 @@ def dump( validate: bool = True, mdal_compliant: bool = False, crs: Optional[Any] = None, - ): + engine: EngineType = "netcdf4", + ) -> Path: """ Dump simulation to files. Writes a model definition as .TOML file, which points to data for each package. Each package is stored as a separate @@ -615,6 +617,15 @@ def dump( crs: Any, optional Anything accepted by rasterio.crs.CRS.from_user_input Requires ``rioxarray`` installed. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + """ modeldirectory = pathlib.Path(directory) / modelname modeldirectory.mkdir(exist_ok=True, parents=True) @@ -625,12 +636,16 @@ def dump( raise ValidationError(statusinfo.to_string()) toml_content: dict = collections.defaultdict(dict) + for pkgname, pkg in self.items(): - pkg_path = f"{pkgname}.nc" - toml_content[type(pkg).__name__][pkgname] = pkg_path - pkg.to_netcdf( - modeldirectory / pkg_path, crs=crs, mdal_compliant=mdal_compliant + pkg_path = pkg.to_file( + modeldirectory, + pkgname, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, ) + toml_content[type(pkg).__name__][pkgname] = pkg_path.name toml_path = modeldirectory / f"{modelname}.toml" with open(toml_path, "wb") as f: diff --git a/imod/mf6/package.py b/imod/mf6/package.py index 228b7551e..79ebedb9d 100644 --- a/imod/mf6/package.py +++ b/imod/mf6/package.py @@ -76,7 +76,6 @@ class Package(PackageBase, IPackage, abc.ABC): not the list input which is used in :class:`BoundaryCondition`. """ - _pkg_id = "" _init_schemata: SchemataDict = {} _write_schemata: SchemataDict = {} _keyword_map: dict[str, str] = {} @@ -567,10 +566,6 @@ def _is_grid_agnostic_package(cls) -> bool: """ return False - @property - def pkg_id(self) -> str: - return self._pkg_id - def __repr__(self) -> str: typename = type(self).__name__ return f"{typename}\n{self.dataset.__repr__()}" diff --git a/imod/mf6/pkgbase.py b/imod/mf6/pkgbase.py index fe78a789a..6a5f212ca 100644 --- a/imod/mf6/pkgbase.py +++ b/imod/mf6/pkgbase.py @@ -1,7 +1,7 @@ import abc import numbers from pathlib import Path -from typing import Any, Mapping, Optional, Self +from typing import TYPE_CHECKING, Any, Mapping, Optional, Self, final import numpy as np import xarray as xr @@ -10,12 +10,22 @@ import imod from imod.common.interfaces.ipackagebase import IPackageBase +from imod.common.serializer import EngineType, create_package_serializer from imod.typing.grid import ( GridDataArray, GridDataset, - is_spatial_grid, merge_with_dictionary, ) +from imod.util.imports import MissingOptionalModule + +if TYPE_CHECKING: + import zarr +else: + try: + import zarr + except ImportError: + zarr = MissingOptionalModule("zarr") + TRANSPORT_PACKAGES = ("adv", "dsp", "ssm", "mst", "ist", "src") EXCHANGE_PACKAGES = ("gwfgwf", "gwfgwt", "gwtgwt") @@ -40,6 +50,8 @@ class PackageBase(IPackageBase, abc.ABC): object.dataset.to_netcdf(...) """ + _pkg_id = "" + # This method has been added to allow mock.patch to mock created objects # https://stackoverflow.com/questions/64737213/how-to-patch-the-new-method-of-a-class def __new__(cls, *_, **__): @@ -60,74 +72,16 @@ def dataset(self) -> GridDataset: def dataset(self, value: GridDataset) -> None: self.__dataset = value + @property + def pkg_id(self) -> str: + return self._pkg_id + def __getitem__(self, key): return self.dataset.__getitem__(key) def __setitem__(self, key, value): self.dataset.__setitem__(key, value) - def to_netcdf( - self, *args, mdal_compliant: bool = False, crs: Optional[Any] = None, **kwargs - ) -> None: - """ - Write dataset contents to a netCDF file. Custom encoding rules can be - provided on package level by overriding the _netcdf_encoding in the - package - - Parameters - ---------- - *args: - Will be passed on to ``xr.Dataset.to_netcdf`` or - ``xu.UgridDataset.to_netcdf``. - mdal_compliant: bool, optional - Convert data with - :func:`imod.prepare.spatial.mdal_compliant_ugrid2d` to MDAL - compliant unstructured grids. Defaults to False. - crs: Any, optional - Anything accepted by rasterio.crs.CRS.from_user_input - Requires ``rioxarray`` installed. - **kwargs: - Will be passed on to ``xr.Dataset.to_netcdf`` or - ``xu.UgridDataset.to_netcdf``. - - """ - kwargs.update({"encoding": self._netcdf_encoding()}) - - dataset = self.dataset - - # Create encoding dict for float16 variables - for var in dataset.data_vars: - if dataset[var].dtype == np.float16: - kwargs["encoding"][var] = {"dtype": "float32"} - - # Also check coordinates - for coord in dataset.coords: - if dataset[coord].dtype == np.float16: - kwargs["encoding"][coord] = {"dtype": "float32"} - - if isinstance(dataset, xu.UgridDataset): - if mdal_compliant: - dataset = dataset.ugrid.to_dataset() - mdal_dataset = imod.util.spatial.mdal_compliant_ugrid2d( - dataset, crs=crs - ) - mdal_dataset.to_netcdf(*args, **kwargs) - else: - dataset.ugrid.to_netcdf(*args, **kwargs) - else: - if is_spatial_grid(dataset): - dataset = imod.util.spatial.gdal_compliant_grid(dataset, crs=crs) - dataset.to_netcdf(*args, **kwargs) - - def _netcdf_encoding(self) -> dict: - """ - - The encoding used in the to_netcdf method - Override this to provide custom encoding rules - - """ - return {} - @classmethod def _from_dataset(cls, ds: GridDataset) -> Self: """ @@ -176,8 +130,11 @@ def from_file(cls, path: str | Path, **kwargs) -> Self: """ path = Path(path) if path.suffix in (".zip", ".zarr"): - # TODO: seems like a bug? Remove str() call if fixed in xarray/zarr - dataset = xr.open_zarr(str(path), **kwargs) + if path.suffix == ".zip": + with zarr.storage.ZipStore(path, mode="r") as store: + dataset = xr.open_zarr(store, **kwargs) + else: + dataset = xr.open_zarr(str(path), **kwargs) else: dataset = xr.open_dataset(path, chunks="auto", **kwargs) @@ -203,3 +160,54 @@ def from_file(cls, path: str | Path, **kwargs) -> Self: dataset[var] = dataset[var].astype(str) return cls._from_dataset(dataset) + + @final + def to_file( + self, + modeldirectory: Path, + pkgname: str, + mdal_compliant: bool = False, + crs: Optional[Any] = None, + engine: EngineType = "netcdf4", + **kwargs, + ) -> Path: + """ + Write package to file. + + Parameters + ---------- + modeldirectory : pathlib.Path + Directory where to write the package file. + pkgname : str + Name of the package, used to create the file name. + mdal_compliant : bool, optional + Whether to write the package in MDAL-compliant format. Only used if + ``engine="netcdf4"``. Default is False. + crs : optional + Coordinate reference system to use when writing the package. Only + used if ``engine="netcdf4"`` Default is None. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. + **kwargs : keyword arguments + Additional keyword arguments forwarded to the package serializer's + `to_file` method. + + Returns + ------- + pathlib.Path + Path to the written package file. + """ + + # All serialization logic is in the package serializers do not override + # this method (hence final decorator). + filewriter = create_package_serializer( + engine, mdal_compliant=mdal_compliant, crs=crs + ) + path = filewriter.to_file(self, modeldirectory, pkgname, **kwargs) + return path diff --git a/imod/mf6/simulation.py b/imod/mf6/simulation.py index d873db1b0..d3c734f5c 100644 --- a/imod/mf6/simulation.py +++ b/imod/mf6/simulation.py @@ -18,10 +18,10 @@ import xugrid as xu import imod -import imod.logging import imod.mf6.exchangebase from imod.common.interfaces.imodel import IModel from imod.common.interfaces.isimulation import ISimulation +from imod.common.serializer import EngineType from imod.common.statusinfo import NestedStatusInfo from imod.common.utilities.dataclass_type import DataclassType from imod.common.utilities.mask import _mask_all_models @@ -971,6 +971,7 @@ def dump( validate: bool = True, mdal_compliant: bool = False, crs=None, + engine: EngineType = "netcdf4", ) -> None: """ Dump simulation to files. Writes a model definition as .TOML file, which @@ -992,6 +993,14 @@ def dump( crs: Any, optional Anything accepted by rasterio.crs.CRS.from_user_input Requires ``rioxarray`` installed. + engine : str, optional + File engine used to write packages. Options are ``'netcdf4'``, + ``'zarr'``, and ``'zarr.zip'``. NetCDF4 is readable by many other + softwares, for example QGIS. Zarr is optimized for big data, cloud + storage and parallel access. The ``'zarr.zip'`` option is an + experimental option which creates a zipped zarr store in a single + file, which is easier to copy and automatically compresses data as + well. Default is ``'netcdf4'``. Examples -------- @@ -1013,6 +1022,10 @@ def dump( loaded into QGIS, you can set ``mdal_compliant=True``: >>> mf6_sim.dump("path/to/directory", mdal_compliant=True, crs="EPSG:4326") + + For big data, storing to ``"zarr"`` or ``"zarr.zip"`` is more efficient: + + >>> mf6_sim.dump("path/to/directory", engine="zarr") """ directory = pathlib.Path(directory) directory.mkdir(parents=True, exist_ok=True) @@ -1021,12 +1034,13 @@ def dump( # Dump version number version = get_version() toml_content["version"] = {"imod-python": version} + # Dump models and exchanges for key, value in self.items(): cls_name = type(value).__name__ if isinstance(value, Modflow6Model): model_toml_path = value.dump( - directory, key, validate, mdal_compliant, crs + directory, key, validate, mdal_compliant, crs, engine=engine ) toml_content[cls_name][key] = model_toml_path.relative_to( directory @@ -1036,14 +1050,25 @@ def dump( for exchange_package in self[key]: _, filename, _, _ = exchange_package.get_specification() exchange_class_short = type(exchange_package).__name__ - path = f"{filename}.nc" - exchange_package.dataset.to_netcdf(directory / path) - toml_content[key][exchange_class_short].append(path) + path = exchange_package.to_file( + directory, + filename, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, + ) + + toml_content[key][exchange_class_short].append(path.name) else: - path = f"{key}.nc" - value.dataset.to_netcdf(directory / path) - toml_content[cls_name][key] = path + path = value.to_file( + directory, + key, + mdal_compliant=mdal_compliant, + crs=crs, + engine=engine, + ) + toml_content[cls_name][key] = path.name with open(directory / f"{self.name}.toml", "wb") as f: tomli_w.dump(toml_content, f) diff --git a/imod/tests/test_mf6/test_mf6_simulation.py b/imod/tests/test_mf6/test_mf6_simulation.py index 097887a0d..81b9e827c 100644 --- a/imod/tests/test_mf6/test_mf6_simulation.py +++ b/imod/tests/test_mf6/test_mf6_simulation.py @@ -44,32 +44,37 @@ from imod.typing.grid import zeros_like -def roundtrip(simulation, tmpdir_factory, name): +def roundtrip(simulation, tmpdir_factory, name, engine): # TODO: look at the values? tmp_path = tmpdir_factory.mktemp(name) - simulation.dump(tmp_path) + simulation.dump(tmp_path, engine=engine) back = imod.mf6.Modflow6Simulation.from_file(tmp_path / f"{simulation.name}.toml") assert isinstance(back, imod.mf6.Modflow6Simulation) -def test_twri_roundtrip(twri_model, tmpdir_factory): - roundtrip(twri_model, tmpdir_factory, "twri") +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_twri_roundtrip(twri_model, tmpdir_factory, engine): + roundtrip(twri_model, tmpdir_factory, "twri", engine) -def test_twri_hfb_roundtrip(twri_model_hfb, tmpdir_factory): - roundtrip(twri_model_hfb, tmpdir_factory, "twri") +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_twri_hfb_roundtrip(twri_model_hfb, tmpdir_factory, engine): + roundtrip(twri_model_hfb, tmpdir_factory, "twri", engine) -def test_twri_transient_roundtrip(transient_twri_model, tmpdir_factory): - roundtrip(transient_twri_model, tmpdir_factory, "twri_transient") +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_twri_transient_roundtrip(transient_twri_model, tmpdir_factory, engine): + roundtrip(transient_twri_model, tmpdir_factory, "twri_transient", engine) -def test_twri_disv_roundtrip(twri_disv_model, tmpdir_factory): - roundtrip(twri_disv_model, tmpdir_factory, "twri_disv") +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_twri_disv_roundtrip(twri_disv_model, tmpdir_factory, engine): + roundtrip(twri_disv_model, tmpdir_factory, "twri_disv", engine) -def test_circle_roundtrip(circle_model, tmpdir_factory): - roundtrip(circle_model, tmpdir_factory, "circle") +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_circle_roundtrip(circle_model, tmpdir_factory, engine): + roundtrip(circle_model, tmpdir_factory, "circle", engine) def test_dump_version_number__version_written(twri_model, tmpdir_factory): diff --git a/imod/tests/test_mf6/test_multimodel/test_mf6_modelsplitter_transport.py b/imod/tests/test_mf6/test_multimodel/test_mf6_modelsplitter_transport.py index b2a35bdf7..0fecb995e 100644 --- a/imod/tests/test_mf6/test_multimodel/test_mf6_modelsplitter_transport.py +++ b/imod/tests/test_mf6/test_multimodel/test_mf6_modelsplitter_transport.py @@ -32,7 +32,9 @@ def test_slice_model_structured(flow_transport_simulation: Modflow6Simulation): assert package_name in list(new_model.keys()) +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) def test_split_dump( + engine: str, tmp_path: Path, flow_transport_simulation: Modflow6Simulation, ): @@ -48,7 +50,7 @@ def test_split_dump( split_simulation.write( tmp_path / "split/original" ) # a write is necessary before the dump to generate the gwtgwf packages - split_simulation.dump(tmp_path / "split") + split_simulation.dump(tmp_path / "split", engine=engine) reloaded_split = Modflow6Simulation.from_file( tmp_path / "split/1d_tpt_benchmark_partioned.toml" ) diff --git a/imod/tests/test_mf6/test_package_sanity.py b/imod/tests/test_mf6/test_package_sanity.py index 6e3c2efe3..cdadf2005 100644 --- a/imod/tests/test_mf6/test_package_sanity.py +++ b/imod/tests/test_mf6/test_package_sanity.py @@ -97,10 +97,10 @@ def test_render_twice(instance, tmp_path): @pytest.mark.parametrize("instance", ALL_PACKAGE_INSTANCES) -def test_save_and_load(instance, tmp_path): +@pytest.mark.parametrize("engine", ["netcdf4", "zarr", "zarr.zip"]) +def test_save_and_load(instance, engine, tmp_path): pkg_class = type(instance) - path = tmp_path / f"{instance._pkg_id}.nc" - instance.to_netcdf(path) + path = instance.to_file(tmp_path, instance._pkg_id, engine=engine) back = pkg_class.from_file(path) assert instance.dataset.equals(back.dataset)