Modernize derived variables #2999

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft

bouweandela wants to merge 9 commits into main from modernize-derived-variables

+170 −318

environment.yml

-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,7 @@ dependencies: @@
       - humanfriendly
       - intake-esgf >=2025.10.22
       - intake-esm
-      - iris
+      - iris >=3.13
       - iris-esmf-regrid >=0.11.0
       - iris-grib >=0.20.0  # github.com/ESMValGroup/ESMValCore/issues/2535
       - isodate >=0.7.0  # incompatible with very old 0.6.1
@@ Expand Down @@

esmvalcore/cmor/_fixes/cmip6/cesm2.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -2,13 +2,13 @@
  
    from __future__ import annotations

    from shutil import copyfile

    from typing import TYPE_CHECKING

    import iris

    import iris.coords

    import ncdata

    import ncdata.netcdf4

    import numpy as np

    from netCDF4 import Dataset

    from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord

    from esmvalcore.cmor._fixes.fix import Fix

    @@ -19,40 +19,36 @@
  
        add_scalar_typesea_coord,

        fix_ocean_depth_coord,

    )

    from esmvalcore.iris_helpers import dataset_to_iris

    if TYPE_CHECKING:

        from collections.abc import Sequence

        from pathlib import Path

        from iris.cube import Cube

    class Cl(Fix):

        """Fixes for ``cl``."""

        def _fix_formula_terms(

            self,

            file: str | Path,

            output_dir: str | Path,

            add_unique_suffix: bool = False,

        ) -> Path:

        @staticmethod

        def _fix_formula_terms(dataset: ncdata.NcData) -> None:

            """Fix ``formula_terms`` attribute."""

            new_path = self.get_fixed_filepath(

                output_dir,

                file,

                add_unique_suffix=add_unique_suffix,

            lev = dataset.variables["lev"]

            lev.set_attrval("formula_terms", "p0: p0 a: a b: b ps: ps")

            lev.set_attrval(

                "standard_name",

                "atmosphere_hybrid_sigma_pressure_coordinate",

            )

            copyfile(file, new_path)

            with Dataset(new_path, mode="a") as dataset:

                dataset.variables["lev"].formula_terms = "p0: p0 a: a b: b ps: ps"

                dataset.variables[

                    "lev"

                ].standard_name = "atmosphere_hybrid_sigma_pressure_coordinate"

            return new_path

            lev.set_attrval("units", "1")

            dataset.variables["lev_bnds"].attributes.pop("units")

        def fix_file(

            self,

            file: str | Path,

            output_dir: str | Path,

            add_unique_suffix: bool = False,

        ) -> Path:

            file: Path,

            output_dir: Path,  # noqa: ARG002

            add_unique_suffix: bool = False,  # noqa: ARG002

        ) -> Path | Sequence[Cube]:

            """Fix hybrid pressure coordinate.

            Adds missing ``formula_terms`` attribute to file.

    @@ -79,45 +75,38 @@ def fix_file(
  
                Path to the fixed file.

            """

            new_path = self._fix_formula_terms(

            dataset = ncdata.netcdf4.from_nc4(

                file,

                output_dir,

                add_unique_suffix=add_unique_suffix,

                # Use iris-style chunks to avoid mismatching chunks between data

                # and derived coordinates, as the latter are automatically rechunked

                # by iris.

                dim_chunks={

                    "time": "auto",

                    "lev": None,

                    "lat": None,

                    "lon": None,

                    "nbnd": None,

                },

            )

            with Dataset(new_path, mode="a") as dataset:

                dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][

                    ::-1,

                    :,

                ]

                dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][

                    ::-1,

                    :,

                ]

            return new_path

        def fix_metadata(self, cubes):

            """Fix ``atmosphere_hybrid_sigma_pressure_coordinate``.

            See discussion in #882 for more details on that.

            Parameters

            ----------

            cubes : iris.cube.CubeList

                Input cubes.

            Returns

            -------

            iris.cube.CubeList

            """

            cube = self.get_cube_from_list(cubes)

            lev_coord = cube.coord(var_name="lev")

            a_coord = cube.coord(var_name="a")

            b_coord = cube.coord(var_name="b")

            lev_coord.points = a_coord.core_points() + b_coord.core_points()

            lev_coord.bounds = a_coord.core_bounds() + b_coord.core_bounds()

            lev_coord.units = "1"

            return cubes

            self._fix_formula_terms(dataset)

            # Correct order of bounds data

            a_bnds = dataset.variables["a_bnds"]

            a_bnds.data = a_bnds.data[::-1, :]

            b_bnds = dataset.variables["b_bnds"]

            b_bnds.data = b_bnds.data[::-1, :]

            # Correct lev and lev_bnds data

            lev = dataset.variables["lev"]

            lev.data = dataset.variables["a"].data + dataset.variables["b"].data

            lev_bnds = dataset.variables["lev_bnds"]

            lev_bnds.data = (

                dataset.variables["a_bnds"].data + dataset.variables["b_bnds"].data

            )

            # Remove 'title' attribute that duplicates long name

            for var_name in dataset.variables:

                dataset.variables[var_name].attributes.pop("title", None)

            return [self.get_cube_from_list(dataset_to_iris(dataset, file))]

    Cli = Cl

esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,8 +1,13 @@
  
    """Fixes for CESM2-WACCM model."""

    from netCDF4 import Dataset

    from __future__ import annotations

    from typing import TYPE_CHECKING

    import ncdata.netcdf4

    from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord

    from esmvalcore.iris_helpers import dataset_to_iris

    from .cesm2 import Cl as BaseCl

    from .cesm2 import Fgco2 as BaseFgco2

    @@ -12,11 +17,22 @@
  
    from .cesm2 import Tasmax as BaseTasmax

    from .cesm2 import Tasmin as BaseTasmin

    if TYPE_CHECKING:

        from collections.abc import Sequence

        from pathlib import Path

        from iris.cube import Cube

    class Cl(BaseCl):

        """Fixes for cl."""

        def fix_file(self, file, output_dir, add_unique_suffix=False):

        def fix_file(

            self,

            file: Path,

            output_dir: Path,  # noqa: ARG002

            add_unique_suffix: bool = False,  # noqa: ARG002

        ) -> Path | Sequence[Cube]:

            """Fix hybrid pressure coordinate.

            Adds missing ``formula_terms`` attribute to file.

    @@ -43,21 +59,27 @@ def fix_file(self, file, output_dir, add_unique_suffix=False):
  
                Path to the fixed file.

            """

            new_path = self._fix_formula_terms(

            dataset = ncdata.netcdf4.from_nc4(

                file,

                output_dir,

                add_unique_suffix=add_unique_suffix,

                # Use iris-style chunks to avoid mismatching chunks between data

                # and derived coordinates, as the latter are automatically rechunked

                # by iris.

                dim_chunks={

                    "time": "auto",

                    "lev": None,

                    "lat": None,

                    "lon": None,

                    "nbnd": None,

                },

            )

            with Dataset(new_path, mode="a") as dataset:

                dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][

                    :,

                    ::-1,

                ]

                dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][

                    :,

                    ::-1,

                ]

            return new_path

            self._fix_formula_terms(dataset)

            # Correct order of bounds data

            a_bnds = dataset.variables["a_bnds"]

            a_bnds.data = a_bnds.data[:, ::-1]

            b_bnds = dataset.variables["b_bnds"]

            b_bnds.data = b_bnds.data[:, ::-1]

            return [self.get_cube_from_list(dataset_to_iris(dataset, file))]

    Cli = Cl

esmvalcore/cmor/_fixes/fix.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -35,8 +35,6 @@
  
    if TYPE_CHECKING:

        from collections.abc import Sequence

        import ncdata

        import xarray as xr

        from iris.coords import Coord

        from iris.cube import Cube

    @@ -84,10 +82,10 @@ def __init__(
  
        def fix_file(

            self,

            file: str | Path | xr.Dataset | ncdata.NcData,

            file: Path,

            output_dir: Path,  # noqa: ARG002

            add_unique_suffix: bool = False,  # noqa: ARG002

        ) -> str | Path | xr.Dataset | ncdata.NcData:

        ) -> Path | Sequence[Cube]:

            """Fix files before loading them into a :class:`~iris.cube.CubeList`.

            This is mainly intended to fix errors that prevent loading the data

    @@ -116,7 +114,7 @@ def fix_file(
  
            Returns

            -------

            str | pathlib.Path | xr.Dataset | ncdata.NcData:

            :

                Fixed data or a path to them.

            """

esmvalcore/cmor/fix.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -9,20 +9,16 @@
  
    import logging

    from collections import defaultdict

    from collections.abc import Sequence

    from pathlib import Path

    from typing import TYPE_CHECKING, Any

    from iris.cube import CubeList

    from iris.cube import Cube, CubeList

    from esmvalcore.cmor._fixes.fix import Fix

    from esmvalcore.io.local import LocalFile

    if TYPE_CHECKING:

        from collections.abc import Sequence

        from pathlib import Path

        import ncdata

        import xarray as xr

        from iris.cube import Cube

        from esmvalcore.config import Session

    logger = logging.getLogger(__name__)

    @@ -39,7 +35,7 @@ def fix_file(  # noqa: PLR0913
  
        session: Session | None = None,

        frequency: str | None = None,

        **extra_facets: Any,

    ) -> str | Path | xr.Dataset | ncdata.NcData:

    ) -> Path | Sequence[Cube]:

        """Fix files before loading them into a :class:`~iris.cube.CubeList`.

        This is mainly intended to fix errors that prevent loading the data with

    @@ -51,7 +47,7 @@ def fix_file(  # noqa: PLR0913
  
        -------

        A path should only be returned if it points to the original (unchanged)

        file (i.e., a fix was not necessary). If a fix is necessary, this function

        should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object.

        should return a :class:`~iris.cube.CubeList`.

        Under no circumstances should a copy of the input data be created (this is

        very inefficient).

    @@ -80,10 +76,15 @@ def fix_file(  # noqa: PLR0913
  
        Returns

        -------

        str | pathlib.Path | xr.Dataset | ncdata.NcData:

        :

            Fixed data or a path to them.

        """

        if not isinstance(file, Path):

            # Skip this function for any `esmvalcore.io.DataElement` that is not a path

            # to a file.

            return file

        # Update extra_facets with variable information given as regular arguments

        # to this function

        extra_facets.update(

    @@ -96,6 +97,7 @@ def fix_file(  # noqa: PLR0913
  
            },

        )

        result: Path | Sequence[Cube] = Path(file)

        for fix in Fix.get_fixes(

            project=project,

            dataset=dataset,

    @@ -105,12 +107,34 @@ def fix_file(  # noqa: PLR0913
  
            session=session,

            frequency=frequency,

        ):

            file = fix.fix_file(

                file,

            result = fix.fix_file(

                result,

                output_dir,

                add_unique_suffix=add_unique_suffix,

            )

        return file

        if isinstance(file, LocalFile):

            # This happens when this function is called from

            # `esmvalcore.dataset.Dataset.load`.

            if isinstance(result, Path):

                if result == file:

                    # No fixes have been applied, return the original file.

                    result = file

                else:

                    # The file has been fixed and the result is a path to the fixed

                    # file. The result needs to be loaded to read the global

                    # attributes for recording provenance.

                    fixed_file = LocalFile(result)

                    fixed_file.facets = file.facets

                    fixed_file.ignore_warnings = file.ignore_warnings

                    result = fixed_file.to_iris()

            if isinstance(result, Sequence) and isinstance(result[0], Cube):

                # Set the attributes for recording provenance here because

                # to_iris will not be called on the original file.

                file.attributes = result[0].attributes.globals.copy()

        return result

    def fix_metadata(

esmvalcore/config/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -29,6 +29,7 @@ @@
     for attr, value in {
         "save_split_attrs": True,
         "date_microseconds": True,
+        "derived_bounds": True,
     }.items():
         with contextlib.suppress(AttributeError):
             setattr(iris.FUTURE, attr, value)

esmvalcore/preprocessor/_concatenate.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -13,7 +13,6 @@ @@
     from esmvalcore.cmor.check import CheckLevels
     from esmvalcore.io.esgf.facets import FACETS
     from esmvalcore.iris_helpers import merge_cube_attributes
-    from esmvalcore.preprocessor._shared import _rechunk_aux_factory_dependencies
     if TYPE_CHECKING:
         from collections.abc import Iterable, Sequence
@@ Expand Down Expand Up / @@ -282,7 +281,6 @@ def concatenate( @@
         cubes = _sort_cubes_by_time(cubes)
         _fix_calendars(cubes)
         cubes = _remove_time_overlaps(cubes)
-        cubes = [_rechunk_aux_factory_dependencies(cube) for cube in cubes]
         result = _concatenate_cubes(cubes, check_level=check_level)
         if len(result) == 1:
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Modernize derived variables #2999

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Modernize derived variables #2999

Are you sure you want to change the base?

Modernize derived variables #2999

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!