Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- humanfriendly
- intake-esgf >=2025.10.22
- intake-esm
- iris
- iris >=3.13
- iris-esmf-regrid >=0.11.0
- iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535
- isodate >=0.7.0 # incompatible with very old 0.6.1
Expand Down
109 changes: 49 additions & 60 deletions esmvalcore/cmor/_fixes/cmip6/cesm2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from __future__ import annotations

from shutil import copyfile
from typing import TYPE_CHECKING

import iris
import iris.coords
import ncdata
import ncdata.netcdf4
import numpy as np
from netCDF4 import Dataset

from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord
from esmvalcore.cmor._fixes.fix import Fix
Expand All @@ -19,40 +19,36 @@
add_scalar_typesea_coord,
fix_ocean_depth_coord,
)
from esmvalcore.iris_helpers import dataset_to_iris

if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path

from iris.cube import Cube


class Cl(Fix):
"""Fixes for ``cl``."""

def _fix_formula_terms(
self,
file: str | Path,
output_dir: str | Path,
add_unique_suffix: bool = False,
) -> Path:
@staticmethod
def _fix_formula_terms(dataset: ncdata.NcData) -> None:
"""Fix ``formula_terms`` attribute."""
new_path = self.get_fixed_filepath(
output_dir,
file,
add_unique_suffix=add_unique_suffix,
lev = dataset.variables["lev"]
lev.set_attrval("formula_terms", "p0: p0 a: a b: b ps: ps")
lev.set_attrval(
"standard_name",
"atmosphere_hybrid_sigma_pressure_coordinate",
)
copyfile(file, new_path)
with Dataset(new_path, mode="a") as dataset:
dataset.variables["lev"].formula_terms = "p0: p0 a: a b: b ps: ps"
dataset.variables[
"lev"
].standard_name = "atmosphere_hybrid_sigma_pressure_coordinate"
return new_path
lev.set_attrval("units", "1")
dataset.variables["lev_bnds"].attributes.pop("units")

def fix_file(
self,
file: str | Path,
output_dir: str | Path,
add_unique_suffix: bool = False,
) -> Path:
file: Path,
output_dir: Path, # noqa: ARG002
add_unique_suffix: bool = False, # noqa: ARG002
) -> Path | Sequence[Cube]:
"""Fix hybrid pressure coordinate.

Adds missing ``formula_terms`` attribute to file.
Expand All @@ -79,45 +75,38 @@ def fix_file(
Path to the fixed file.

"""
new_path = self._fix_formula_terms(
dataset = ncdata.netcdf4.from_nc4(
file,
output_dir,
add_unique_suffix=add_unique_suffix,
# Use iris-style chunks to avoid mismatching chunks between data
# and derived coordinates, as the latter are automatically rechunked
# by iris.
dim_chunks={
"time": "auto",
"lev": None,
"lat": None,
"lon": None,
"nbnd": None,
},
)
with Dataset(new_path, mode="a") as dataset:
dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][
::-1,
:,
]
dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][
::-1,
:,
]
return new_path

def fix_metadata(self, cubes):
"""Fix ``atmosphere_hybrid_sigma_pressure_coordinate``.

See discussion in #882 for more details on that.

Parameters
----------
cubes : iris.cube.CubeList
Input cubes.

Returns
-------
iris.cube.CubeList

"""
cube = self.get_cube_from_list(cubes)
lev_coord = cube.coord(var_name="lev")
a_coord = cube.coord(var_name="a")
b_coord = cube.coord(var_name="b")
lev_coord.points = a_coord.core_points() + b_coord.core_points()
lev_coord.bounds = a_coord.core_bounds() + b_coord.core_bounds()
lev_coord.units = "1"
return cubes
self._fix_formula_terms(dataset)

# Correct order of bounds data
a_bnds = dataset.variables["a_bnds"]
a_bnds.data = a_bnds.data[::-1, :]
b_bnds = dataset.variables["b_bnds"]
b_bnds.data = b_bnds.data[::-1, :]

# Correct lev and lev_bnds data
lev = dataset.variables["lev"]
lev.data = dataset.variables["a"].data + dataset.variables["b"].data
lev_bnds = dataset.variables["lev_bnds"]
lev_bnds.data = (
dataset.variables["a_bnds"].data + dataset.variables["b_bnds"].data
)
# Remove 'title' attribute that duplicates long name
for var_name in dataset.variables:
dataset.variables[var_name].attributes.pop("title", None)
return [self.get_cube_from_list(dataset_to_iris(dataset, file))]


Cli = Cl
Expand Down
52 changes: 37 additions & 15 deletions esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
"""Fixes for CESM2-WACCM model."""

from netCDF4 import Dataset
from __future__ import annotations

from typing import TYPE_CHECKING

import ncdata.netcdf4

from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord
from esmvalcore.iris_helpers import dataset_to_iris

from .cesm2 import Cl as BaseCl
from .cesm2 import Fgco2 as BaseFgco2
Expand All @@ -12,11 +17,22 @@
from .cesm2 import Tasmax as BaseTasmax
from .cesm2 import Tasmin as BaseTasmin

if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path

from iris.cube import Cube


class Cl(BaseCl):
"""Fixes for cl."""

def fix_file(self, file, output_dir, add_unique_suffix=False):
def fix_file(
self,
file: Path,
output_dir: Path, # noqa: ARG002
add_unique_suffix: bool = False, # noqa: ARG002
) -> Path | Sequence[Cube]:
"""Fix hybrid pressure coordinate.

Adds missing ``formula_terms`` attribute to file.
Expand All @@ -43,21 +59,27 @@ def fix_file(self, file, output_dir, add_unique_suffix=False):
Path to the fixed file.

"""
new_path = self._fix_formula_terms(
dataset = ncdata.netcdf4.from_nc4(
file,
output_dir,
add_unique_suffix=add_unique_suffix,
# Use iris-style chunks to avoid mismatching chunks between data
# and derived coordinates, as the latter are automatically rechunked
# by iris.
dim_chunks={
"time": "auto",
"lev": None,
"lat": None,
"lon": None,
"nbnd": None,
},
)
with Dataset(new_path, mode="a") as dataset:
dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][
:,
::-1,
]
dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][
:,
::-1,
]
return new_path
self._fix_formula_terms(dataset)

# Correct order of bounds data
a_bnds = dataset.variables["a_bnds"]
a_bnds.data = a_bnds.data[:, ::-1]
b_bnds = dataset.variables["b_bnds"]
b_bnds.data = b_bnds.data[:, ::-1]
return [self.get_cube_from_list(dataset_to_iris(dataset, file))]


Cli = Cl
Expand Down
8 changes: 3 additions & 5 deletions esmvalcore/cmor/_fixes/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
if TYPE_CHECKING:
from collections.abc import Sequence

import ncdata
import xarray as xr
from iris.coords import Coord
from iris.cube import Cube

Expand Down Expand Up @@ -84,10 +82,10 @@ def __init__(

def fix_file(
self,
file: str | Path | xr.Dataset | ncdata.NcData,
file: Path,
output_dir: Path, # noqa: ARG002
add_unique_suffix: bool = False, # noqa: ARG002
) -> str | Path | xr.Dataset | ncdata.NcData:
) -> Path | Sequence[Cube]:
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.

This is mainly intended to fix errors that prevent loading the data
Expand Down Expand Up @@ -116,7 +114,7 @@ def fix_file(

Returns
-------
str | pathlib.Path | xr.Dataset | ncdata.NcData:
:
Fixed data or a path to them.

"""
Expand Down
52 changes: 38 additions & 14 deletions esmvalcore/cmor/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@

import logging
from collections import defaultdict
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any

from iris.cube import CubeList
from iris.cube import Cube, CubeList

from esmvalcore.cmor._fixes.fix import Fix
from esmvalcore.io.local import LocalFile

if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path

import ncdata
import xarray as xr
from iris.cube import Cube

from esmvalcore.config import Session

logger = logging.getLogger(__name__)
Expand All @@ -39,7 +35,7 @@ def fix_file( # noqa: PLR0913
session: Session | None = None,
frequency: str | None = None,
**extra_facets: Any,
) -> str | Path | xr.Dataset | ncdata.NcData:
) -> Path | Sequence[Cube]:
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.

This is mainly intended to fix errors that prevent loading the data with
Expand All @@ -51,7 +47,7 @@ def fix_file( # noqa: PLR0913
-------
A path should only be returned if it points to the original (unchanged)
file (i.e., a fix was not necessary). If a fix is necessary, this function
should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object.
should return a :class:`~iris.cube.CubeList`.
Under no circumstances should a copy of the input data be created (this is
very inefficient).

Expand Down Expand Up @@ -80,10 +76,15 @@ def fix_file( # noqa: PLR0913

Returns
-------
str | pathlib.Path | xr.Dataset | ncdata.NcData:
:
Fixed data or a path to them.

"""
if not isinstance(file, Path):
# Skip this function for any `esmvalcore.io.DataElement` that is not a
# path to a file.
return file

# Update extra_facets with variable information given as regular arguments
# to this function
extra_facets.update(
Expand All @@ -96,6 +97,7 @@ def fix_file( # noqa: PLR0913
},
)

result: Path | Sequence[Cube] = Path(file)
for fix in Fix.get_fixes(
project=project,
dataset=dataset,
Expand All @@ -105,12 +107,34 @@ def fix_file( # noqa: PLR0913
session=session,
frequency=frequency,
):
file = fix.fix_file(
file,
result = fix.fix_file(
result,
output_dir,
add_unique_suffix=add_unique_suffix,
)
return file

if isinstance(file, LocalFile):
# This happens when this function is called from
# `esmvalcore.dataset.Dataset.load`.
if isinstance(result, Path):
if result == file:
# No fixes have been applied, return the original file.
result = file
else:
# The file has been fixed and the result is a path to the fixed
# file. The result needs to be loaded to read the global
# attributes for recording provenance.
fixed_file = LocalFile(result)
fixed_file.facets = file.facets
fixed_file.ignore_warnings = file.ignore_warnings
result = fixed_file.to_iris()

if isinstance(result, Sequence) and isinstance(result[0], Cube):
# Set the attributes for recording provenance here because
# to_iris will not be called on the original file.
file.attributes = result[0].attributes.globals.copy()

return result


def fix_metadata(
Expand Down
1 change: 1 addition & 0 deletions esmvalcore/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# Switch on each listed flag on ``iris.FUTURE``. ``AttributeError`` is
# suppressed per flag, so a flag that cannot be set is skipped and the
# remaining flags are still applied.
# NOTE(review): presumably this tolerates iris versions that do not define
# one of these flags -- confirm.
for attr, value in {
"save_split_attrs": True,
"date_microseconds": True,
"derived_bounds": True,
}.items():
with contextlib.suppress(AttributeError):
setattr(iris.FUTURE, attr, value)
2 changes: 0 additions & 2 deletions esmvalcore/preprocessor/_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from esmvalcore.cmor.check import CheckLevels
from esmvalcore.io.esgf.facets import FACETS
from esmvalcore.iris_helpers import merge_cube_attributes
from esmvalcore.preprocessor._shared import _rechunk_aux_factory_dependencies

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
Expand Down Expand Up @@ -282,7 +281,6 @@ def concatenate(
cubes = _sort_cubes_by_time(cubes)
_fix_calendars(cubes)
cubes = _remove_time_overlaps(cubes)
cubes = [_rechunk_aux_factory_dependencies(cube) for cube in cubes]
result = _concatenate_cubes(cubes, check_level=check_level)

if len(result) == 1:
Expand Down
Loading