Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle derived coordinates correctly in concatenate #5096

Merged
merged 6 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ This document explains the changes made to Iris for this release
🐛 Bugs Fixed
=============

#. `@schlunma`_ fixed :meth:`iris.cube.CubeList.concatenate` so that it
preserves derived coordinates. (:issue:`2478`, :pull:`5096`)

#. `@trexfeathers`_ and `@pp-mo`_ made Iris' use of the `netCDF4`_ library
thread-safe. (:pull:`5095`)

Expand Down
186 changes: 186 additions & 0 deletions lib/iris/_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,39 @@ def name(self):
return self.defn.name()


class _DerivedCoordAndDims(
    namedtuple("DerivedCoordAndDims", ["coord", "dims", "aux_factory"])
):
    """
    Container for a derived coordinate, the associated AuxCoordFactory, and the
    associated data dimension(s) spanned over a :class:`iris.cube.Cube`.

    Args:

    * coord:
        A :class:`iris.coords.DimCoord` or :class:`iris.coords.AuxCoord`
        coordinate instance.

    * dims:
        A tuple of the data dimension(s) spanned by the coordinate.

    * aux_factory:
        A :class:`iris.aux_factory.AuxCoordFactory` instance.

    Both ``==`` and ``!=`` compare the ``coord`` and ``dims`` fields only; the
    ``aux_factory`` field is deliberately left out of the comparison.

    """

    __slots__ = ()

    def __eq__(self, other):
        """Do not take aux factories into account for equality."""
        if not isinstance(other, _DerivedCoordAndDims):
            return NotImplemented
        return self.coord == other.coord and self.dims == other.dims

    def __ne__(self, other):
        """Do not take aux factories into account for inequality."""
        # ``tuple`` provides its own ``__ne__``, which would otherwise be
        # inherited and compare all three fields (aux factory included),
        # contradicting ``__eq__``.  Derive the answer from ``__eq__`` instead.
        result = self.__eq__(other)
        if result is not NotImplemented:
            result = not result
        return result


class _OtherMetaData(namedtuple("OtherMetaData", ["defn", "dims"])):
"""
Container for the metadata that defines a cell measure or ancillary
Expand Down Expand Up @@ -280,6 +313,7 @@ def concatenate(
check_aux_coords=True,
check_cell_measures=True,
check_ancils=True,
check_derived_coords=True,
):
"""
Concatenate the provided cubes over common existing dimensions.
Expand All @@ -296,6 +330,30 @@ def concatenate(
If True, raise an informative
:class:`~iris.exceptions.ConcatenateError` if registration fails.

* check_aux_coords
Checks if the points and bounds of auxiliary coordinates of the cubes
match. This check is not applied to auxiliary coordinates that span the
dimension the concatenation is occurring along. Defaults to True.

* check_cell_measures
Checks if the data of cell measures of the cubes match. This check is
not applied to cell measures that span the dimension the concatenation
is occurring along. Defaults to True.

* check_ancils
Checks if the data of ancillary variables of the cubes match. This
check is not applied to ancillary variables that span the dimension the
concatenation is occurring along. Defaults to True.

* check_derived_coords
Checks if the points and bounds of derived coordinates of the cubes
match. This check is not applied to derived coordinates that span the
dimension the concatenation is occurring along. Note that differences
in scalar coordinates and dimensional coordinates used to derive the
coordinate are still checked. Checks for auxiliary coordinates used to
derive the coordinates can be ignored with `check_aux_coords`. Defaults
to True.

Returns:
A :class:`iris.cube.CubeList` of concatenated :class:`iris.cube.Cube`
instances.
Expand All @@ -321,6 +379,7 @@ def concatenate(
check_aux_coords,
check_cell_measures,
check_ancils,
check_derived_coords,
)
if registered:
axis = proto_cube.axis
Expand Down Expand Up @@ -378,6 +437,8 @@ def __init__(self, cube):
self.cm_metadata = []
self.ancillary_variables_and_dims = []
self.av_metadata = []
self.derived_coords_and_dims = []
self.derived_metadata = []
self.dim_mapping = []

# Determine whether there are any anonymous cube dimensions.
Expand Down Expand Up @@ -437,6 +498,17 @@ def meta_key_func(dm):
av_and_dims = _CoordAndDims(av, tuple(dims))
self.ancillary_variables_and_dims.append(av_and_dims)

def name_key_func(factory):
return factory.name()

for factory in sorted(cube.aux_factories, key=name_key_func):
coord = factory.make_coord(cube.coord_dims)
dims = cube.coord_dims(coord)
metadata = _CoordMetaData(coord, dims)
self.derived_metadata.append(metadata)
coord_and_dims = _DerivedCoordAndDims(coord, tuple(dims), factory)
self.derived_coords_and_dims.append(coord_and_dims)

def _coordinate_differences(self, other, attr, reason="metadata"):
"""
Determine the names of the coordinates that differ between `self` and
Expand Down Expand Up @@ -544,6 +616,14 @@ def match(self, other, error_on_mismatch):
msgs.append(
msg_template.format("Ancillary variables", *differences)
)
# Check derived coordinates.
if self.derived_metadata != other.derived_metadata:
differences = self._coordinate_differences(
other, "derived_metadata"
)
msgs.append(
msg_template.format("Derived coordinates", *differences)
)
# Check scalar coordinates.
if self.scalar_coords != other.scalar_coords:
differences = self._coordinate_differences(
Expand Down Expand Up @@ -597,6 +677,7 @@ def __init__(self, cube_signature):
self.ancillary_variables_and_dims = (
cube_signature.ancillary_variables_and_dims
)
self.derived_coords_and_dims = cube_signature.derived_coords_and_dims
self.dim_coords = cube_signature.dim_coords
self.dim_mapping = cube_signature.dim_mapping
self.dim_extents = []
Expand Down Expand Up @@ -779,6 +860,11 @@ def concatenate(self):
# Concatenate the new ancillary variables
ancillary_variables_and_dims = self._build_ancillary_variables()

# Concatenate the new aux factories
aux_factories = self._build_aux_factories(
dim_coords_and_dims, aux_coords_and_dims
)

# Concatenate the new data payload.
data = self._build_data()

Expand All @@ -790,6 +876,7 @@ def concatenate(self):
aux_coords_and_dims=aux_coords_and_dims,
cell_measures_and_dims=cell_measures_and_dims,
ancillary_variables_and_dims=ancillary_variables_and_dims,
aux_factories=aux_factories,
**kwargs,
)
else:
Expand All @@ -807,6 +894,7 @@ def register(
check_aux_coords=False,
check_cell_measures=False,
check_ancils=False,
check_derived_coords=False,
):
"""
Determine whether the given source-cube is suitable for concatenation
Expand All @@ -827,6 +915,31 @@ def register(
* error_on_mismatch:
If True, raise an informative error if registration fails.

* check_aux_coords
Checks if the points and bounds of auxiliary coordinates of the
cubes match. This check is not applied to auxiliary coordinates
that span the dimension the concatenation is occurring along.
Defaults to False.

* check_cell_measures
Checks if the data of cell measures of the cubes match. This check
is not applied to cell measures that span the dimension the
concatenation is occurring along. Defaults to False.

* check_ancils
Checks if the data of ancillary variables of the cubes match. This
check is not applied to ancillary variables that span the dimension
the concatenation is occurring along. Defaults to False.

* check_derived_coords
Checks if the points and bounds of derived coordinates of the cubes
match. This check is not applied to derived coordinates that span
the dimension the concatenation is occurring along. Note that
differences in scalar coordinates and dimensional coordinates used
to derive the coordinate are still checked. Checks for auxiliary
coordinates used to derive the coordinates can be ignored with
`check_aux_coords`. Defaults to False.

Returns:
Boolean.

Expand Down Expand Up @@ -905,6 +1018,21 @@ def register(
if not coord_a == coord_b:
match = False

# Check for compatible derived coordinates.
if match:
if check_derived_coords:
for coord_a, coord_b in zip(
self._cube_signature.derived_coords_and_dims,
cube_signature.derived_coords_and_dims,
):
# Derived coords that span the candidate axis can differ
if (
candidate_axis not in coord_a.dims
or candidate_axis not in coord_b.dims
):
if not coord_a == coord_b:
match = False

if match:
# Register the cube as a source-cube for this proto-cube.
self._add_skeleton(coord_signature, cube.lazy_data())
Expand Down Expand Up @@ -1088,6 +1216,64 @@ def _build_ancillary_variables(self):

return ancillary_variables_and_dims

def _build_aux_factories(self, dim_coords_and_dims, aux_coords_and_dims):
    """
    Generate the aux factories for the new concatenated cube.

    Factories whose derived coordinate does not span the axis of
    concatenation are passed through unchanged. The others are rebuilt via
    ``factory.updated`` so that they depend on the coordinates of the
    concatenated cube.

    Args:

    * dim_coords_and_dims:
        A list of dimension coordinate and dimension tuple pairs from the
        concatenated cube.

    * aux_coords_and_dims:
        A list of auxiliary coordinates and dimension(s) tuple pairs from
        the concatenated cube.

    Returns:
        A list of :class:`iris.aux_factory.AuxCoordFactory`.

    Raises:
        ValueError: if a dependency of a factory that needs rebuilding is
        found in none of the dimension, auxiliary or scalar coordinates of
        the cube signature.

    """
    cube_signature = self._cube_signature
    scalar_coords = cube_signature.scalar_coords

    # Old coordinates paired with their replacements on the concatenated
    # cube. Each pair is matched by list position, and the pairs are
    # searched in this order.
    replaceable = (
        (
            cube_signature.dim_coords,
            [item[0] for item in dim_coords_and_dims],
        ),
        (
            [item[0] for item in cube_signature.aux_coords_and_dims],
            [item[0] for item in aux_coords_and_dims],
        ),
    )

    def position_of(target, coords):
        # Same matching rule as ``list.index`` (identity, then equality),
        # but a single scan that yields None instead of raising.
        for position, candidate in enumerate(coords):
            if candidate is target or candidate == target:
                return position
        return None

    def replacement_for(old_dependency):
        for old_coords, new_coords in replaceable:
            position = position_of(old_dependency, old_coords)
            if position is not None:
                return new_coords[position]
        # Neither a dim coord nor an aux coord: it has to be one of the
        # scalar coords; ``index`` raises ValueError if it is not.
        return scalar_coords[scalar_coords.index(old_dependency)]

    aux_factories = []

    # Generate all the factories for the new concatenated cube.
    for _coord, dims, factory in cube_signature.derived_coords_and_dims:
        # Only a factory whose derived coordinate spans the nominated
        # dimension of concatenation needs new dependencies.
        if self.axis in dims:
            new_dependencies = {}
            for old_dependency in factory.dependencies.values():
                # An absent (None) dependency has no coordinate to remap;
                # looking it up would only raise ValueError.
                # NOTE(review): assumes ``factory.updated`` needs no
                # mapping entry for absent dependencies - confirm.
                if old_dependency is None:
                    continue
                new_dependencies[id(old_dependency)] = replacement_for(
                    old_dependency
                )

            # Create new factory with the updated dependencies.
            factory = factory.updated(new_dependencies)

        aux_factories.append(factory)

    return aux_factories

def _build_data(self):
"""
Generate the data payload for the new concatenated cube.
Expand Down