Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalise units of coordinate bounds #5746

Merged
merged 7 commits into from Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
Expand Up @@ -93,6 +93,10 @@ This document explains the changes made to Iris for this release
:mod:`iris.fileformats._nc_load_rules.helpers` to lessen warning duplication.
(:issue:`5536`, :pull:`5685`)

#. `@bjlittle`_ fixed coordinate construction in the NetCDF loading pipeline to
ensure that bounds have the same units as the associated points.
(:issue:`1801`, :pull:`5746`)


馃挘 Incompatible Changes
=======================
Expand Down
70 changes: 67 additions & 3 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Expand Up @@ -13,8 +13,10 @@
build routines, and which it does not use.

"""
from __future__ import annotations

import re
from typing import List
from typing import TYPE_CHECKING, List, Optional
import warnings

import cf_units
Expand All @@ -35,6 +37,12 @@
import iris.std_names
import iris.util

if TYPE_CHECKING:
from numpy.ma import MaskedArray
from numpy.typing import ArrayLike

from iris.fileformats.cf import CFBoundaryVariable

# TODO: should un-addable coords / cell measures / etcetera be skipped? iris#5068.

#
Expand Down Expand Up @@ -896,8 +904,9 @@ def get_attr_units(cf_var, attributes):
cf_units.as_unit(attr_units)
except ValueError:
# Using converted unicode message. Can be reverted with Python 3.
msg = "Ignoring netCDF variable {!r} invalid units {!r}".format(
cf_var.cf_name, attr_units
msg = (
f"Ignoring invalid units {attr_units!r} on netCDF variable "
f"{cf_var.cf_name!r}."
)
warnings.warn(
msg,
Expand Down Expand Up @@ -1024,6 +1033,57 @@ def reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var):
return bounds_data


################################################################################
def _normalise_bounds_units(
points_units: str, cf_bounds_var: CFBoundaryVariable, bounds_data: ArrayLike
) -> Optional[MaskedArray]:
"""Ensure bounds have units compatible with points.

If required, the `bounds_data` will be converted to the `points_units`.
If the bounds units are not convertible, a warning will be issued and
the `bounds_data` will be ignored.

Bounds with invalid units will be gracefully left unconverted and passed through.

Parameters
----------
points_units : str
The units of the coordinate points.
cf_bounds_var : CFBoundaryVariable
The serialized NetCDF bounds variable.
bounds_data : MaskedArray
The pre-processed data of the bounds variable.

Returns
-------
MaskedArray or None
The bounds data with the same units as the points, or ``None``
if the bounds units are not convertible to the points units.

"""
bounds_units = get_attr_units(cf_bounds_var, {})
bjlittle marked this conversation as resolved.
Show resolved Hide resolved

if bounds_units != UNKNOWN_UNIT_STRING:
points_units = cf_units.Unit(points_units)
bounds_units = cf_units.Unit(bounds_units)

if bounds_units != points_units:
if bounds_units.is_convertible(points_units):
bounds_data = bounds_units.convert(bounds_data, points_units)
else:
wmsg = (
f"Ignoring bounds on NetCDF variable {cf_bounds_var.cf_name!r}. "
f"Expected units compatible with {points_units.origin!r}, got "
f"{bounds_units.origin!r}."
)
warnings.warn(
wmsg, category=iris.exceptions.IrisCfLoadWarning, stacklevel=2
)
bounds_data = None

return bounds_data


################################################################################
def build_dimension_coordinate(
engine, cf_coord_var, coord_name=None, coord_system=None
Expand Down Expand Up @@ -1061,6 +1121,8 @@ def build_dimension_coordinate(
# dimension names.
if cf_bounds_var.shape[:-1] != cf_coord_var.shape:
bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var)

bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data)
else:
bounds_data = None

Expand Down Expand Up @@ -1186,6 +1248,8 @@ def build_auxiliary_coordinate(
# compatibility with array creators (i.e. dask)
bounds_data = np.asarray(bounds_data)
bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var)

bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data)
else:
bounds_data = None

Expand Down
@@ -0,0 +1,102 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Test function :func:`iris.fileformats._nc_load_rules.helpers._normalise_bounds_units`."""

# import iris tests first so that some things can be initialised before
# importing anything else
from typing import Optional
from unittest import mock

import numpy as np
import pytest

from iris.exceptions import IrisCfLoadWarning
from iris.fileformats._nc_load_rules.helpers import (
_normalise_bounds_units,
_WarnComboIgnoringCfLoad,
)

BOUNDS = mock.sentinel.bounds
CF_NAME = "dummy_bnds"


def _make_cf_bounds_var(
units: Optional[str] = None,
unitless: bool = False,
) -> mock.MagicMock:
"""Construct a mock CF bounds variable."""
if units is None:
units = "days since 1970-01-01"

cf_data = mock.Mock(spec=[])
# we want to mock the absence of flag attributes to helpers.get_attr_units
# see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes
del cf_data.flag_values
del cf_data.flag_masks
del cf_data.flag_meanings

cf_var = mock.MagicMock(
cf_name=CF_NAME,
cf_data=cf_data,
units=units,
calendar=None,
dtype=float,
)

if unitless:
del cf_var.units

return cf_var


def test_unitless() -> None:
"""Test bounds variable with no units."""
cf_bounds_var = _make_cf_bounds_var(unitless=True)
result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS)
assert result == BOUNDS


def test_invalid_units__pass_through() -> None:
"""Test bounds variable with invalid units."""
units = "invalid"
cf_bounds_var = _make_cf_bounds_var(units=units)
wmsg = f"Ignoring invalid units {units!r} on netCDF variable {CF_NAME!r}"
with pytest.warns(_WarnComboIgnoringCfLoad, match=wmsg):
result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS)
assert result == BOUNDS


@pytest.mark.parametrize("units", ["unknown", "no_unit", "1", "kelvin"])
def test_ignore_bounds(units) -> None:
"""Test bounds variable with incompatible units compared to points."""
points_units = "km"
cf_bounds_var = _make_cf_bounds_var(units=units)
wmsg = (
f"Ignoring bounds on NetCDF variable {CF_NAME!r}. "
f"Expected units compatible with {points_units!r}"
)
with pytest.warns(IrisCfLoadWarning, match=wmsg):
result = _normalise_bounds_units(points_units, cf_bounds_var, BOUNDS)
assert result is None


def test_compatible() -> None:
"""Test bounds variable with compatible units requiring conversion."""
points_units, bounds_units = "days since 1970-01-01", "hours since 1970-01-01"
cf_bounds_var = _make_cf_bounds_var(units=bounds_units)
bounds = np.arange(10, dtype=float) * 24
result = _normalise_bounds_units(points_units, cf_bounds_var, bounds)
expected = bounds / 24
np.testing.assert_array_equal(result, expected)


def test_same_units() -> None:
"""Test bounds variable with same units as points."""
units = "days since 1970-01-01"
cf_bounds_var = _make_cf_bounds_var(units=units)
bounds = np.arange(10, dtype=float)
result = _normalise_bounds_units(units, cf_bounds_var, bounds)
np.testing.assert_array_equal(result, bounds)
assert result is bounds
Expand Up @@ -45,7 +45,7 @@ def setUp(self):
cf_data=cf_data,
standard_name=None,
long_name="wibble",
units="m",
units="km",
shape=points.shape,
size=np.prod(points.shape),
dtype=points.dtype,
Expand Down Expand Up @@ -96,31 +96,42 @@ def _get_per_test_bounds_var(_coord_unused):
)

@classmethod
def _make_array_and_cf_data(cls, dimension_names):
def _make_array_and_cf_data(cls, dimension_names, rollaxis=False):
shape = tuple(cls.dim_names_lens[name] for name in dimension_names)
cf_data = mock.MagicMock(_FillValue=None, spec=[])
cf_data.chunking = mock.MagicMock(return_value=shape)
return np.zeros(shape), cf_data

def _make_cf_bounds_var(self, dimension_names):
data = np.arange(np.prod(shape), dtype=float)
if rollaxis:
shape = shape[1:] + (shape[0],)
data = data.reshape(shape)
data = np.rollaxis(data, -1)
else:
data = data.reshape(shape)
return data, cf_data

def _make_cf_bounds_var(self, dimension_names, rollaxis=False):
# Create the bounds cf variable.
bounds, cf_data = self._make_array_and_cf_data(dimension_names)
bounds, cf_data = self._make_array_and_cf_data(
dimension_names, rollaxis=rollaxis
)
bounds *= 1000 # Convert to metres.
cf_bounds_var = mock.Mock(
spec=CFVariable,
dimensions=dimension_names,
cf_name="wibble_bnds",
cf_data=cf_data,
units="m",
shape=bounds.shape,
size=np.prod(bounds.shape),
dtype=bounds.dtype,
__getitem__=lambda self, key: bounds[key],
)

return bounds, cf_bounds_var
return cf_bounds_var

def _check_case(self, dimension_names):
bounds, self.cf_bounds_var = self._make_cf_bounds_var(
dimension_names=dimension_names
def _check_case(self, dimension_names, rollaxis=False):
self.cf_bounds_var = self._make_cf_bounds_var(
dimension_names, rollaxis=rollaxis
)

# Asserts must lie within context manager because of deferred loading.
Expand All @@ -133,15 +144,15 @@ def _check_case(self, dimension_names):
expected_list = [(self.expected_coord, self.cf_coord_var.cf_name)]
self.assertEqual(self.engine.cube_parts["coordinates"], expected_list)

def test_fastest_varying_vertex_dim(self):
def test_fastest_varying_vertex_dim__normalise_bounds(self):
# The usual order.
self._check_case(dimension_names=("foo", "bar", "nv"))

def test_slowest_varying_vertex_dim(self):
def test_slowest_varying_vertex_dim__normalise_bounds(self):
# Bounds in the first (slowest varying) dimension.
self._check_case(dimension_names=("nv", "foo", "bar"))
self._check_case(dimension_names=("nv", "foo", "bar"), rollaxis=True)

def test_fastest_with_different_dim_names(self):
def test_fastest_with_different_dim_names__normalise_bounds(self):
# Despite the dimension names ('x', and 'y') differing from the coord's
# which are 'foo' and 'bar' (as permitted by the cf spec),
# this should still work because the vertex dim is the fastest varying.
Expand Down Expand Up @@ -232,6 +243,7 @@ def setUp(self):
)

points = np.arange(6)
units = "days since 1970-01-01"
self.cf_coord_var = mock.Mock(
spec=threadsafe_nc.VariableWrapper,
dimensions=("foo",),
Expand All @@ -241,7 +253,7 @@ def setUp(self):
cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None), spec=[]),
standard_name=None,
long_name="wibble",
units="days since 1970-01-01",
units=units,
calendar=None,
shape=points.shape,
size=np.prod(points.shape),
Expand All @@ -250,13 +262,20 @@ def setUp(self):
)

bounds = np.arange(12).reshape(6, 2)
cf_data = mock.MagicMock(chunking=mock.Mock(return_value=None))
# we want to mock the absence of flag attributes to helpers.get_attr_units
# see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes
del cf_data.flag_values
del cf_data.flag_masks
del cf_data.flag_meanings
self.cf_bounds_var = mock.Mock(
spec=threadsafe_nc.VariableWrapper,
dimensions=("x", "nv"),
scale_factor=1,
add_offset=0,
cf_name="wibble_bnds",
cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None)),
cf_data=cf_data,
units=units,
shape=bounds.shape,
size=np.prod(bounds.shape),
dtype=bounds.dtype,
Expand Down