diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 026917a27..fc6239805 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,8 @@ Bug fixes Internal changes ^^^^^^^^^^^^^^^^ -* Changed french translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`). +* Add attribute ``units_metadata`` to outputs representing a difference between temperatures. This is needed to disambiguate temperature differences from absolute temperature, and affects indicators using functions ``cumulative_difference``, ``temperature_sum``, ``interday_diurnal_temperature_range`` and `` extreme_temperature_range``. Added ``pint2cfattrs`` function to convert pint units to a dictionary of CF attributes. ``units2pint`` is also modified to support ``units_metadata`` attributes in DataArrays. Some SDBA properties and measures previously returning units of ``delta_degC`` will now return the original input DataArray units accompanied with the ``units_metadata`` attribute. (:issue:`1822`, :pull:`1830`). +* Changed French translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1836`). CI changes ^^^^^^^^^^ diff --git a/docs/notebooks/units.ipynb b/docs/notebooks/units.ipynb index beaaa6a4b..44d2fc443 100644 --- a/docs/notebooks/units.ipynb +++ b/docs/notebooks/units.ipynb @@ -223,6 +223,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Temperature differences vs absolute temperature\n", + "\n", + "Temperature anomalies and biases as well as degree-days indicators are all *differences* between temperatures. If we assign those differences units of degrees Celsius, then converting to Kelvins or Fahrenheits will yield nonsensical values. ``pint`` solves this using *delta* units such as ``delta_degC`` and ``delta_degF``. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If we have a DataArray storing a temperature anomaly of 2°C, converting to Kelvin will yield a nonsensical value 0f 275.15.\n", + "# Fortunately, pint has delta units to represent temperature differences.\n", + "display(units.convert_units_to(xr.DataArray([2], attrs={\"units\": \"delta_degC\"}), \"K\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The issue for ``xclim`` is that there are no equivalent delta units in the CF convention. To resolve this ambiguity, the [CF convention](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#temperature-units) recommends including a ``units_metadata`` attribute set to ``\"temperature: difference\"``, and this is supported in ``xclim`` as of version 0.52. The function ``units2pint`` interprets the ``units_metadata`` attribute and returns a ``pint`` delta unit as needed. To convert a ``pint`` delta unit to CF attributes, use the function ``pint2cfattrs``, which returns a dictionary with the ``units`` and ``units_metadata`` attributes (``pint2cfunits`` cannot support the convention because it only returns the unit string). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "delta = xr.DataArray(\n", + " [2], attrs={\"units\": \"K\", \"units_metadata\": \"temperature: difference\"}\n", + ")\n", + "units.convert_units_to(delta, \"delta_degC\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", "## Other utilities\n", "\n", "Many helper functions are defined in `xclim.core.units`, see [Unit handling module](../api.rst#units-handling-submodule).\n" diff --git a/tests/test_generic.py b/tests/test_generic.py index c1eab6bf2..2c34ad4f8 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -332,6 +332,12 @@ def test_forbidden(self, tas_series): with pytest.raises(NotImplementedError): generic.cumulative_difference(tas, threshold="10 degC", op="!=") + def test_delta_units(self, tas_series): + tas = tas_series(np.array([-10, 15, 20, 3, 10]) + K2C) + + out = generic.cumulative_difference(tas, threshold="10 degC", op=">=") + assert "units_metadata" in out.attrs + class TestFirstDayThreshold: @pytest.mark.parametrize( diff --git a/tests/test_sdba/test_properties.py b/tests/test_sdba/test_properties.py index 2c413bc85..38164e840 100644 --- a/tests/test_sdba/test_properties.py +++ b/tests/test_sdba/test_properties.py @@ -287,7 +287,8 @@ def test_annual_cycle(self, open_dataset): assert amp.long_name.startswith("Absolute amplitude of the annual cycle") assert phase.long_name.startswith("Phase of the annual cycle") - assert amp.units == "delta_degC" + assert amp.units == "K" + assert amp.units_metadata == "temperature: difference" assert relamp.units == "%" assert phase.units == "" @@ -330,7 +331,8 @@ def test_annual_range(self, open_dataset): assert amp.long_name.startswith("Average annual absolute amplitude") assert phase.long_name.startswith("Average annual phase") - assert amp.units == "delta_degC" + assert amp.units == "K" + assert amp.units_metadata == "temperature: difference" assert relamp.units == "%" assert phase.units == "" diff --git a/tests/test_units.py b/tests/test_units.py index b59626fde..562cf98da 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -18,6 +18,7 @@ declare_units, infer_context, lwethickness2amount, + pint2cfattrs, pint2cfunits, pint_multiply, rate2amount, @@ -117,6 +118,14 @@ def test_cf_conversion_amount2lwethickness_amount2rate(self): assert out.attrs["units"] == "kg d-1 m-2" # CF equivalent unit assert out.attrs["standard_name"] == "rainfall_flux" + def test_temperature_difference(self): + delta = xr.DataArray( + [2], attrs={"units": "K", "units_metadata": "temperature: difference"} + ) + out = convert_units_to(source=delta, target="delta_degC") + assert out == 2 + assert out.attrs["units"] == "degC" + class TestUnitConversion: def test_pint2cfunits(self): @@ -347,3 +356,27 @@ def test_to_agg_units(in_u, opfunc, op, exp, exp_u): out = to_agg_units(getattr(da, opfunc)(), da, op) np.testing.assert_allclose(out, exp) assert out.attrs["units"] == exp_u + + +def test_pint2cfattrs(): + attrs = pint2cfattrs(units.degK, is_difference=True) + assert attrs == {"units": "K", "units_metadata": "temperature: difference"} + + attrs = pint2cfattrs(units.meter, is_difference=True) + assert "units_metadata" not in attrs + + attrs = pint2cfattrs(units.delta_degC) + assert attrs == {"units": "degC", "units_metadata": "temperature: difference"} + + +def test_temp_difference_rountrip(): + """Test roundtrip of temperature difference units.""" + attrs = {"units": "degC", "units_metadata": "temperature: difference"} + da = xr.DataArray([1], attrs=attrs) + pu = units2pint(da) + # Confirm that we get delta pint units + assert pu == units.delta_degC + + # and that converting those back to cf attrs gives the same result + attrs = pint2cfattrs(pu) + assert attrs == {"units": "degC", "units_metadata": "temperature: difference"} diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 74777dbe9..c3e5df64e 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -344,6 +344,7 @@ class Indicator(IndicatorRegistrar): "standard_name", "long_name", "units", + "units_metadata", "cell_methods", "description", "comment", @@ -856,7 +857,10 @@ def __call__(self, *args, **kwds): for out, attrs, base_attrs in zip(outs, out_attrs, self.cf_attrs): if self.n_outs > 1: var_id = base_attrs["var_name"] + # Set default units attrs.update(units=out.units) + if "units_metadata" in out.attrs: + attrs["units_metadata"] = out.attrs["units_metadata"] attrs.update( self._update_attrs( params.copy(), @@ -869,7 +873,7 @@ def __call__(self, *args, **kwds): # Convert to output units outs = [ - convert_units_to(out, attrs["units"], self.context) + convert_units_to(out, attrs, self.context) for out, attrs in zip(outs, out_attrs) ] diff --git a/xclim/core/units.py b/xclim/core/units.py index 1c6ec4b62..5dd1ed62b 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -122,26 +122,44 @@ def _func_register(func: Callable) -> Callable: units.define("[radiation] = [power] / [length]**2") -def units2pint(value: xr.DataArray | str | units.Quantity) -> pint.Unit: +def units2pint( + value: xr.DataArray | units.Unit | units.Quantity | dict | str, +) -> pint.Unit: """Return the pint Unit for the DataArray units. Parameters ---------- - value : xr.DataArray or str or pint.Quantity + value : xr.DataArray or pint.Unit or pint.Quantity or dict or str Input data array or string representing a unit (with no magnitude). Returns ------- pint.Unit Units of the data array. + + Notes + ----- + To avoid ambiguity related to differences in temperature vs absolute temperatures, set the `units_metadata` + attribute to `"temperature: difference"` or `"temperature: on_scale"` on the DataArray. """ - if isinstance(value, str): - unit = value - elif isinstance(value, xr.DataArray): - unit = value.attrs["units"] - elif isinstance(value, units.Quantity): + # Value is already a pint unit or a pint quantity + if isinstance(value, units.Unit): + return value + + if isinstance(value, units.Quantity): # This is a pint.PlainUnit, which is not the same as a pint.Unit return cast(pint.Unit, value.units) + + # We only need the attributes + if isinstance(value, xr.DataArray): + value = value.attrs + + if isinstance(value, str): + unit = value + metadata = None + elif isinstance(value, dict): + unit = value["units"] + metadata = value.get("units_metadata", None) else: raise NotImplementedError(f"Value of type `{type(value)}` not supported.") @@ -164,7 +182,10 @@ def units2pint(value: xr.DataArray | str | units.Quantity) -> pint.Unit: "Remove white space from temperature units, e.g. use `degC`." ) - return units.parse_units(unit) + pu = units.parse_units(unit) + if metadata == "temperature: difference": + return (1 * pu - 1 * pu).units + return pu def pint2cfunits(value: units.Quantity | units.Unit) -> str: @@ -185,11 +206,46 @@ def pint2cfunits(value: units.Quantity | units.Unit) -> str: # Issue originally introduced in https://github.com/hgrecco/pint/issues/1486 # Should be resolved in pint v0.24. See: https://github.com/hgrecco/pint/issues/1913 + # Relies on cf_xarray's unit string formatting logic. with warnings.catch_warnings(): warnings.simplefilter("ignore", category=DeprecationWarning) return f"{value:cf}".replace("dimensionless", "") +def pint2cfattrs(value: units.Quantity | units.Unit, is_difference=None) -> dict: + """Return CF-compliant units attributes from a `pint` unit. + + Parameters + ---------- + value : pint.Unit + Input unit. + is_difference : bool + Whether the value represent a difference in temperature, which is ambiguous in the case of absolute + temperature scales like Kelvin or Rankine. It will automatically be set to True if units are "delta_*" + units. + + Returns + ------- + dict + Units following CF-Convention, using symbols. + """ + s = pint2cfunits(value) + if "delta_" in s: + is_difference = True + s = s.replace("delta_", "") + + attrs = {"units": s} + if "[temperature]" in value.dimensionality: + if is_difference: + attrs["units_metadata"] = "temperature: difference" + elif is_difference is False: + attrs["units_metadata"] = "temperature: on_scale" + else: + attrs["units_metadata"] = "temperature: unknown" + + return attrs + + def ensure_cf_units(ustr: str) -> str: """Ensure the passed unit string is CF-compliant. @@ -253,7 +309,7 @@ def str2pint(val: str) -> pint.Quantity: # FIXME: The typing here is difficult to determine, as Generics cannot be used to track the type of the output. def convert_units_to( # noqa: C901 source: Quantified, - target: Quantified | units.Unit, + target: Quantified | units.Unit | dict, context: Literal["infer", "hydro", "none"] | None = None, ) -> xr.DataArray | float: """Convert a mathematical expression into a value with the same units as a DataArray. @@ -265,7 +321,7 @@ def convert_units_to( # noqa: C901 ---------- source : str or xr.DataArray or units.Quantity The value to be converted, e.g. '4C' or '1 mm/d'. - target : str or xr.DataArray or units.Quantity or units.Unit + target : str or xr.DataArray or units.Quantity or units.Unit or dict Target array of values to which units must conform. context : {"infer", "hydro", "none"}, optional The unit definition context. Default: None. @@ -292,14 +348,7 @@ def convert_units_to( # noqa: C901 context = context or "none" # Target units - if isinstance(target, units.Unit): - target_unit = target - elif isinstance(target, (str, xr.DataArray)): - target_unit = units2pint(target) - else: - raise NotImplementedError( - "target must be either a pint Unit or a xarray DataArray." - ) + target_unit = units2pint(target) if context == "infer": ctxs = [] @@ -325,7 +374,7 @@ def convert_units_to( # noqa: C901 if isinstance(source, xr.DataArray): source_unit = units2pint(source) - target_cf_unit = pint2cfunits(target_unit) + target_cf_attrs = pint2cfattrs(target_unit) # Automatic pre-conversions based on the dimensionalities and CF standard names standard_name = source.attrs.get("standard_name") @@ -360,12 +409,12 @@ def convert_units_to( # noqa: C901 out: xr.DataArray if source_unit == target_unit: # The units are the same, but the symbol may not be. - out = source.assign_attrs(units=target_cf_unit) + out = source.assign_attrs(**target_cf_attrs) return out with units.context(context or "none"): out = source.copy(data=units.convert(source.data, source_unit, target_unit)) - out = out.assign_attrs(units=target_cf_unit) + out = out.assign_attrs(**target_cf_attrs) return out # TODO remove backwards compatibility of int/float thresholds after v1.0 release @@ -563,17 +612,21 @@ def to_agg_units( elif op in ["count", "integral"]: m, freq_u_raw = infer_sampling_units(orig[dim]) + # TODO: Use delta here orig_u = str2pint(ensure_absolute_temperature(orig.units)) freq_u = str2pint(freq_u_raw) - out = out * m + with xr.set_options(keep_attrs=True): + out = out * m if op == "count": out.attrs["units"] = freq_u_raw elif op == "integral": if "[time]" in orig_u.dimensionality: # We need to simplify units after multiplication + out_units = (orig_u * freq_u).to_reduced_units() - out = out * out_units.magnitude + with xr.set_options(keep_attrs=True): + out = out * out_units.magnitude out.attrs["units"] = pint2cfunits(out_units) else: out.attrs["units"] = pint2cfunits(orig_u * freq_u) @@ -583,6 +636,10 @@ def to_agg_units( "Known ops are [min, max, mean, std, var, doymin, doymax, count, integral, sum]." ) + # Remove units_metadata where it doesn't make sense + if op in ["doymin", "doymax", "count"]: + out.attrs.pop("units_metadata", None) + return out @@ -1328,7 +1385,7 @@ def wrapper(*args, **kwargs): return dec -def ensure_delta(unit: str) -> str: +def ensure_delta(unit: xr.DataArray | str | units.Quantity) -> str: """Return delta units for temperature. For dimensions where delta exist in pint (Temperature), it replaces the temperature unit by delta_degC or @@ -1344,6 +1401,8 @@ def ensure_delta(unit: str) -> str: # delta_unit = pint2cfunits(d - d) # replace kelvin/rankine by delta_degC/F + # Note (DH): This will fail if dimension is [temperature]^-1 or [temperature]^2 (e.g. variance) + # Recent versions of pint have a `to_preferred` method that could be used here (untested). if "kelvin" in u._units: delta_unit = pint2cfunits(u / units2pint("K") * units2pint("delta_degC")) if "degree_Rankine" in u._units: diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index a0e919f62..5274e7cf4 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -22,9 +22,11 @@ convert_units_to, declare_relative_units, infer_context, + pint2cfattrs, pint2cfunits, str2pint, to_agg_units, + units2pint, ) from xclim.core.utils import DayOfYearStr, Quantified, Quantity @@ -99,6 +101,12 @@ def select_resample_op( op = op.__name__ if out_units is not None: return out.assign_attrs(units=out_units) + + if op in ["std", "var"]: + out.attrs.update( + pint2cfattrs(units2pint(out.attrs["units"]), is_difference=True) + ) + return to_agg_units(out, da, op) @@ -701,6 +709,7 @@ def temperature_sum( out = (data - threshold).where(cond).resample(time=freq).sum() out = direction * out + out.attrs["units_metadata"] = "temperature: difference" return to_agg_units(out, data, "integral") @@ -718,7 +727,6 @@ def interday_diurnal_temperature_range( freq : str Resampling frequency defining the periods as defined in :ref:`timeseries.resampling`. - Returns ------- xr.DataArray @@ -728,8 +736,8 @@ def interday_diurnal_temperature_range( vdtr = abs((high_data - low_data).diff(dim="time")) out = vdtr.resample(time=freq).mean(dim="time") - u = str2pint(low_data.units) - out.attrs["units"] = pint2cfunits(u - u) + out.attrs["units"] = low_data.attrs["units"] + out.attrs["units_metadata"] = "temperature: difference" return out @@ -755,8 +763,8 @@ def extreme_temperature_range( out = high_data.resample(time=freq).max() - low_data.resample(time=freq).min() - u = str2pint(low_data.units) - out.attrs["units"] = pint2cfunits(u - u) + out.attrs["units"] = low_data.attrs["units"] + out.attrs["units_metadata"] = "temperature: difference" return out @@ -892,6 +900,8 @@ def cumulative_difference( if freq is not None: diff = diff.resample(time=freq).sum(dim="time") + diff.attrs.update(pint2cfattrs(units2pint(data.attrs["units"]), is_difference=True)) + return to_agg_units(diff, data, op="integral") diff --git a/xclim/sdba/measures.py b/xclim/sdba/measures.py index 43432d5ee..fe0ee2582 100644 --- a/xclim/sdba/measures.py +++ b/xclim/sdba/measures.py @@ -15,7 +15,7 @@ import xarray as xr from xclim.core.indicator import Indicator, base_registry -from xclim.core.units import convert_units_to, ensure_delta, units2pint +from xclim.core.units import convert_units_to, pint2cfattrs, units2pint from xclim.core.utils import InputKind from .base import Grouper @@ -173,7 +173,7 @@ def _bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: Absolute bias """ out = sim - ref - out.attrs["units"] = ensure_delta(ref.attrs["units"]) + out.attrs.update(pint2cfattrs(units2pint(ref.attrs["units"]), is_difference=True)) return out @@ -296,7 +296,7 @@ def _rmse_internal(_sim: xr.DataArray, _ref: xr.DataArray) -> xr.DataArray: input_core_dims=[["time"], ["time"]], dask="parallelized", ) - out = out.assign_attrs(units=ensure_delta(ref.units)) + out = out.assign_attrs(pint2cfattrs(units2pint(ref.units), is_difference=True)) return out @@ -343,7 +343,7 @@ def _mae_internal(_sim: xr.DataArray, _ref: xr.DataArray) -> xr.DataArray: input_core_dims=[["time"], ["time"]], dask="parallelized", ) - out = out.assign_attrs(units=ensure_delta(ref.units)) + out = out.assign_attrs(pint2cfattrs(units2pint(ref.units), is_difference=True)) return out diff --git a/xclim/sdba/properties.py b/xclim/sdba/properties.py index ac82cc9e4..4cf3a6d57 100644 --- a/xclim/sdba/properties.py +++ b/xclim/sdba/properties.py @@ -20,7 +20,7 @@ import xclim as xc from xclim.core.indicator import Indicator, base_registry -from xclim.core.units import convert_units_to, to_agg_units +from xclim.core.units import convert_units_to, pint2cfattrs, to_agg_units, units2pint from xclim.core.utils import uses_dask from xclim.indices import run_length as rl from xclim.indices.generic import compare, select_resample_op @@ -583,7 +583,7 @@ def _annual_cycle( # TODO: In April 2024, use a match-case. if stat == "absamp": out = ac.max("dayofyear") - ac.min("dayofyear") - out.attrs["units"] = xc.core.units.ensure_delta(units) + out.attrs.update(pint2cfattrs(units2pint(units), is_difference=True)) elif stat == "relamp": out = (ac.max("dayofyear") - ac.min("dayofyear")) * 100 / ac.mean("dayofyear") out.attrs["units"] = "%" @@ -699,7 +699,7 @@ def _annual_statistic( if stat == "absamp": out = yrs.max() - yrs.min() - out.attrs["units"] = xc.core.units.ensure_delta(units) + out.attrs.update(pint2cfattrs(units2pint(units), is_difference=True)) elif stat == "relamp": out = (yrs.max() - yrs.min()) * 100 / yrs.mean() out.attrs["units"] = "%"