Ouranosinc · juliettelavoie · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -11,6 +11,10 @@ Internal changes
 * Granular permissions and dependency scanning actions have been added to all GitHub CI Workflows. (:pull:`313`).
 * Updated the list of dependencies to add missing requirements. (:pull:`314`).
 
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* Added a new argument ``indicators_kw`` to ``xs.ensembles.build_partition_data``. (:pull:`315`).
+
 v0.8.0 (2024-01-16)
 -------------------
 Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Pascal Bourgault (:user:`aulemahal`), Juliette Lavoie (:user:`juliettelavoie`), Sarah-Claude Bourdeau-Goulet (:user:`sarahclaude`), Trevor James Smith (:user:`Zeitsperre`), Marco Braun (:user:`vindelico`).

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
@@ -185,7 +185,8 @@
    "source": [
     "From a dictionary of datasets, the function creates a dataset with new dimensions in `partition_dim`(`[\"source\", \"experiment\", \"bias_adjust_project\"]`, if they exist). In this toy example, we only have different experiments.\n",
     "- By default, it translates the xscen vocabulary (eg. `experiment`) to the xclim partition vocabulary (eg. `scenario`). It is possible to pass `rename_dict` to rename the dimensions with other names.\n",
-    "- If the inputs are not on the same grid, they can be regridded through `regrid_kw` or subset to a point through `subset_kw`. The functions assumes that if there are different `bias_adjust_project`, they will be on different grids (with all `source` on the same grid). If there is one or less `bias_adjust_project`, the assumption is that`source` have different grids."
+    "- If the inputs are not on the same grid, they can be regridded through `regrid_kw` or subset to a point through `subset_kw`. The function assumes that if there are different `bias_adjust_project`, they will be on different grids (with all `source` on the same grid). If there is one or fewer `bias_adjust_project`, the assumption is that `source` have different grids.\n",
+    "- You can also compute indicators on the data if the input is daily. This can be especially useful when the daily input data is on different calendars."
    ]
   },
   {
@@ -195,8 +196,12 @@
    "outputs": [],
    "source": [
     "# build a single dataset\n",
+    "import xclim as xc\n",
+    "\n",
     "ds = xs.ensembles.build_partition_data(\n",
-    "    input_dict, subset_kw=dict(name=\"mtl\", method=\"gridpoint\", lat=[45.5], lon=[-73.6])\n",
+    "    input_dict,\n",
+    "    subset_kw=dict(name=\"mtl\", method=\"gridpoint\", lat=[45.5], lon=[-73.6]),\n",
+    "    indicators_kw={\"indicators\": [xc.atmos.tg_mean]},\n",
     ")\n",
     "ds"
    ]
@@ -221,8 +226,8 @@
     "import xarray as xr\n",
     "\n",
     "ds2 = ds.copy()\n",
-    "ds[\"time\"] = xr.cftime_range(start=\"2001-01-01\", periods=len(ds[\"time\"]), freq=\"D\")\n",
-    "ds2[\"time\"] = xr.cftime_range(start=\"2003-01-01\", periods=len(ds[\"time\"]), freq=\"D\")\n",
+    "ds[\"time\"] = xr.cftime_range(start=\"2001-01-01\", periods=len(ds[\"time\"]), freq=\"YS\")\n",
+    "ds2[\"time\"] = xr.cftime_range(start=\"2003-01-01\", periods=len(ds[\"time\"]), freq=\"YS\")\n",
     "ds = xr.concat([ds, ds2], dim=\"time\")"
    ]
   },
@@ -232,13 +237,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import xclim as xc\n",
-    "\n",
-    "# get a yearly dataset\n",
-    "da = xc.atmos.tg_mean(ds=ds)\n",
-    "\n",
     "# compute uncertainty partitionning\n",
-    "mean, uncertainties = xc.ensembles.hawkins_sutton(da)\n",
+    "mean, uncertainties = xc.ensembles.hawkins_sutton(ds.tg_mean)\n",
     "uncertainties"
    ]
   },
@@ -252,13 +252,6 @@
     "    \n",
     "</div>"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 import xarray as xr
+import xclim as xc
 import xesmf
 from xclim.testing.helpers import test_timeseries as timeseries
 
@@ -1072,13 +1073,14 @@ def test_build_partition_data(self, samplecat, tmp_path):
             datasets=datasets,
             partition_dim=["source", "experiment"],
             subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
+            indicators_kw=dict(indicators=[xc.atmos.tg_mean]),
             rename_dict={"source": "new-name"},
         )
 
-        assert ds.dims == {"time": 730, "scenario": 4, "new-name": 2}
+        assert ds.dims == {"time": 2, "scenario": 4, "new-name": 2}
         assert ds.lat.values == 45.0
         assert ds.lon.values == -74
-        assert [i for i in ds.data_vars] == ["tas"]
+        assert [i for i in ds.data_vars] == ["tg_mean"]
 
         # test regrid
         ds_grid = xesmf.util.cf_grid_2d(-75, -74, 0.25, 45, 48, 0.55)
@@ -1098,3 +1100,13 @@ def test_build_partition_data(self, samplecat, tmp_path):
             "lon": 4,
         }
         assert [i for i in ds.data_vars] == ["tas"]
+
+        # test error
+        with pytest.raises(
+            ValueError,
+        ):
+            ds = xs.ensembles.build_partition_data(
+                datasets=datasets,
+                subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
+                indicators_kw=dict(indicators=[xc.atmos.tg_mean, xc.indicators.cf.tg]),
+            )
diff --git a/xscen/ensembles.py b/xscen/ensembles.py
@@ -13,6 +13,7 @@
 from xclim import ensembles
 
 from .config import parse_config
+from .indicators import compute_indicators
 from .regrid import regrid_dataset
 from .spatial import subset
 from .utils import clean_up, get_cat_attrs
@@ -677,6 +678,7 @@ def build_partition_data(
     partition_dim: list[str] = ["source", "experiment", "bias_adjust_project"],
     subset_kw: dict = None,
     regrid_kw: dict = None,
+    indicators_kw: dict = None,
     rename_dict: dict = None,
 ):
     """Get the input for the xclim partition functions.
@@ -686,6 +688,7 @@ def build_partition_data(
     (https://xclim.readthedocs.io/en/stable/api.html#uncertainty-partitioning).
     If the inputs have different grids,
     they have to be subsetted and regridded to a common grid/point.
+    Indicators can also be computed before combining the datasets.
 
 
     Parameters
@@ -702,6 +705,9 @@ def build_partition_data(
         Arguments to pass to `xs.spatial.subset()`.
     regrid_kw:
         Arguments to pass to `xs.regrid_dataset()`.
+    indicators_kw:
+        Arguments to pass to `xs.indicators.compute_indicators()`.
+        All indicators have to be for the same frequency, in order to be put on a single time axis.
     rename_dict:
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
@@ -730,6 +736,15 @@ def build_partition_data(
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 
+        if indicators_kw:
+            dict_ind = compute_indicators(ds, **indicators_kw)
+            if len(dict_ind) > 1:
+                raise ValueError(
+                    f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
+                )
+            else:
+                ds = list(dict_ind.values())[0]
+
         for dim in partition_dim:
             if f"cat:{dim}" in ds.attrs:
                 ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})