Skip to content

Commit

Permalink
Merge branch 'master' into testdata_tagging
Browse files Browse the repository at this point in the history
  • Loading branch information
Zeitsperre committed Mar 30, 2023
2 parents 957f66e + c6731cb commit ee8978c
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 25 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ Breaking changes
* following indices now accept the `op` argument for modifying the threshold comparison operator (:pull:`1325`):
* ``snw_season_length``, ``snd_season_length``, ``growing_season_length``, ``frost_season_length``, ``frost_free_season_length``, ``rprcptot``, ``daily_pr_intensity``

Bug fixes
^^^^^^^^^
* ``xclim.indices.run_length.last_run`` now works when ``freq`` is not ``None``. (:issue:`1321`, :pull:`1323`).

Internal changes
^^^^^^^^^^^^^^^^
* Added `xclim` to the `ouranos Zenodo community <https://zenodo.org/communities/ouranos/>`_ . (:pull:`1313`).
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.41.11-beta
current_version = 0.41.12-beta
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
Expand Down
2 changes: 1 addition & 1 deletion xclim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

__author__ = """Travis Logan"""
__email__ = "logan.travis@ouranos.ca"
__version__ = "0.41.11-beta"
__version__ = "0.41.12-beta"


# Load official locales
Expand Down
106 changes: 83 additions & 23 deletions xclim/indices/run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,13 +397,14 @@ def windowed_run_count(
return out


def first_run(
def _boundary_run(
da: xr.DataArray,
window: int,
dim: str = "time",
freq: str | None = None,
coord: str | bool | None = False,
ufunc_1dim: str | bool = "from_context",
dim: str,
freq: str | None,
coord: str | bool | None,
ufunc_1dim: str | bool,
position: str,
) -> xr.DataArray: # noqa: D202
"""Return the index of the first item of the first run of at least a given length.
Expand All @@ -415,7 +416,7 @@ def first_run(
Minimum duration of consecutive run to accumulate values.
When equal to 1, an optimized version of the algorithm is used.
dim : str
Dimension along which to calculate consecutive run (default: 'time').
Dimension along which to calculate consecutive run.
freq : str
Resampling frequency.
coord : Optional[str]
Expand All @@ -427,11 +428,13 @@ def first_run(
usage based on number of data points. Using 1D_ufunc=True is typically more efficient
for DataArray with a small number of grid points.
Ignored when `window=1`. It can be modified globally through the "run_length_ufunc" global option.
position : {"first", "last"}
Determines if the algorithm finds the "first" or "last" run
Returns
-------
xr.DataArray
Index (or coordinate if `coord` is not False) of first item in first valid run.
Index (or coordinate if `coord` is not False) of first item in first (last) valid run.
Returns np.nan if there are no valid runs.
"""

Expand All @@ -449,35 +452,94 @@ def coord_transform(out, da):
return out

# general method to get indices (or coords) of first run
def get_out(d):
dmax_ind = d.argmax(dim=dim)
# If `d` has no runs, dmax_ind will be 0: We must replace this with NaN
out = dmax_ind.where(dmax_ind != d.argmin(dim=dim))
out = coord_transform(out, d)
def find_boundary_run(runs, position):
if position == "last":
runs = runs[{dim: slice(None, None, -1)}]
dmax_ind = runs.argmax(dim=dim)
# If there are no runs, dmax_ind will be 0: We must replace this with NaN
out = dmax_ind.where(dmax_ind != runs.argmin(dim=dim))
if position == "last":
out = runs[dim].size - out - 1
runs = runs[{dim: slice(None, None, -1)}]
out = coord_transform(out, runs)
return out

ufunc_1dim = use_ufunc(ufunc_1dim, da, dim=dim, freq=freq)

da = da.fillna(0) # We expect a boolean array, but there could be NaNs nonetheless
if window == 1:
if freq is not None:
out = da.resample({dim: freq}).map(get_out)
out = da.resample({dim: freq}).map(find_boundary_run, position=position)
else:
out = xr.where(da.any(dim=dim), da.argmax(dim=dim), np.NaN)
out = coord_transform(out, da)
out = find_boundary_run(da, position)

elif ufunc_1dim:
if position == "last":
da = da[{dim: slice(None, None, -1)}]
out = first_run_ufunc(x=da, window=window, dim=dim)
if position == "last" and not coord:
out = da[dim].size - out - 1
da = da[{dim: slice(None, None, -1)}]
out = coord_transform(out, da)

else:
d = rle(da, dim=dim, index="first")
# for "first" run, return "first" element in the run (and conversely for "last" run)
d = rle(da, dim=dim, index=position)
d = xr.where(d >= window, 1, 0)
if freq is not None:
out = d.resample({dim: freq}).map(get_out)
out = d.resample({dim: freq}).map(find_boundary_run, position=position)
else:
out = get_out(d)
out = find_boundary_run(d, position)

return out


def first_run(
    da: xr.DataArray,
    window: int,
    dim: str = "time",
    freq: str | None = None,
    coord: str | bool | None = False,
    ufunc_1dim: str | bool = "from_context",
) -> xr.DataArray:  # noqa: D202
    """Return the index of the first item of the first run of at least a given length.

    Thin wrapper around :py:func:`_boundary_run` with ``position="first"``.

    Parameters
    ----------
    da : xr.DataArray
        Input N-dimensional DataArray (boolean).
    window : int
        Minimum duration of consecutive run to accumulate values.
        When equal to 1, an optimized version of the algorithm is used.
    dim : str
        Dimension along which to calculate consecutive run (default: 'time').
    freq : str
        Resampling frequency.
    coord : Optional[str]
        If not False, the function returns values along `dim` instead of indexes.
        If `dim` has a datetime dtype, `coord` can also be a str of the name of the
        DateTimeAccessor object to use (ex: 'dayofyear').
    ufunc_1dim : Union[str, bool]
        Use the 1d 'ufunc' version of this function : default (auto) will attempt to select optimal
        usage based on number of data points. Using 1D_ufunc=True is typically more efficient
        for DataArray with a small number of grid points.
        Ignored when `window=1`. It can be modified globally through the "run_length_ufunc" global option.

    Returns
    -------
    xr.DataArray
        Index (or coordinate if `coord` is not False) of first item in first valid run.
        Returns np.nan if there are no valid runs.
    """
    # All the work happens in the shared boundary-run helper; this function
    # only pins the search direction to the start of the runs.
    return _boundary_run(
        da,
        window=window,
        dim=dim,
        freq=freq,
        coord=coord,
        ufunc_1dim=ufunc_1dim,
        position="first",
    )


Expand Down Expand Up @@ -518,17 +580,15 @@ def last_run(
Index (or coordinate if `coord` is not False) of last item in last valid run.
Returns np.nan if there are no valid runs.
"""
reversed_da = da.sortby(dim, ascending=False)
out = first_run(
reversed_da,
out = _boundary_run(
da,
window=window,
dim=dim,
freq=freq,
coord=coord,
ufunc_1dim=ufunc_1dim,
position="last",
)
if not coord:
return reversed_da[dim].size - out - 1
return out


Expand Down
48 changes: 48 additions & 0 deletions xclim/testing/tests/test_run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,30 @@ def test_simple(self, tas_series, coord, expected, use_dask, ufunc):
out = rl.first_run(runs, window=1, dim="time", coord=coord)
np.testing.assert_array_equal(out.load(), expected)

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"coord,expected",
[
(False, [0, 0]),
(True, [np.datetime64("2000-01-01"), np.datetime64("2000-02-01")]),
("dayofyear", [1, 32]),
],
)
def test_resample_after(self, tas_series, coord, expected, use_dask):
    """Check first_run with resampling (freq='MS') applied after the run search.

    Two runs: one at the very first time step (Jan) and one spanning
    days 31-40 (starts Feb 1 within the monthly resampling).
    """
    t = np.zeros(60)
    t[0] = 2
    t[30:40] = 2
    tas = tas_series(t, start="2000-01-01")
    runs = xr.concat((tas, tas), dim="dim0")

    if use_dask:
        # Bug fix: the original referenced `ufunc`, which is not defined in
        # this test (it was copied from a sibling test that parametrizes
        # `ufunc`), raising NameError when use_dask=True. Keep "time" in a
        # single chunk so run detection sees the whole dimension.
        runs = runs.chunk({"time": -1, "dim0": 1})

    out = rl.first_run(
        runs, window=1, dim="time", coord=coord, freq="MS", ufunc_1dim=False
    )
    np.testing.assert_array_equal(out.load(), np.array([expected, expected]))


class TestWindowedRunEvents:
@pytest.mark.parametrize("index", ["first", "last"])
Expand Down Expand Up @@ -331,6 +355,30 @@ def test_simple(self, tas_series, coord, expected, use_dask, ufunc):
out = rl.last_run(runs, window=1, dim="time", coord=coord, ufunc_1dim=ufunc)
np.testing.assert_array_equal(out.load(), expected)

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"coord,expected",
[
(False, [30, 8]),
(True, [np.datetime64("2000-01-31"), np.datetime64("2000-02-09")]),
("dayofyear", [31, 40]),
],
)
def test_resample_after(self, tas_series, coord, expected, use_dask):
    """Check last_run with resampling (freq='MS') applied after the run search.

    Two runs: one at the very first time step (last index 0 in Jan after
    resampling) and one over days 31-40 (last index day 40, Feb 9).
    """
    t = np.zeros(60)
    t[0] = 2
    t[30:40] = 2
    tas = tas_series(t, start="2000-01-01")
    runs = xr.concat((tas, tas), dim="dim0")

    if use_dask:
        # Bug fix: the original referenced `ufunc`, which is not defined in
        # this test (it was copied from a sibling test that parametrizes
        # `ufunc`), raising NameError when use_dask=True. Keep "time" in a
        # single chunk so run detection sees the whole dimension.
        runs = runs.chunk({"time": -1, "dim0": 1})

    out = rl.last_run(
        runs, window=1, dim="time", coord=coord, freq="MS", ufunc_1dim=False
    )
    np.testing.assert_array_equal(out.load(), np.array([expected, expected]))


def test_run_bounds_synthetic():
run = xr.DataArray(
Expand Down

0 comments on commit ee8978c

Please sign in to comment.