Skip to content

Commit

Permalink
Merge branch 'master' into testdata_tagging
Browse files Browse the repository at this point in the history
  • Loading branch information
Zeitsperre committed Mar 30, 2023
2 parents 957f66e + c6731cb commit ee8978c
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 25 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ Breaking changes
* following indices now accept the `op` argument for modifying the threshold comparison operator (:pull:`1325`):
* ``snw_season_length``, ``snd_season_length``, ``growing_season_length``, ``frost_season_length``, ``frost_free_season_length``, ``rprcptot``, ``daily_pr_intensity``

Bug fixes
^^^^^^^^^
* ``xclim.indices.run_length.last_run`` now works when ``freq`` is not ``None``. (:issue:`1321`, :pull:`1323`).

Internal changes
^^^^^^^^^^^^^^^^
* Added `xclim` to the `ouranos Zenodo community <https://zenodo.org/communities/ouranos/>`_ . (:pull:`1313`).
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.41.11-beta
current_version = 0.41.12-beta
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
Expand Down
2 changes: 1 addition & 1 deletion xclim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

__author__ = """Travis Logan"""
__email__ = "logan.travis@ouranos.ca"
__version__ = "0.41.11-beta"
__version__ = "0.41.12-beta"


# Load official locales
Expand Down
106 changes: 83 additions & 23 deletions xclim/indices/run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,13 +397,14 @@ def windowed_run_count(
return out


def first_run(
def _boundary_run(
da: xr.DataArray,
window: int,
dim: str = "time",
freq: str | None = None,
coord: str | bool | None = False,
ufunc_1dim: str | bool = "from_context",
dim: str,
freq: str | None,
coord: str | bool | None,
ufunc_1dim: str | bool,
position: str,
) -> xr.DataArray: # noqa: D202
"""Return the index of the first item of the first run of at least a given length.
Expand All @@ -415,7 +416,7 @@ def first_run(
Minimum duration of consecutive run to accumulate values.
When equal to 1, an optimized version of the algorithm is used.
dim : str
Dimension along which to calculate consecutive run (default: 'time').
Dimension along which to calculate consecutive run.
freq : str
Resampling frequency.
coord : Optional[str]
Expand All @@ -427,11 +428,13 @@ def first_run(
usage based on number of data points. Using 1D_ufunc=True is typically more efficient
for DataArray with a small number of grid points.
Ignored when `window=1`. It can be modified globally through the "run_length_ufunc" global option.
position : {"first", "last"}
Determines if the algorithm finds the "first" or "last" run
Returns
-------
xr.DataArray
Index (or coordinate if `coord` is not False) of first item in first valid run.
Index (or coordinate if `coord` is not False) of first item in first (last) valid run.
Returns np.nan if there are no valid runs.
"""

Expand All @@ -449,35 +452,94 @@ def coord_transform(out, da):
return out

# general method to get indices (or coords) of first run
def get_out(d):
dmax_ind = d.argmax(dim=dim)
# If `d` has no runs, dmax_ind will be 0: We must replace this with NaN
out = dmax_ind.where(dmax_ind != d.argmin(dim=dim))
out = coord_transform(out, d)
def find_boundary_run(runs, position):
if position == "last":
runs = runs[{dim: slice(None, None, -1)}]
dmax_ind = runs.argmax(dim=dim)
# If there are no runs, dmax_ind will be 0: We must replace this with NaN
out = dmax_ind.where(dmax_ind != runs.argmin(dim=dim))
if position == "last":
out = runs[dim].size - out - 1
runs = runs[{dim: slice(None, None, -1)}]
out = coord_transform(out, runs)
return out

ufunc_1dim = use_ufunc(ufunc_1dim, da, dim=dim, freq=freq)

da = da.fillna(0) # We expect a boolean array, but there could be NaNs nonetheless
if window == 1:
if freq is not None:
out = da.resample({dim: freq}).map(get_out)
out = da.resample({dim: freq}).map(find_boundary_run, position=position)
else:
out = xr.where(da.any(dim=dim), da.argmax(dim=dim), np.NaN)
out = coord_transform(out, da)
out = find_boundary_run(da, position)

elif ufunc_1dim:
if position == "last":
da = da[{dim: slice(None, None, -1)}]
out = first_run_ufunc(x=da, window=window, dim=dim)
if position == "last" and not coord:
out = da[dim].size - out - 1
da = da[{dim: slice(None, None, -1)}]
out = coord_transform(out, da)

else:
d = rle(da, dim=dim, index="first")
# for "first" run, return "first" element in the run (and conversely for "last" run)
d = rle(da, dim=dim, index=position)
d = xr.where(d >= window, 1, 0)
if freq is not None:
out = d.resample({dim: freq}).map(get_out)
out = d.resample({dim: freq}).map(find_boundary_run, position=position)
else:
out = get_out(d)
out = find_boundary_run(d, position)

return out


def first_run(
    da: xr.DataArray,
    window: int,
    dim: str = "time",
    freq: str | None = None,
    coord: str | bool | None = False,
    ufunc_1dim: str | bool = "from_context",
) -> xr.DataArray:  # noqa: D202
    """Return the index of the first item of the first run of at least a given length.

    Thin wrapper around :py:func:`_boundary_run` with ``position="first"``.

    Parameters
    ----------
    da : xr.DataArray
        Input N-dimensional DataArray (boolean).
    window : int
        Minimum duration of consecutive run to accumulate values.
        When equal to 1, an optimized version of the algorithm is used.
    dim : str
        Dimension along which to calculate consecutive run (default: 'time').
    freq : str
        Resampling frequency.
    coord : Optional[str]
        If not False, the function returns values along `dim` instead of indexes.
        If `dim` has a datetime dtype, `coord` can also be a str of the name of the
        DateTimeAccessor object to use (ex: 'dayofyear').
    ufunc_1dim : Union[str, bool]
        Use the 1d 'ufunc' version of this function : default (auto) will attempt to select optimal
        usage based on number of data points. Using 1D_ufunc=True is typically more efficient
        for DataArray with a small number of grid points.
        Ignored when `window=1`. It can be modified globally through the "run_length_ufunc" global option.

    Returns
    -------
    xr.DataArray
        Index (or coordinate if `coord` is not False) of first item in first valid run.
        Returns np.nan if there are no valid runs.
    """
    # All the work happens in the shared boundary-run helper; this function
    # only pins the search direction to the start of the runs.
    return _boundary_run(
        da,
        window=window,
        dim=dim,
        freq=freq,
        coord=coord,
        ufunc_1dim=ufunc_1dim,
        position="first",
    )


Expand Down Expand Up @@ -518,17 +580,15 @@ def last_run(
Index (or coordinate if `coord` is not False) of last item in last valid run.
Returns np.nan if there are no valid runs.
"""
reversed_da = da.sortby(dim, ascending=False)
out = first_run(
reversed_da,
out = _boundary_run(
da,
window=window,
dim=dim,
freq=freq,
coord=coord,
ufunc_1dim=ufunc_1dim,
position="last",
)
if not coord:
return reversed_da[dim].size - out - 1
return out


Expand Down
48 changes: 48 additions & 0 deletions xclim/testing/tests/test_run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,30 @@ def test_simple(self, tas_series, coord, expected, use_dask, ufunc):
out = rl.first_run(runs, window=1, dim="time", coord=coord)
np.testing.assert_array_equal(out.load(), expected)

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"coord,expected",
[
(False, [0, 0]),
(True, [np.datetime64("2000-01-01"), np.datetime64("2000-02-01")]),
("dayofyear", [1, 32]),
],
)
def test_resample_after(self, tas_series, coord, expected, use_dask):
    """Check first_run with resampling (freq='MS') applied after the run search.

    Two runs: one at the very first time step (Jan) and one spanning
    days 31-40 (starts Feb 1 within the monthly resampling).
    """
    t = np.zeros(60)
    t[0] = 2
    t[30:40] = 2
    tas = tas_series(t, start="2000-01-01")
    runs = xr.concat((tas, tas), dim="dim0")

    if use_dask:
        # Bug fix: the original referenced `ufunc`, which is not defined in
        # this test (it was copied from a sibling test that parametrizes
        # `ufunc`), raising NameError when use_dask=True. Keep "time" in a
        # single chunk so run detection sees the whole dimension.
        runs = runs.chunk({"time": -1, "dim0": 1})

    out = rl.first_run(
        runs, window=1, dim="time", coord=coord, freq="MS", ufunc_1dim=False
    )
    np.testing.assert_array_equal(out.load(), np.array([expected, expected]))


class TestWindowedRunEvents:
@pytest.mark.parametrize("index", ["first", "last"])
Expand Down Expand Up @@ -331,6 +355,30 @@ def test_simple(self, tas_series, coord, expected, use_dask, ufunc):
out = rl.last_run(runs, window=1, dim="time", coord=coord, ufunc_1dim=ufunc)
np.testing.assert_array_equal(out.load(), expected)

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize(
"coord,expected",
[
(False, [30, 8]),
(True, [np.datetime64("2000-01-31"), np.datetime64("2000-02-09")]),
("dayofyear", [31, 40]),
],
)
def test_resample_after(self, tas_series, coord, expected, use_dask):
    """Check last_run with resampling (freq='MS') applied after the run search.

    Two runs: one at the very first time step (last index 0 in Jan after
    resampling) and one over days 31-40 (last index day 40, Feb 9).
    """
    t = np.zeros(60)
    t[0] = 2
    t[30:40] = 2
    tas = tas_series(t, start="2000-01-01")
    runs = xr.concat((tas, tas), dim="dim0")

    if use_dask:
        # Bug fix: the original referenced `ufunc`, which is not defined in
        # this test (it was copied from a sibling test that parametrizes
        # `ufunc`), raising NameError when use_dask=True. Keep "time" in a
        # single chunk so run detection sees the whole dimension.
        runs = runs.chunk({"time": -1, "dim0": 1})

    out = rl.last_run(
        runs, window=1, dim="time", coord=coord, freq="MS", ufunc_1dim=False
    )
    np.testing.assert_array_equal(out.load(), np.array([expected, expected]))


def test_run_bounds_synthetic():
run = xr.DataArray(
Expand Down

0 comments on commit ee8978c

Please sign in to comment.