From 587662776b78ddb6c1e4cec5a485e0bacfed25f6 Mon Sep 17 00:00:00 2001 From: stephenworsley <49274989+stephenworsley@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:59:04 +0000 Subject: [PATCH 1/3] Cherry-pick: Fix usage of map_blocks in AreaWeighted and elsewhere (#5767) * fix usage of map_blocks * fix map_blocks for non-lazy data * add benchmark * unskip benchmark * add benchmark * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove benchmarks * remove unnecessary import * What's New entry. * map_complete_blocks docstring. * map_complete_blocks returns. * Typo. * Typo. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Martin Yeo --- lib/iris/_lazy_data.py | 47 +++++++++++++++++++++-------- lib/iris/analysis/__init__.py | 10 +++--- lib/iris/analysis/_area_weighted.py | 10 +++--- lib/iris/analysis/_regrid.py | 8 ++--- 4 files changed, 46 insertions(+), 29 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 36c0825ad8..1d569be2d8 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -450,10 +450,11 @@ def lazy_elementwise(lazy_array, elementwise_op): return da.map_blocks(elementwise_op, lazy_array, dtype=dtype) -def map_complete_blocks(src, func, dims, out_sizes): +def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): """Apply a function to complete blocks. Complete means that the data is not chunked along the chosen dimensions. + Uses :func:`dask.array.map_blocks` to implement the mapping. Parameters ---------- @@ -465,27 +466,47 @@ def map_complete_blocks(src, func, dims, out_sizes): Dimensions that cannot be chunked. out_sizes : tuple of int Output size of dimensions that cannot be chunked. + *args : tuple + Additional arguments to pass to `func`. + **kwargs : dict + Additional keyword arguments to pass to `func`. + + Returns + ------- + Array-like + + See Also + -------- + :func:`dask.array.map_blocks` : The function used for the mapping. """ + data = None + result = None + if is_lazy_data(src): data = src elif not hasattr(src, "has_lazy_data"): # Not a lazy array and not a cube. So treat as ordinary numpy array. - return func(src) + result = func(src, *args, **kwargs) elif not src.has_lazy_data(): - return func(src.data) + result = func(src.data, *args, **kwargs) else: data = src.lazy_data() - # Ensure dims are not chunked - in_chunks = list(data.chunks) - for dim in dims: - in_chunks[dim] = src.shape[dim] - data = data.rechunk(in_chunks) + if result is None and data is not None: + # Ensure dims are not chunked + in_chunks = list(data.chunks) + for dim in dims: + in_chunks[dim] = src.shape[dim] + data = data.rechunk(in_chunks) - # Determine output chunks - out_chunks = list(data.chunks) - for dim, size in zip(dims, out_sizes): - out_chunks[dim] = size + # Determine output chunks + out_chunks = list(data.chunks) + for dim, size in zip(dims, out_sizes): + out_chunks[dim] = size - return data.map_blocks(func, chunks=out_chunks, dtype=src.dtype) + result = data.map_blocks( + func, *args, chunks=out_chunks, dtype=src.dtype, **kwargs + ) + + return result diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 773e804a14..df069550b8 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1378,18 +1378,16 @@ def _percentile(data, percent, fast_percentile_method=False, **kwargs): percent = [percent] percent = np.array(percent) - # Perform the percentile calculation. - _partial_percentile = functools.partial( + result = iris._lazy_data.map_complete_blocks( + data, _calc_percentile, + (-1,), + percent.shape, percent=percent, fast_percentile_method=fast_percentile_method, **kwargs, ) - result = iris._lazy_data.map_complete_blocks( - data, _partial_percentile, (-1,), percent.shape - ) - # Check whether to reduce to a scalar result, as per the behaviour # of other aggregators. if result.shape == (1,): diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 263f83838c..8ee8509fcf 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -392,9 +392,11 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( tgt_shape = (len(grid_y.points), len(grid_x.points)) - # Calculate new data array for regridded cube. - regrid = functools.partial( + new_data = map_complete_blocks( + src_cube, _regrid_along_dims, + (src_y_dim, src_x_dim), + meshgrid_x.shape, x_dim=src_x_dim, y_dim=src_y_dim, weights=weights, @@ -402,10 +404,6 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( mdtol=mdtol, ) - new_data = map_complete_blocks( - src_cube, regrid, (src_y_dim, src_x_dim), meshgrid_x.shape - ) - # Wrap up the data as a Cube. _regrid_callback = functools.partial( diff --git a/lib/iris/analysis/_regrid.py b/lib/iris/analysis/_regrid.py index b85265e5d9..70b28df1c7 100644 --- a/lib/iris/analysis/_regrid.py +++ b/lib/iris/analysis/_regrid.py @@ -932,9 +932,11 @@ def __call__(self, src): x_dim = src.coord_dims(src_x_coord)[0] y_dim = src.coord_dims(src_y_coord)[0] - # Define regrid function - regrid = functools.partial( + data = map_complete_blocks( + src, self._regrid, + (y_dim, x_dim), + sample_grid_x.shape, x_dim=x_dim, y_dim=y_dim, src_x_coord=src_x_coord, @@ -945,8 +947,6 @@ def __call__(self, src): extrapolation_mode=self._extrapolation_mode, ) - data = map_complete_blocks(src, regrid, (y_dim, x_dim), sample_grid_x.shape) - # Wrap up the data as a Cube. _regrid_callback = functools.partial( self._regrid, From 7e983c5fb765cf3b6728b2fb6ce5bd2d553eb214 Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Mon, 4 Mar 2024 15:39:57 +0000 Subject: [PATCH 2/3] What's New patch. --- docs/src/whatsnew/3.8.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/src/whatsnew/3.8.rst b/docs/src/whatsnew/3.8.rst index 5687b25cc7..c45b1e4902 100644 --- a/docs/src/whatsnew/3.8.rst +++ b/docs/src/whatsnew/3.8.rst @@ -42,6 +42,21 @@ This document explains the changes made to Iris for this release any issues or feature requests for improving Iris. Enjoy! +v3.8.1 (04 Mar 2024) +==================== + +.. dropdown:: v3.8.1 Patches + :color: primary + :icon: alert + :animate: fade-in + + The patches in this release of Iris include: + + #. `@stephenworsley`_ fixed a potential memory leak for Iris uses of + :func:`dask.array.map_blocks`; known specifically to be a problem in the + :class:`iris.analysis.AreaWeighted` regridder. (:pull:`5767`) + + 📢 Announcements ================ From a0adb7097b5cbb9c0d0f2cac330664449c6c593b Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Mon, 4 Mar 2024 16:01:48 +0000 Subject: [PATCH 3/3] Make the bugfix panel OPEN. --- docs/src/whatsnew/3.8.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/whatsnew/3.8.rst b/docs/src/whatsnew/3.8.rst index c45b1e4902..9fa87a9337 100644 --- a/docs/src/whatsnew/3.8.rst +++ b/docs/src/whatsnew/3.8.rst @@ -49,6 +49,7 @@ v3.8.1 (04 Mar 2024) :color: primary :icon: alert :animate: fade-in + :open: The patches in this release of Iris include: