ESMValGroup · bouweandela · Apr 16, 2024 · Feb 23, 2024 · Feb 23, 2024 · Feb 23, 2024
diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
@@ -780,10 +780,6 @@ regridding is based on the horizontal grid of another cube (the reference
 grid). If the horizontal grids of a cube and its reference grid are sufficiently
 the same, regridding is automatically and silently skipped for performance reasons.
 
-The underlying regridding mechanism in ESMValCore uses
-:obj:`iris.cube.Cube.regrid`
-from Iris.
-
 The use of the horizontal regridding functionality is flexible depending on
 what type of reference grid and what interpolation scheme is preferred. Below
 we show a few examples.
@@ -821,7 +817,7 @@ cell specification is oftentimes used when operating on localized data.
           target_grid: 2.5x2.5
           scheme: nearest
 
-In this case the ``NearestNeighbour`` interpolation scheme is used (see below
+In this case the nearest-neighbor interpolation scheme is used (see below
 for scheme definitions).
 
 When using a ``MxN`` type of grid it is possible to offset the grid cell
@@ -917,9 +913,6 @@ Built-in regridding schemes
   :class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyAreaWeighted`.
   Source data on an unstructured grid is not supported, yet.
 
-See also :func:`esmvalcore.preprocessor.regrid`
-
-
 .. _generic regridding schemes:
 
 Generic regridding schemes
@@ -1017,6 +1010,37 @@ scheme available in :doc:`iris-esmf-regrid:index`:
           reference: esmf_regrid.schemes:regrid_rectilinear_to_rectilinear
           mdtol: 0.7
 
+.. _caching_regridding_weights:
+
+Reusing regridding weights
+--------------------------
+
+If desired, regridding weights can be cached to reduce run times (see `here
+<https://scitools-iris.readthedocs.io/en/latest/userguide/interpolation_and_regridding.html#caching-a-regridder>`__
+for technical details on this).
+This can massively speed up the regridding of different datasets that share
+the same source and target grids, but may take up a lot of memory for
+extremely high-resolution data.
+By default, this feature is disabled; to enable it, use the option
+``cache_weights: true`` in the preprocessor definition:
+
+.. code-block:: yaml
+
+    preprocessors:
+      regrid_preprocessor:
+        regrid:
+          target_grid: 0.1x0.1
+          scheme: linear
+          cache_weights: true
+
+Not all regridding schemes support weights caching. An overview of those that
+do is given `here
+<https://scitools-iris.readthedocs.io/en/latest/further_topics/which_regridder_to_use.html#which-regridder-to-use>`__
+and in the docstrings :ref:`here <regridding_schemes>`.
+
+See also :func:`esmvalcore.preprocessor.regrid`
+
+
 .. _ensemble statistics:
 
 Ensemble statistics

diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py
@@ -500,8 +500,7 @@ def _get_target_grid_cube(
     elif isinstance(target_grid, (str, Path)) and os.path.isfile(target_grid):
         target_grid_cube = iris.load_cube(target_grid)
     elif isinstance(target_grid, str):
-        # Generate a target grid from the provided cell-specification,
-        # and cache the resulting stock cube for later use.
+        # Generate a target grid from the provided cell-specification
         target_grid_cube = _global_stock_cube(
             target_grid, lat_offset, lon_offset
         )
@@ -639,12 +638,82 @@ def _load_generic_scheme(scheme: dict):
     return loaded_scheme
 
 
+_CACHED_REGRIDDERS: dict[tuple, dict] = {}
+
+
+def _get_regridder(
+    src_cube: Cube,
+    tgt_cube: Cube,
+    scheme: str | dict,
+    cache_weights: bool,
+):
+    """Get regridder to actually perform regridding.
+
+    Note
+    ----
+    If caching is enabled, a cached regridder is reused to save runtime (see
+    https://scitools-iris.readthedocs.io/en/latest/userguide/
+    interpolation_and_regridding.html#caching-a-regridder).
+
+    """
+    # (1) Weights caching enabled
+    if cache_weights:
+        # To search for a matching regridder in the cache, first check the
+        # regridding scheme name and shapes of source and target coordinates.
+        # Only if these match, check coordinates themselves (this is much more
+        # expensive).
+        coord_key = _get_coord_key(src_cube, tgt_cube)
+        name_shape_key = _get_name_and_shape_key(src_cube, tgt_cube, scheme)
+        if name_shape_key in _CACHED_REGRIDDERS:
+            # We cannot simply do a test for `coord_key in
+            # _CACHED_REGRIDDERS[name_shape_key]` below since the hash() of a
+            # coordinate is simply its id() (thus, coordinates loaded from two
+            # different files would never be considered equal)
+            for (key, regridder) in _CACHED_REGRIDDERS[name_shape_key].items():
+                if key == coord_key:
+                    return regridder
+
+        # Regridder is not in the cache -> create a new one and cache it
+        loaded_scheme = _load_scheme(src_cube, scheme)
+        regridder = loaded_scheme.regridder(src_cube, tgt_cube)
+        _CACHED_REGRIDDERS.setdefault(name_shape_key, {})
+        _CACHED_REGRIDDERS[name_shape_key][coord_key] = regridder
+
+    # (2) Weights caching disabled
+    else:
+        loaded_scheme = _load_scheme(src_cube, scheme)
+        regridder = loaded_scheme.regridder(src_cube, tgt_cube)
+
+    return regridder
+
+
+def _get_coord_key(src_cube: Cube, tgt_cube: Cube) -> tuple:
+    """Get cache key made of source and target latitude/longitude coords."""
+    src_lat = src_cube.coord('latitude')
+    src_lon = src_cube.coord('longitude')
+    tgt_lat = tgt_cube.coord('latitude')
+    tgt_lon = tgt_cube.coord('longitude')
+    return (src_lat, src_lon, tgt_lat, tgt_lon)
+
+
+def _get_name_and_shape_key(
+    src_cube: Cube,
+    tgt_cube: Cube,
+    scheme: str | dict,
+) -> tuple:
+    """Get cache key from ``str(scheme)`` and lat/lon coordinate shapes."""
+    name = str(scheme)
+    shapes = [c.shape for c in _get_coord_key(src_cube, tgt_cube)]
+    return (name, *shapes)
+
+
 def regrid(
     cube: Cube,
     target_grid: Cube | Dataset | Path | str | dict,
     scheme: str | dict,
     lat_offset: bool = True,
     lon_offset: bool = True,
+    cache_weights: bool = False,
 ) -> Cube:
     """Perform horizontal regridding.
 
@@ -691,6 +760,14 @@ def regrid(
         Offset the grid centers of the longitude coordinate w.r.t. Greenwich
         meridian by half a grid step. This argument is ignored if
         `target_grid` is a cube or file.
+    cache_weights:
+        If ``True``, cache regridding weights for later usage. This can
+        massively speed up the regridding of different datasets that share the
+        same source and target grids, but may use a lot of memory for extremely
+        high-resolution data. This option is ignored for schemes that do not
+        support weights caching. More details on this are given in the section
+        on :ref:`caching_regridding_weights`. To clear the cache, use
+        :func:`esmvalcore.preprocessor.regrid.cache_clear`.
 
     Returns
     -------
@@ -757,16 +834,26 @@ def regrid(
             )
         return cube
 
-    # Load scheme, rechunk and regrid
+    # Load scheme and reuse existing regridder if possible
     if isinstance(scheme, str):
         scheme = scheme.lower()
-    loaded_scheme = _load_scheme(cube, scheme)
+    regridder = _get_regridder(cube, target_grid_cube, scheme, cache_weights)
+
+    # Rechunk and actually perform the regridding
     cube = _rechunk(cube, target_grid_cube)
-    cube = cube.regrid(target_grid_cube, loaded_scheme)
+    cube = regridder(cube)
 
     return cube
 
 
+def _cache_clear():
+    """Clear the regridding weights cache (drop all cached regridders)."""
+    _CACHED_REGRIDDERS.clear()
+
+
+regrid.cache_clear = _cache_clear  # type: ignore
+
+
 def _rechunk(cube: Cube, target_grid: Cube) -> Cube:
     """Re-chunk cube with optimal chunk sizes for target grid."""
     if not cube.has_lazy_data() or cube.ndim < 3:

diff --git a/esmvalcore/preprocessor/_regrid_esmpy.py b/esmvalcore/preprocessor/_regrid_esmpy.py
@@ -43,6 +43,8 @@
 class ESMPyRegridder:
     """General ESMPy regridder.
 
+    Supports neither lazy regridding nor weights caching.
+
     Parameters
     ----------
     src_cube:

diff --git a/esmvalcore/preprocessor/regrid_schemes.py b/esmvalcore/preprocessor/regrid_schemes.py
@@ -31,6 +31,9 @@
 class GenericRegridder:
     r"""Generic function regridder.
 
+    Does support lazy regridding if `func` does. Does not support weights
+    caching.
+
     Parameters
     ----------
     src_cube: