From 864fabd13ad0cfd9979ccbcaea1143dac8991a58 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Fri, 13 Aug 2021 15:21:15 +0200 Subject: [PATCH 01/21] fix deramp residual and optimizer --- xdem/coreg.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xdem/coreg.py b/xdem/coreg.py index 983ae6ef..02614465 100644 --- a/xdem/coreg.py +++ b/xdem/coreg.py @@ -1033,7 +1033,8 @@ def poly2d(x_coordinates: np.ndarray, y_coordinates: np.ndarray, return estimated_values # type: ignore def residuals(coefs: np.ndarray, x_coords: np.ndarray, y_coords: np.ndarray, targets: np.ndarray): - return np.median(np.abs(targets - poly2d(x_coords, y_coords, coefs))) + res = targets - poly2d(x_coords, y_coords, coefs) + return res[np.isfinite(res)] if verbose: print("Estimating deramp function...") @@ -1055,15 +1056,14 @@ def residuals(coefs: np.ndarray, x_coords: np.ndarray, y_coords: np.ndarray, tar ddem = ddem[indices] # Optimize polynomial parameters - coefs = scipy.optimize.fmin( + coefs = scipy.optimize.leastsq( func=residuals, x0=np.zeros(shape=((self.degree + 1) * (self.degree + 2) // 2)), - args=(x_coords, y_coords, ddem), - disp=verbose + args=(x_coords, y_coords, ddem) ) - self._meta["coefficients"] = coefs - self._meta["func"] = lambda x, y: poly2d(x, y, coefs) + self._meta["coefficients"] = coefs[0] + self._meta["func"] = lambda x, y: poly2d(x, y, coefs[0]) def _apply_func(self, dem: np.ndarray, transform: rio.transform.Affine) -> np.ndarray: """Apply the deramp function to a DEM.""" From 9863231785be4bf1cd50078c7a03f920ecb861ea Mon Sep 17 00:00:00 2001 From: rhugonne Date: Thu, 18 Aug 2022 22:59:24 +0200 Subject: [PATCH 02/21] Add wrapper neff function --- xdem/spatialstats.py | 83 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index ecb83e93..33a6dfb7 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -17,6 +17,7 @@ from numba import njit import numpy as np import pandas as pd +import geopandas as gpd from scipy import integrate from scipy.optimize import curve_fit from scipy.spatial.distance import pdist, squareform @@ -25,6 +26,7 @@ from scipy.stats import binned_statistic, binned_statistic_2d, binned_statistic_dd from geoutils.spatial_tools import subsample_raster, get_array_and_mask from geoutils.georaster import RasterType, Raster +from geoutils.geovector import VectorType, Vector with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -1259,7 +1261,7 @@ def hcov_sum(h): return neff -def neff_exact(coords: np.ndarray, errors: np.ndarray, params_variogram_model: pd.DataFrame, vectorized=True) -> float: +def neff_exact(coords: np.ndarray, errors: np.ndarray, params_variogram_model: pd.DataFrame, vectorized: bool = True) -> float: """ Exact number of effective samples derived from a double sum of covariance with euclidean coordinates based on the provided variogram parameters. This method works for any shape of area. @@ -1270,6 +1272,7 @@ def neff_exact(coords: np.ndarray, errors: np.ndarray, params_variogram_model: p (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial sills (e.g., [0.8, 0.2]) and "smooth" for the smoothness parameter if it exists for this model (e.g., [None, 0.2]). + :param vectorized: Perform the vectorized calculation (used for testing). 
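+
+    A minimal illustrative sketch (the grid, errors and variogram parameters below are made up for this example):
+
+    >>> import numpy as np, pandas as pd
+    >>> xx, yy = np.meshgrid(np.arange(10), np.arange(10))
+    >>> coords = np.column_stack((xx.ravel(), yy.ravel()))
+    >>> errors = np.ones(100)
+    >>> params = pd.DataFrame(data={'model': ['spherical'], 'range': [5], 'psill': [1.0]})
+    >>> neff = neff_exact(coords=coords, errors=errors, params_variogram_model=params)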
:return: Number of effective samples """ @@ -1318,7 +1321,7 @@ def neff_exact(coords: np.ndarray, errors: np.ndarray, params_variogram_model: p return neff def neff_hugonnet_approx(coords: np.ndarray, errors: np.ndarray, params_variogram_model: pd.DataFrame, subsample: int = 1000, - vectorized=True, random_state: None | np.random.RandomState | np.random.Generator | int = None) -> float: + vectorized: bool = True, random_state: None | np.random.RandomState | np.random.Generator | int = None) -> float: """ Approximated number of effective samples derived from a double sum of covariance subsetted on one of the two sums, based on euclidean coordinates with the provided variogram parameters. This method works for any shape of area. @@ -1394,6 +1397,82 @@ def neff_hugonnet_approx(coords: np.ndarray, errors: np.ndarray, params_variogra return neff +def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, params_variogram_model: pd.DataFrame, + rasterize_resolution: RasterType | float = None, **kwargs) -> float: + """ + Compute the number of effective samples, i.e. the number of uncorrelated samples, in an area accounting for spatial + correlations described by a sum of variogram models. + + This function wraps two methods: + + - A discretized integration method that provides the exact estimate for any shape of area using a double sum of + covariance. By default, this method is approximated using Equation 18 of Hugonnet et al. (2022), https://doi.org/10.1109/jstars.2022.3188922 + to decrease computing times while preserving a good approximation. + + - A continuous integration method that provides a conservative (i.e., slightly overestimated) value for a disk + area shape, based on a generalization of the approach of Rolstad et al. (2009), http://dx.doi.org/10.3189/002214309789470950. + + By default, if a numeric value is passed for an area, the continuous method is used considering a disk shape. If a + vector is passed, the discretized method is computed on that shape. + + :param area: Area of interest either as a numeric value of surface in the same unit as the variogram ranges (will + assume a circular shape), or as a vector (shapefile) of the area + :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types + (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial + sills (e.g., [0.8, 0.2]) and "smooth" for the smoothness parameter if it exists for this model (e.g., + [None, 0.2]). + :param rasterize_resolution: Resolution to rasterize the area if passed as a vector. Can be a float value or a Raster. + :param kwargs: Keyword argument to pass to the `neff_hugonnet_approx` function. 
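+
+    A minimal usage sketch for the numeric-area case (the area value and variogram parameters are illustrative only):
+
+    >>> import pandas as pd
+    >>> params = pd.DataFrame(data={'model': ['spherical'], 'range': [200.], 'psill': [1.]})
+    >>> neff = number_effective_samples(area=1e6, params_variogram_model=params)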
+ :return: Number of effective samples + """ + + # Check input for variogram parameters + _check_validity_params_variogram(params_variogram_model=params_variogram_model) + + # If area is numeric, run the continuous circular approximation + if isinstance(area, (float, int, np.floating, np.integer)): + neff = neff_circular_approx_numerical(area=area, params_variogram_model=params_variogram_model) + + # Otherwise, run the discrete sum of covariance + elif isinstance(area, (Vector, gpd.GeoDataFrame)): + + # If the input is a geopandas dataframe, put into a Vector object + if isinstance(area, gpd.GeoDataFrame): + V = Vector(area) + else: + V = area + + # Rasterize with numeric resolution or Raster metadata + if isinstance(rasterize_resolution, (float, int, np.floating, np.integer)): + + # We only need relative mask and coordinates, not absolute + mask = V.create_mask(xres=rasterize_resolution) + x = rasterize_resolution * np.arange(0, mask.shape[0]) + y = rasterize_resolution * np.arange(0, mask.shape[1]) + coords = np.meshgrid(x, y) + coords_on_mask = coords[:, mask] + + elif isinstance(rasterize_resolution, Raster): + + # With a Raster we can get the coordinates directly + mask = V.create_mask(rst=rasterize_resolution) + coords = rasterize_resolution.coords() + coords_on_mask = coords[:, mask] + + else: + ValueError('The rasterize resolution must be a float, integer or Raster subclass.') + + # Here we don't care about heteroscedasticity, so all errors are standardized to one + errors_on_mask = np.ones(len(coords_on_mask)) + + neff = neff_hugonnet_approx(coords=coords_on_mask, errors=errors_on_mask, + params_variogram_model=params_variogram_model, **kwargs) + + else: + ValueError('Area must be a float, integer, Vector subclass or geopandas dataframe.') + + return neff + def _std_err_finite(std: float, neff_tot: float, neff: float) -> float: """ From 044c312cca6678a733a253c9d445902632d1fc4a Mon Sep 17 00:00:00 2001 From: rhugonne Date: Fri, 19 Aug 2022 16:58:32 +0200 Subject: [PATCH 03/21] Finalize neff wrapper function and add tests --- tests/test_spatialstats.py | 72 ++++++++++++++++++++++++++++++++++---- xdem/spatialstats.py | 29 ++++++++++----- 2 files changed, 85 insertions(+), 16 deletions(-) diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index 28cfd6d0..a2e6bae9 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -8,6 +8,7 @@ import skgstat import geoutils as gu +from geoutils import Raster, Vector import numpy as np import pandas as pd import pytest @@ -21,13 +22,13 @@ PLOT = False -def load_ref_and_diff() -> tuple[gu.georaster.Raster, gu.georaster.Raster, np.ndarray]: +def load_ref_and_diff() -> tuple[Raster, Raster, np.ndarray]: """Load example files to try coregistration methods with.""" - reference_raster = gu.georaster.Raster(examples.get_path("longyearbyen_ref_dem")) - glacier_mask = gu.geovector.Vector(examples.get_path("longyearbyen_glacier_outlines")) + reference_raster = Raster(examples.get_path("longyearbyen_ref_dem")) + glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines")) - ddem = gu.georaster.Raster(examples.get_path('longyearbyen_ddem')) + ddem = Raster(examples.get_path('longyearbyen_ddem')) mask = glacier_mask.create_mask(ddem) return reference_raster, ddem, mask @@ -351,8 +352,7 @@ def test_neff_exact_and_approx_hugonnet(self): # Generate a gridded dataset with varying errors associated to each pixel shape = (15, 15) - np.random.seed(42) - errors = np.abs(np.random.normal(0, 1, size=shape)) + errors = 
np.ones(shape) # Coordinates x = np.arange(0, shape[0]) @@ -372,7 +372,8 @@ def test_neff_exact_and_approx_hugonnet(self): t1 = time.time() # Check that the non-vectorized version gives the same result - neff_exact_nv = xdem.spatialstats.neff_exact(coords=coords, errors=errors, params_variogram_model=params_variogram_model, vectorized=False) + neff_exact_nv = xdem.spatialstats.neff_exact(coords=coords, errors=errors, + params_variogram_model=params_variogram_model, vectorized=False) t2 = time.time() assert neff_exact == pytest.approx(neff_exact_nv, rel=0.001) @@ -397,6 +398,63 @@ def test_neff_exact_and_approx_hugonnet(self): # Check that the approximation is about the same as the original estimate within 10% assert neff_approx == pytest.approx(neff_exact, rel=0.1) + def test_number_effective_samples(self): + """Test that the wrapper function for neff functions behaves correctly and that output values are robust""" + + # The function should return the same result as neff_circular_approx_numerical when using a numerical area + area = 10000 + params_variogram_model = pd.DataFrame(data={'model':['spherical', 'gaussian'], 'range':[300, 3000], 'psill':[0.5, 0.5]}) + + neff1 = xdem.spatialstats.neff_circular_approx_numerical(area=area, params_variogram_model=params_variogram_model) + neff2 = xdem.spatialstats.number_effective_samples(area=area, params_variogram_model=params_variogram_model) + + assert neff1 == pytest.approx(neff2, rel=0.0001) + + # The function should return the same results as neff_hugonnet_approx when using a shape area + # First, get the vector area and compute with the wrapper function + res = 100. + outlines = Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) + outlines_medals = Vector(outlines.ds[outlines.ds['NAME']=='Medalsbreen']) + neff1 = xdem.spatialstats.number_effective_samples(area=outlines_medals, params_variogram_model=params_variogram_model, + rasterize_resolution=res, random_state=42) + # Second, get coordinates manually and compute with the neff_approx_hugonnet function + mask = outlines_medals.create_mask(xres=res) + x = res * np.arange(0, mask.shape[0]) + y = res * np.arange(0, mask.shape[1]) + coords = np.array(np.meshgrid(y, x)) + coords_on_mask = coords[:, mask].T + errors_on_mask = np.ones(len(coords_on_mask)) + neff2 = xdem.spatialstats.neff_hugonnet_approx(coords=coords_on_mask, errors=errors_on_mask, + params_variogram_model=params_variogram_model, random_state=42) + # We can test the match between values accurately thanks to the random_state + assert neff1 == pytest.approx(neff2, rel=0.00001) + + # Check that using a Raster as input for the resolution works + raster = Raster(examples.get_path("longyearbyen_ref_dem")) + neff3 = xdem.spatialstats.number_effective_samples(area=outlines_medals, + params_variogram_model=params_variogram_model, + rasterize_resolution=raster, random_state=42) + # The value should be nearly the same within 1% (the discretization grid is different so affects a tiny bit the result) + assert neff3 == pytest.approx(neff2, rel=0.01) + + # Check that the number of effective samples matches that of the circular approximation within 20% + area_medals = np.sum(outlines_medals.ds.area.values) + neff4 = xdem.spatialstats.number_effective_samples(area=area_medals, params_variogram_model=params_variogram_model) + assert neff4 == pytest.approx(neff2, rel=0.2) + # The circular approximation is always conservative, so should yield a smaller value + assert neff4 < neff2 + + # Check that errors are correctly raised + with 
pytest.warns(UserWarning, match='Resolution for vector rasterization is not defined and thus set at 20% ' + 'of the shortest correlation range, which might result in large memory usage.'): + xdem.spatialstats.number_effective_samples(area=outlines_medals, params_variogram_model=params_variogram_model) + with pytest.raises(ValueError, match='Area must be a float, integer, Vector subclass or geopandas dataframe.'): + xdem.spatialstats.number_effective_samples(area='not supported', params_variogram_model=params_variogram_model) + with pytest.raises(ValueError, match='The rasterize resolution must be a float, integer or Raster subclass.'): + xdem.spatialstats.number_effective_samples(area=10, params_variogram_model=params_variogram_model, + rasterize_resolution=(10, 10)) + + class TestSubSampling: def test_circular_masking(self): diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 33a6dfb7..bf79ec99 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -1266,7 +1266,7 @@ def neff_exact(coords: np.ndarray, errors: np.ndarray, params_variogram_model: p Exact number of effective samples derived from a double sum of covariance with euclidean coordinates based on the provided variogram parameters. This method works for any shape of area. - :param coords: Center coordinates with size (N,) for each spatial support (typically, pixel) + :param coords: Center coordinates with size (N,2) for each spatial support (typically, pixel) :param errors: Errors at the coordinates with size (N,) for each spatial support (typically, pixel) :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial @@ -1327,7 +1327,7 @@ def neff_hugonnet_approx(coords: np.ndarray, errors: np.ndarray, params_variogra based on euclidean coordinates with the provided variogram parameters. This method works for any shape of area. See Hugonnet et al. (2022), https://doi.org/10.1109/jstars.2022.3188922, in particular Supplementary Fig. S16. - :param coords: Center coordinates with size (N,) for each spatial support (typically, pixel) + :param coords: Center coordinates with size (N,2) for each spatial support (typically, pixel) :param errors: Errors at the coordinates with size (N,) for each spatial support (typically, pixel) :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial @@ -1358,8 +1358,11 @@ def neff_hugonnet_approx(coords: np.ndarray, errors: np.ndarray, params_variogra n = len(coords) pds = pdist(coords) + # At maximum, the number of subsamples has to be equal to number of points + subsample = min(subsample, n) + # Get random subset of points for one of the sums - rand_points = rnd.choice(n, size=min(subsample, n), replace=False) + rand_points = rnd.choice(n, size=subsample, replace=False) # Now we compute the double covariance sum # Either using for-loop-version @@ -1413,7 +1416,9 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, area shape, based on a generalization of the approach of Rolstad et al. (2009), http://dx.doi.org/10.3189/002214309789470950. By default, if a numeric value is passed for an area, the continuous method is used considering a disk shape. 
If a - vector is passed, the discretized method is computed on that shape. + vector is passed, the discretized method is computed on that shape. If the discretized method is used, a resolution + for rasterization is generally expected, otherwise is arbitrarily chosen as a fifth of the shortest correlation + range to ensure a sufficiently fine grid for propagation of the shortest range. :param area: Area of interest either as a numeric value of surface in the same unit as the variogram ranges (will assume a circular shape), or as a vector (shapefile) of the area @@ -1423,6 +1428,7 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, [None, 0.2]). :param rasterize_resolution: Resolution to rasterize the area if passed as a vector. Can be a float value or a Raster. :param kwargs: Keyword argument to pass to the `neff_hugonnet_approx` function. + :return: Number of effective samples """ @@ -1442,6 +1448,11 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, else: V = area + if rasterize_resolution is None: + rasterize_resolution = np.min(params_variogram_model['range'].values)/5. + warnings.warn('Resolution for vector rasterization is not defined and thus set at 20% of the shortest ' + 'correlation range, which might result in large memory usage.') + # Rasterize with numeric resolution or Raster metadata if isinstance(rasterize_resolution, (float, int, np.floating, np.integer)): @@ -1449,15 +1460,15 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, mask = V.create_mask(xres=rasterize_resolution) x = rasterize_resolution * np.arange(0, mask.shape[0]) y = rasterize_resolution * np.arange(0, mask.shape[1]) - coords = np.meshgrid(x, y) - coords_on_mask = coords[:, mask] + coords = np.array(np.meshgrid(y, x)) + coords_on_mask = coords[:, mask].T elif isinstance(rasterize_resolution, Raster): # With a Raster we can get the coordinates directly - mask = V.create_mask(rst=rasterize_resolution) - coords = rasterize_resolution.coords() - coords_on_mask = coords[:, mask] + mask = V.create_mask(rst=rasterize_resolution).squeeze() + coords = np.array(rasterize_resolution.coords()) + coords_on_mask = coords[:, mask].T else: ValueError('The rasterize resolution must be a float, integer or Raster subclass.') From eb5b4ee03517062fa4ec369ca3a0a1d1f9517979 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Fri, 19 Aug 2022 23:22:29 +0200 Subject: [PATCH 04/21] Fix tests --- tests/test_spatialstats.py | 20 ++++++++++---------- xdem/spatialstats.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index a2e6bae9..4030f731 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -414,11 +414,11 @@ def test_number_effective_samples(self): # First, get the vector area and compute with the wrapper function res = 100. 
outlines = Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) - outlines_medals = Vector(outlines.ds[outlines.ds['NAME']=='Medalsbreen']) - neff1 = xdem.spatialstats.number_effective_samples(area=outlines_medals, params_variogram_model=params_variogram_model, + outlines_brom = Vector(outlines.ds[outlines.ds['NAME']=='Brombreen']) + neff1 = xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=res, random_state=42) # Second, get coordinates manually and compute with the neff_approx_hugonnet function - mask = outlines_medals.create_mask(xres=res) + mask = outlines_brom.create_mask(xres=res) x = res * np.arange(0, mask.shape[0]) y = res * np.arange(0, mask.shape[1]) coords = np.array(np.meshgrid(y, x)) @@ -431,15 +431,15 @@ def test_number_effective_samples(self): # Check that using a Raster as input for the resolution works raster = Raster(examples.get_path("longyearbyen_ref_dem")) - neff3 = xdem.spatialstats.number_effective_samples(area=outlines_medals, + neff3 = xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=raster, random_state=42) - # The value should be nearly the same within 1% (the discretization grid is different so affects a tiny bit the result) - assert neff3 == pytest.approx(neff2, rel=0.01) + # The value should be nearly the same within 2% (the discretization grid is different so affects a tiny bit the result) + assert neff3 == pytest.approx(neff2, rel=0.02) # Check that the number of effective samples matches that of the circular approximation within 20% - area_medals = np.sum(outlines_medals.ds.area.values) - neff4 = xdem.spatialstats.number_effective_samples(area=area_medals, params_variogram_model=params_variogram_model) + area_brom = np.sum(outlines_brom.ds.area.values) + neff4 = xdem.spatialstats.number_effective_samples(area=area_brom, params_variogram_model=params_variogram_model) assert neff4 == pytest.approx(neff2, rel=0.2) # The circular approximation is always conservative, so should yield a smaller value assert neff4 < neff2 @@ -447,11 +447,11 @@ def test_number_effective_samples(self): # Check that errors are correctly raised with pytest.warns(UserWarning, match='Resolution for vector rasterization is not defined and thus set at 20% ' 'of the shortest correlation range, which might result in large memory usage.'): - xdem.spatialstats.number_effective_samples(area=outlines_medals, params_variogram_model=params_variogram_model) + xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model) with pytest.raises(ValueError, match='Area must be a float, integer, Vector subclass or geopandas dataframe.'): xdem.spatialstats.number_effective_samples(area='not supported', params_variogram_model=params_variogram_model) with pytest.raises(ValueError, match='The rasterize resolution must be a float, integer or Raster subclass.'): - xdem.spatialstats.number_effective_samples(area=10, params_variogram_model=params_variogram_model, + xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=(10, 10)) diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index bf79ec99..b6df8c65 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -1471,7 +1471,7 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, coords_on_mask = coords[:, mask].T else: - 
ValueError('The rasterize resolution must be a float, integer or Raster subclass.') + raise ValueError('The rasterize resolution must be a float, integer or Raster subclass.') # Here we don't care about heteroscedasticity, so all errors are standardized to one errors_on_mask = np.ones(len(coords_on_mask)) @@ -1480,7 +1480,7 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, params_variogram_model=params_variogram_model, **kwargs) else: - ValueError('Area must be a float, integer, Vector subclass or geopandas dataframe.') + raise ValueError('Area must be a float, integer, Vector subclass or geopandas dataframe.') return neff From d4bec4c0d5c0a6c7d5272307f68d515c7e97f6c1 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Sat, 20 Aug 2022 22:04:13 +0200 Subject: [PATCH 05/21] Add pipeline error functions --- xdem/spatialstats.py | 350 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 344 insertions(+), 6 deletions(-) diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index b6df8c65..7138def1 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -8,7 +8,7 @@ import warnings from functools import partial -from typing import Callable, Union, Iterable, Optional, Sequence, Any +from typing import Callable, Union, Iterable, Optional, Sequence, Any, overload import itertools import matplotlib @@ -164,7 +164,7 @@ def nd_binning(values: np.ndarray, list_var: Iterable[np.ndarray], list_var_name return df_concat -def interp_nd_binning(df: pd.DataFrame, list_var_names: Union[str,list[str]], statistic : Union[str, Callable[[np.ndarray],float]] = nmad, +def interp_nd_binning(df: pd.DataFrame, list_var_names: Union[str, Iterable[str]], statistic : Union[str, Callable[[np.ndarray],float]] = nmad, min_count: Optional[int] = 100) -> Callable[[tuple[np.ndarray, ...]], np.ndarray]: """ Estimate an interpolant function for an N-dimensional binning. Preferably based on the output of nd_binning. @@ -246,7 +246,7 @@ def interp_nd_binning(df: pd.DataFrame, list_var_names: Union[str,list[str]], st # Remove statistic values calculated with a sample count under the minimum count if min_count is not None: - df_sub.loc[df_sub['count'] < min_count,statistic_name] = np.nan + df_sub.loc[df_sub['count'] < min_count, statistic_name] = np.nan values = df_sub[statistic_name].values ind_valid = np.isfinite(values) @@ -282,10 +282,204 @@ def interp_nd_binning(df: pd.DataFrame, list_var_names: Union[str,list[str]], st return interp_fun +def two_step_standardization(dvalues: np.ndarray, list_var: Iterable[np.ndarray], + unscaled_error_fun: Callable[[tuple[np.ndarray, ...]], np.ndarray], + spread_statistic: Callable = nmad, + fac_spread_outliers: float | None = 7 + ) -> tuple[np.ndarray, Callable[[tuple[np.ndarray, ...]], np.ndarray]]: + """ + Standardize the proxy differenced values using the modelled heteroscedasticity, re-scaled to the spread statistic, + and generate the final standardization function. + + :param dvalues: Proxy values array of size (N,) + :param list_var: List of size (L) of explanatory variables array of size (N,) + :param unscaled_error_fun: Function of the spread with explanatory variables not yet re-scaled + :param spread_statistic: Statistic to be computed for the spread; defaults to nmad + :param fac_spread_outliers: Exclude outliers outside this spread after standardizing; pass None to ignore. 
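+
+    An illustrative sketch with a constant, hypothetical unscaled error function (any function of the explanatory
+    variables works, e.g. the interpolant returned by `interp_nd_binning`):
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(42)
+    >>> dh = rng.normal(0, 2, size=1000)
+    >>> slope = rng.uniform(0, 40, size=1000)
+    >>> zscores, err_fun = two_step_standardization(dvalues=dh, list_var=[slope],
+    ...                                             unscaled_error_fun=lambda t: 2 * np.ones_like(t[0]))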
+
+    :return: Standardized values array of size (N,), Function to destandardize
+    """
+
+    # Standardize a first time with the function
+    zscores = dvalues / unscaled_error_fun(tuple(list_var))
+
+    # Set large outliers that might have been created by the standardization to NaN, central tendency should already be
+    # around zero so only need to take the absolute value
+    if fac_spread_outliers is not None:
+        zscores[np.abs(zscores) > fac_spread_outliers * spread_statistic(zscores)] = np.nan
+
+    # Re-compute the spread statistic to re-standardize, as dividing by the function will not necessarily bring the
+    # z-score exactly equal to one due to approximations during N-D binning and interpolation, and due to the outlier
+    # filtering
+    zscore_nmad = spread_statistic(zscores)
+
+    # Re-standardize
+    zscores /= zscore_nmad
+
+    # Define the exact function for de-standardization to pass as output
+    def error_fun(*args):
+        return zscore_nmad * unscaled_error_fun(*args)
+
+    return zscores, error_fun
+
+
+def estimate_model_heteroscedasticity(dvalues: np.ndarray, list_var: Iterable[np.ndarray], list_var_names: Iterable[str],
+                                      spread_statistic: Callable = nmad,
+                                      list_var_bins: Optional[Union[int,Iterable[Iterable]]] = None,
+                                      min_count: Optional[int] = 100,
+                                      fac_spread_outliers: float | None = 7
+                                      ) -> tuple[pd.DataFrame, Callable[[tuple[np.ndarray, ...]], np.ndarray]]:
+    """
+    Estimate and model the heteroscedasticity (i.e., variability in error) according to a list of explanatory variables
+    from a proxy of differenced values (e.g., elevation differences), ideally compared to a source of higher
+    precision.
+
+    This function performs N-D data binning with the list of explanatory variables for a spread statistic, then
+    performs N-D interpolation on this statistic, and scales the output with a two-step standardization to return an
+    error function of the explanatory variables.
+
+    The functions used are `nd_binning`, `interp_nd_binning` and `two_step_standardization`.
+
+    :param dvalues: Proxy values array of size (N,)
+    :param list_var: List of size (L) of explanatory variables array of size (N,)
+    :param list_var_names: List of size (L) of names of the explanatory variables
+    :param spread_statistic: Statistic to be computed for the spread; defaults to nmad
+    :param list_var_bins: Count of size (1), or list of size (L) of counts or custom bin edges for the explanatory variables; defaults to 10 bins
+    :param min_count: Minimum number of samples to be used as a valid statistic (replaced by nodata)
+    :param fac_spread_outliers: Exclude outliers outside this spread after standardizing; pass None to ignore.
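+
+    A minimal usage sketch on synthetic data (one explanatory variable; all values are illustrative only):
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(42)
+    >>> slope = rng.uniform(0, 40, size=2000)
+    >>> dh = rng.normal(0, 0.1 + 0.05 * slope)  # Spread of the differences grows with slope
+    >>> df, err_fun = estimate_model_heteroscedasticity(dvalues=dh, list_var=[slope], list_var_names=['slope'])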
+ + :return: Dataframe of binned spread statistic with explanatory variables, Error function with explanatory variables + """ + + # Perform N-D binning with the differenced values computing the spread statistic + df = nd_binning(values=dvalues, list_var=list_var, list_var_names=list_var_names, statistics=[spread_statistic], + list_var_bins=list_var_bins) + + # Perform N-D linear interpolation for the spread statistic + fun = interp_nd_binning(df, list_var_names=list_var_names, statistic=spread_statistic.__name__, min_count=min_count) + + # Get the final function based on a two-step standardization + final_fun = two_step_standardization(dvalues=dvalues, list_var=list_var, unscaled_error_fun=fun, + spread_statistic=spread_statistic, fac_spread_outliers=fac_spread_outliers)[1] + + return df, final_fun + + +@overload +def infer_heteroscedasticy_from_stable(dvalues: np.ndarray , list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + list_var_names: Iterable[str], + spread_statistic: Callable, + list_var_bins: Optional[Union[int,Iterable[Iterable]]], + min_count: Optional[int], + factor_spread_exclude_outliers: float | None, + ) -> tuple[np.ndarray, + pd.DataFrame, + Callable[[tuple[np.ndarray, ...]], np.ndarray]]: ... + +@overload +def infer_heteroscedasticy_from_stable(dvalues: RasterType , list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + list_var_names: Iterable[str], + spread_statistic: Callable, + list_var_bins: Optional[Union[int,Iterable[Iterable]]], + min_count: Optional[int], + factor_spread_exclude_outliers: float | None, + ) -> tuple[RasterType, + pd.DataFrame, + Callable[[tuple[np.ndarray, ...]], np.ndarray]]: ... + +def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + list_var_names: Iterable[str] = None, + spread_statistic: Callable = nmad, + list_var_bins: Optional[Union[int,Iterable[Iterable]]] = None, + min_count: Optional[int] = 100, + fac_spread_outliers: float | None = 7, + ) -> tuple[np.ndarray | RasterType, + pd.DataFrame, + Callable[[tuple[np.ndarray, ...]], np.ndarray]]: + """ + Infer heteroscedasticity from differenced values on stable terrain and a list of explanatory variables. + + This function returns an error map, a dataframe of spread values and the error function with explanatory variables. + It is a convenience wrapper for `estimate_model_heteroscedasticity` to work on either Raster or array, compute the + stable mask and return an error map. + + :param dvalues: Proxy values as array or Raster + :param list_var: List of size (L) of explanatory variables as array or Raster of same shape as dvalues + :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues + :param list_var_names: List of size (L) of names of the explanatory variables, otherwise named var1, var2, etc. + :param spread_statistic: Statistic to be computed for the spread; defaults to nmad + :param list_var_bins: Count of size (1), or list of size (L) of counts or custom bin edges for the explanatory variables; defaults to 10 bins + :param min_count: Minimum number of samples to be used as a valid statistic (replaced by nodata) + :param fac_spread_outliers: Exclude outliers outside this spread after standardizing; pass None to ignore. 
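+
+    A hedged workflow sketch (the file names below are hypothetical placeholders):
+
+    >>> dh = Raster('dh.tif')
+    >>> slope = Raster('slope.tif')
+    >>> stable = Vector('stable_terrain.shp')
+    >>> err_map, df, err_fun = infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope],
+    ...                                                           stable_mask=stable, list_var_names=['slope'])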
+
+    :return: Inferred error map (array or Raster, same as input proxy values),
+        Dataframe of binned spread statistic with explanatory variables,
+        Error function with explanatory variables
+    """
+
+    # Check inputs
+    if not isinstance(dvalues, (Raster, np.ndarray)):
+        raise ValueError('The dvalues must be a Raster or NumPy array.')
+    if not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)):
+        raise ValueError('The stable mask must be a Vector, GeoDataFrame or NumPy array.')
+
+    # Check that input stable mask can only be a georeferenced vector if the proxy values are a Raster to project onto
+    if isinstance(dvalues, Raster) and isinstance(stable_mask, (Vector, gpd.GeoDataFrame)):
+        raise ValueError('The stable mask can only passed as a Vector or GeoDataFrame if the input dvalues is a Raster.')
+
+    # Create placeholder variable names if those don't exist
+    if list_var_names is None:
+        list_var_names = ['var'+str(i) for i in range(len(list_var))]
+
+    # Get the arrays for proxy values and explanatory variables
+    if isinstance(dvalues, Raster):
+        dvalues_arr = get_array_and_mask(dvalues)[0]
+    else:
+        dvalues_arr = dvalues
+    list_var_arr = [get_array_and_mask(var)[0] if isinstance(var, Raster) else var
+                    for var in list_var]
+
+    # If the stable mask is not an array, create it
+    if not isinstance(stable_mask, np.ndarray):
+
+        # If the stable mask is a geopandas dataframe, wrap it in a Vector object
+        if isinstance(stable_mask, gpd.GeoDataFrame):
+            stable_vector = Vector(stable_mask)
+        else:
+            stable_vector = stable_mask
+
+        # Create the mask
+        stable_mask_arr = stable_vector.create_mask(dvalues)
+    # If the mask is already an array, just pass it
+    else:
+        stable_mask_arr = stable_mask
+
+    # Get the subsets on stable terrain
+    dvalues_stable_arr = dvalues_arr[stable_mask_arr]
+    list_var_stable_arr = [var_arr[stable_mask_arr] for var_arr in list_var_arr]
+
+    # Estimate and model the heteroscedasticity using only stable terrain
+    df, fun = estimate_model_heteroscedasticity(dvalues=dvalues_stable_arr, list_var=list_var_stable_arr,
+                                                list_var_names=list_var_names, spread_statistic=spread_statistic,
+                                                list_var_bins=list_var_bins, min_count=min_count,
+                                                fac_spread_outliers=fac_spread_outliers)
+
+    # Use the standardization function to get the error array for the entire input array (not only stable)
+    error = fun(tuple(list_var_arr))
+
+    # Return the right type, depending on dvalues input
+    if isinstance(dvalues, Raster):
+        return dvalues.copy(new_array=error)
+    else:
+        return error
+
+
 def _create_circular_mask(shape: Union[int, Sequence[int]], center: Optional[list[float]] = None,
                           radius: Optional[float] = None) -> np.ndarray:
     """
-    Create circular mask on a raster, defaults to the center of the array and it's half width
+    Create circular mask on a raster, defaults to the center of the array and its half width
 
     :param shape: shape of array
     :param center: center
@@ -959,9 +1153,9 @@ def rho(h):
 
     return rho
 
-def fit_sum_model_variogram(list_models: list[str] | list[Callable], empirical_variogram: pd.DataFrame,
+def fit_sum_model_variogram(list_models: list[str | Callable], empirical_variogram: pd.DataFrame,
                             bounds: list[tuple[float, float]] = None,
-                            p0: list[float] = None) -> tuple[Callable, pd.DataFrame]:
+                            p0: list[float] = None) -> tuple[Callable[[np.ndarray], np.ndarray], pd.DataFrame]:
     """
     Fit a sum of variogram models to an empirical variogram, with weighted least-squares based on sampling errors.
    Preferably used with the empirical variogram dataframe returned by the `sample_empirical_variogram` function.
@@ -1069,6 +1263,150 @@ def variogram_sum(h, *args):
 
     return variogram_sum_fit, df_params
 
+def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], list_models: list[str | Callable],
+                                       estimator = 'dowd', gsd: float = None, coords: np.ndarray = None, subsample: int = 1000,
+                                       subsample_method: str = 'cdist_equidistant', n_variograms: int = 1,
+                                       n_jobs: int = 1, verbose = False,
+                                       random_state: None | np.random.RandomState | np.random.Generator | int = None,
+                                       bounds: list[tuple[float, float]] = None, p0: list[float] = None,
+                                       **kwargs) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[np.ndarray], np.ndarray]]:
+
+    """
+    Estimate and model the spatial correlation of the input variable by empirical variogram sampling and fitting of a
+    sum of variogram models.
+
+    This function samples an empirical variogram using skgstat.Variogram, then optimizes by weighted least-squares the
+    sum of a defined number of models, using the functions `sample_empirical_variogram` and `fit_sum_model_variogram`.
+
+    :param dvalues: Proxy values of studied variable
+    :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be
+        a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a
+        spherical model "Sph", "Spherical" or skgstat.models.spherical).
+    :param estimator: Estimator for the empirical variogram; defaults to Dowd's variogram (see skgstat.Variogram for
+        the list of available estimators).
+    :param gsd: Ground sampling distance
+    :param coords: Coordinates
+    :param subsample: Number of samples to randomly draw from the values
+    :param subsample_method: Spatial subsampling method
+    :param n_variograms: Number of independent empirical variogram estimations
+    :param n_jobs: Number of processing cores
+    :param verbose: Print statements during processing
+    :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing)
+    :param bounds: Bounds of range and sill parameters for each model (shape K x 4 = K x range lower, range upper, sill lower, sill upper).
+    :param p0: Initial guess of ranges and sills of each model (shape K x 2 = K x range first guess, sill first guess).
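+
+    A minimal sketch on a synthetic grid (the values, model choice and subsample size are illustrative only):
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(42)
+    >>> dh = rng.normal(0, 1, size=(50, 50))
+    >>> emp_vgm, params, corr_fun = estimate_model_spatial_correlation(
+    ...     dvalues=dh, list_models=['Sph'], gsd=30., subsample=200, random_state=42)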
+
+    :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Spatial correlation function
+    """
+
+    empirical_variogram = sample_empirical_variogram(values=dvalues, estimator=estimator, gsd=gsd, coords=coords,
+                                                     subsample=subsample, subsample_method=subsample_method,
+                                                     n_variograms=n_variograms, n_jobs=n_jobs, verbose=verbose,
+                                                     random_state=random_state, **kwargs)
+
+    params_variogram_model = fit_sum_model_variogram(list_models=list_models, empirical_variogram=empirical_variogram,
+                                                     bounds=bounds, p0=p0)[1]
+
+    spatial_correlation_func = correlation_from_variogram(params_variogram_model=params_variogram_model)
+
+    return empirical_variogram, params_variogram_model, spatial_correlation_func
+
+def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType,
+                                          stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame,
+                                          list_models: list[str | Callable],
+                                          errors: np.ndarray | RasterType = None,
+                                          estimator = 'dowd', gsd: float = None, coords: np.ndarray = None,
+                                          subsample: int = 1000, subsample_method: str = 'cdist_equidistant',
+                                          n_variograms: int = 1, n_jobs: int = 1, verbose = False,
+                                          random_state: None | np.random.RandomState | np.random.Generator | int = None,
+                                          bounds: list[tuple[float, float]] = None, p0: list[float] = None,
+                                          **kwargs
+                                          ) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[np.ndarray], np.ndarray]]:
+    """
+    Infer spatial correlation of errors from differenced values on stable terrain and a list of variogram models to
+    fit as a sum.
+
+    This function returns a dataframe of the empirical variogram, a dataframe of optimized model parameters, and a
+    spatial correlation function.
+    It is a convenience wrapper for `estimate_model_spatial_correlation` to work on either Raster or array and compute
+    the stable mask.
+
+    :param dvalues: Proxy values as array or Raster
+    :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues
+    :param errors: Error values to account for heteroscedasticity (ignored if None).
+    :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be
+        a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a
+        spherical model "Sph", "Spherical" or skgstat.models.spherical).
+    :param estimator: Estimator for the empirical variogram; defaults to Dowd's variogram (see skgstat.Variogram for
+        the list of available estimators).
+    :param gsd: Ground sampling distance
+    :param coords: Coordinates
+    :param subsample: Number of samples to randomly draw from the values
+    :param subsample_method: Spatial subsampling method
+    :param n_variograms: Number of independent empirical variogram estimations
+    :param n_jobs: Number of processing cores
+    :param verbose: Print statements during processing
+    :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing)
+    :param bounds: Bounds of range and sill parameters for each model (shape K x 4 = K x range lower, range upper, sill lower, sill upper).
+    :param p0: Initial guess of ranges and sills of each model (shape K x 2 = K x range first guess, sill first guess).
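+
+    A hedged usage sketch mirroring the tests (the file names below are hypothetical placeholders):
+
+    >>> dh = Raster('dh.tif')
+    >>> glaciers = Vector('glacier_outlines.shp')
+    >>> emp_vgm, params, corr_fun = infer_spatial_correlation_from_stable(
+    ...     dvalues=dh, stable_mask=~glaciers.create_mask(dh).squeeze(), list_models=['Gau', 'Sph'], subsample=100)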
+
+    :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Spatial correlation function
+    """
+
+    # Check inputs
+    if not isinstance(dvalues, (Raster, np.ndarray)):
+        raise ValueError('The dvalues must be a Raster or NumPy array.')
+    if not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)):
+        raise ValueError('The stable mask must be a Vector, GeoDataFrame or NumPy array.')
+
+    # Check that input stable mask can only be a georeferenced vector if the proxy values are a Raster to project onto
+    if isinstance(dvalues, Raster) and isinstance(stable_mask, (Vector, gpd.GeoDataFrame)):
+        raise ValueError(
+            'The stable mask can only passed as a Vector or GeoDataFrame if the input dvalues is a Raster.')
+
+    # Get array if input is a Raster
+    if isinstance(dvalues, Raster):
+        dvalues_arr = get_array_and_mask(dvalues)[0]
+    else:
+        dvalues_arr = dvalues
+
+    # If the stable mask is not an array, create it
+    if not isinstance(stable_mask, np.ndarray):
+
+        # If the stable mask is a geopandas dataframe, wrap it in a Vector object
+        if isinstance(stable_mask, gpd.GeoDataFrame):
+            stable_vector = Vector(stable_mask)
+        else:
+            stable_vector = stable_mask
+
+        # Create the mask
+        stable_mask_arr = stable_vector.create_mask(dvalues)
+    # If the mask is already an array, just pass it
+    else:
+        stable_mask_arr = stable_mask
+
+    # Get the subsets on stable terrain
+    dvalues_stable_arr = dvalues_arr[stable_mask_arr]
+
+    # Perform standardization if error array is provided
+    if errors is not None:
+        if isinstance(errors, Raster):
+            errors_arr = get_array_and_mask(errors)[0]
+        else:
+            errors_arr = errors
+        errors_stable_arr = errors_arr[stable_mask_arr]
+
+        # Standardize
+        dvalues_stable_arr /= errors_stable_arr
+
+    # Estimate and model spatial correlations
+    empirical_variogram, params_variogram_model, spatial_correlation_func = estimate_model_spatial_correlation(
+        dvalues=dvalues_stable_arr, list_models=list_models, estimator=estimator, gsd=gsd, coords=coords,
+        subsample=subsample, subsample_method=subsample_method, n_variograms=n_variograms, n_jobs=n_jobs,
+        verbose=verbose, random_state=random_state, bounds=bounds, p0=p0, **kwargs)
+
+    return empirical_variogram, params_variogram_model, spatial_correlation_func
+
+
 def _check_validity_params_variogram(params_variogram_model: pd.DataFrame):
     """Check the validity of the modelled variogram parameters dataframe (mostly in the case it is passed manually)."""

From 45df8ec0e54fef8a326f53ee276080b5c7ab3ad6 Mon Sep 17 00:00:00 2001
From: rhugonne
Date: Sun, 21 Aug 2022 11:17:46 +0200
Subject: [PATCH 06/21] Finalize convenience error pipeline functions and add tests

---
 tests/test_spatialstats.py | 183 +++++++++++++++++++++++++++++++++++--
 xdem/spatialstats.py       | 104 ++++++++++++++++-----
 2 files changed, 256 insertions(+), 31 deletions(-)

diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py
index 4030f731..f2749b6e 100644
--- a/tests/test_spatialstats.py
+++ b/tests/test_spatialstats.py
@@ -8,6 +8,7 @@
 import skgstat
 
 import geoutils as gu
+import geoutils.spatial_tools
 from geoutils import Raster, Vector
 import numpy as np
 import pandas as pd
 import pytest
@@ -22,7 +23,7 @@
 PLOT = False
 
-def load_ref_and_diff() -> tuple[Raster, Raster, np.ndarray]:
+def load_ref_and_diff() -> tuple[Raster, Raster, np.ndarray, Vector]:
     """Load example files to try coregistration methods with."""
 
     reference_raster = Raster(examples.get_path("longyearbyen_ref_dem"))
     glacier_mask = Vector(examples.get_path("longyearbyen_glacier_outlines"))
ddem = Raster(examples.get_path('longyearbyen_ddem')) mask = glacier_mask.create_mask(ddem) - return reference_raster, ddem, mask + return reference_raster, ddem, mask, glacier_mask class TestVariogram: @@ -262,6 +263,65 @@ def test_check_params_variogram_model(self): xdem.spatialstats._check_validity_params_variogram( pd.DataFrame(data={'model': ['stable'], 'range': [100], 'psill': [1], 'smooth': [-1]})) + def test_estimate_model_spatial_correlation_and_infer_from_stable(self): + """Test consistency of outputs and errors in wrapper functions for estimation of spatial correlation""" + + # Load data + diff, mask_glacier, vector_glacier = load_ref_and_diff()[1:4] + + # Keep only data on stable + diff_on_stable = diff.copy() + diff_on_stable.set_mask(mask_glacier) + + # Run wrapper estimate and model function + emp_vgm_1, params_model_vgm_1, _ = \ + xdem.spatialstats.estimate_model_spatial_correlation(dvalues=diff_on_stable, list_models=['Gau', 'Sph'], + subsample=100, random_state=42) + + # Check that the output matches that of the original function under the same random state + emp_vgm_2 = xdem.spatialstats.sample_empirical_variogram(values=diff_on_stable, estimator='dowd', subsample=100, + random_state=42) + pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_2) + params_model_vgm_2 = xdem.spatialstats.fit_sum_model_variogram(list_models=['Gau', 'Sph'], + empirical_variogram=emp_vgm_2)[1] + pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_2) + + # Run wrapper infer from stable function with a Raster and the mask, and check the consistency there as well + emp_vgm_3, params_model_vgm_3, _ = \ + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable, stable_mask=~mask_glacier.squeeze(), + list_models=['Gau', 'Sph'], subsample=100, random_state=42) + pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_3) + pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_3) + + # Run again with array instead of Raster as input + diff_on_stable_arr = gu.spatial_tools.get_array_and_mask(diff_on_stable)[0] + emp_vgm_4, params_model_vgm_4, _ = \ + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable_arr, gsd=diff_on_stable.res[0], + stable_mask=~mask_glacier.squeeze(), + list_models=['Gau', 'Sph'], subsample=100, + random_state=42) + pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_4) + pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_4) + + # Check that errors are raised with wrong input + with pytest.raises(ValueError, match='The dvalues must be a Raster or NumPy array.'): + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues='not_an_array', stable_mask=~mask_glacier.squeeze(), + list_models=['Gau', 'Sph'], random_state=42) + with pytest.raises(ValueError, match='The stable mask must be a Vector, GeoDataFrame or NumPy array.'): + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff, + stable_mask='not_a_vector_or_array', + list_models=['Gau', 'Sph'], random_state=42) + with pytest.raises(ValueError, match='The unstable mask must be a Vector, GeoDataFrame or NumPy array.'): + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff, + unstable_mask='not_a_vector_or_array', + list_models=['Gau', 'Sph'], random_state=42) + diff_on_stable_arr = gu.spatial_tools.get_array_and_mask(diff_on_stable)[0] + with pytest.raises(ValueError, match='The stable mask can only passed as a Vector or GeoDataFrame if the input ' + 'dvalues is a Raster.'): + 
xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable_arr, + stable_mask=vector_glacier, + list_models=['Gau', 'Sph'], random_state=42) + def test_empirical_fit_plotting(self): """Verify that the shape of the empirical variogram output works with the fit and plotting""" @@ -401,6 +461,9 @@ def test_neff_exact_and_approx_hugonnet(self): def test_number_effective_samples(self): """Test that the wrapper function for neff functions behaves correctly and that output values are robust""" + raster = load_ref_and_diff()[0] + outlines = load_ref_and_diff()[3] + # The function should return the same result as neff_circular_approx_numerical when using a numerical area area = 10000 params_variogram_model = pd.DataFrame(data={'model':['spherical', 'gaussian'], 'range':[300, 3000], 'psill':[0.5, 0.5]}) @@ -413,7 +476,6 @@ def test_number_effective_samples(self): # The function should return the same results as neff_hugonnet_approx when using a shape area # First, get the vector area and compute with the wrapper function res = 100. - outlines = Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) outlines_brom = Vector(outlines.ds[outlines.ds['NAME']=='Brombreen']) neff1 = xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=res, random_state=42) @@ -430,7 +492,6 @@ def test_number_effective_samples(self): assert neff1 == pytest.approx(neff2, rel=0.00001) # Check that using a Raster as input for the resolution works - raster = Raster(examples.get_path("longyearbyen_ref_dem")) neff3 = xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=raster, random_state=42) @@ -544,9 +605,10 @@ class TestBinning: def test_nd_binning(self): - ref, diff, mask = load_ref_and_diff() + ref, diff, mask = load_ref_and_diff()[0:3] - slope, aspect = xdem.coreg.calculate_slope_and_aspect(ref.data.squeeze()) + ref_arr = gu.spatial_tools.get_array_and_mask(ref)[0] + slope, aspect = xdem.terrain.get_terrain_attribute(ref_arr, resolution=ref.res, attribute=['slope', 'aspect']) # 1D binning, by default will create 10 bins df = xdem.spatialstats.nd_binning(values=diff.data.flatten(), list_var=[slope.flatten()], list_var_names=['slope']) @@ -660,8 +722,10 @@ def test_interp_nd_binning(self): df[np.logical_and.reduce((df['var1'] == i, df['var2'] == j, df['var3'] == k))]['statistic'].values[0] # Check if it works with nd_binning output - ref, diff, mask = load_ref_and_diff() - slope, aspect = xdem.coreg.calculate_slope_and_aspect(ref.data.squeeze()) + ref, diff, mask = load_ref_and_diff()[0:3] + ref_arr = gu.spatial_tools.get_array_and_mask(ref)[0] + + slope, aspect = xdem.terrain.get_terrain_attribute(ref_arr, resolution=ref.res, attribute=['slope', 'aspect']) df = xdem.spatialstats.nd_binning(values=diff.data.flatten(), list_var=[slope.flatten(), ref.data.flatten(), aspect.flatten()], list_var_names=['slope', 'elevation', 'aspect']) @@ -684,7 +748,7 @@ def test_interp_nd_binning(self): # Check a value is returned inside the grid assert np.isfinite(fun([15, 1000])) # Check the nmad increases with slope - assert fun([20, 800]) > fun([0, 800]) + assert fun([40, 800]) > fun([0, 800]) # Check a value is returned outside the grid assert all(np.isfinite(fun(([-5, 50], [-500,3000])))) @@ -699,6 +763,107 @@ def test_interp_nd_binning(self): # Check a value is returned outside the grid assert all(np.isfinite(fun(([-5, 50], [-500,3000], 
[-2*np.pi,4*np.pi])))) + def test_two_step_standardization(self): + """Test two-step standardization function""" + + ref_dem, diff, mask_glacier, vector_glacier = load_ref_and_diff() + + # Get attributes + slope, maximum_curv = xdem.terrain.get_terrain_attribute(ref_dem, attribute=['slope', 'maximum_curvature']) + + # Test this gives the same results as when using the base functions + diff_arr = gu.spatial_tools.get_array_and_mask(diff)[0] + slope_arr = gu.spatial_tools.get_array_and_mask(slope)[0] + maximum_curv_arr = gu.spatial_tools.get_array_and_mask(maximum_curv)[0] + stable_mask_arr = ~vector_glacier.create_mask(ref_dem).squeeze() + + # Reproduce the first steps of binning + df_binning = xdem.spatialstats.nd_binning(values=diff_arr[stable_mask_arr], + list_var=[slope_arr[stable_mask_arr], + maximum_curv_arr[stable_mask_arr]], + list_var_names=['var1', 'var2'], + statistics=[xdem.spatialstats.nmad]) + unscaled_fun = xdem.spatialstats.interp_nd_binning(df_binning, list_var_names=['var1', 'var2'], + statistic='nmad') + # The zscore spread should not be one right after binning + zscores = diff_arr[stable_mask_arr] / unscaled_fun((slope_arr[stable_mask_arr], maximum_curv_arr[stable_mask_arr])) + scale_fac = xdem.spatialstats.nmad(zscores) + assert scale_fac != 1 + + # Filter with a factor of 5 and the standard deviation and check the function outputs the exact same array + zscores[np.abs(zscores) > 3*np.nanstd(zscores)] = np.nan + scale_fac_std = np.nanstd(zscores) + zscores /= scale_fac_std + zscores_2, final_func = xdem.spatialstats.two_step_standardization(diff_arr[stable_mask_arr], + list_var=[slope_arr[stable_mask_arr], + maximum_curv_arr[stable_mask_arr]], + unscaled_error_fun=unscaled_fun, + spread_statistic=np.nanstd, + fac_spread_outliers=3) + assert np.array_equal(zscores, zscores_2, equal_nan=True) + + # Check the output of the scaled function is simply the unscaled one times the spread statistic + test_slopes = np.linspace(0, 50, 50) + test_max_curvs = np.linspace(0, 10, 50) + assert np.array_equal(unscaled_fun((test_slopes, test_max_curvs)) * scale_fac_std, + final_func((test_slopes, test_max_curvs))) + + + def test_estimate_model_heteroscedasticity_and_infer_from_stable(self): + """Test consistency of outputs and errors in wrapper functions for estimation of heteroscedasticity""" + + # Load data + ref_dem, diff, mask_glacier, vector_glacier = load_ref_and_diff() + + # Get attributes + slope, maximum_curv = xdem.terrain.get_terrain_attribute(ref_dem, attribute=['slope', 'maximum_curvature']) + + # Test infer function + errors_1, df_binning_1, err_fun_1 = \ + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, + list_var=[slope, maximum_curv], + unstable_mask=vector_glacier) + + # Test this gives the same results as when using the base functions + diff_arr = gu.spatial_tools.get_array_and_mask(diff)[0] + slope_arr = gu.spatial_tools.get_array_and_mask(slope)[0] + maximum_curv_arr = gu.spatial_tools.get_array_and_mask(maximum_curv)[0] + stable_mask_arr = ~vector_glacier.create_mask(ref_dem).squeeze() + + df_binning_2, err_fun_2 = \ + xdem.spatialstats.estimate_model_heteroscedasticity(dvalues=diff_arr[stable_mask_arr], + list_var=[slope_arr[stable_mask_arr], + maximum_curv_arr[stable_mask_arr]], + list_var_names=['var1', 'var2']) + + pd.testing.assert_frame_equal(df_binning_1, df_binning_2) + test_slopes = np.linspace(0, 50, 50) + test_max_curvs = np.linspace(0, 10, 50) + assert np.array_equal(err_fun_1((test_slopes, test_max_curvs)), err_fun_2((test_slopes, 
test_max_curvs))) + + # Test the error map is consistent as well + errors_2_arr = err_fun_2((slope_arr, maximum_curv_arr)) + errors_1_arr = gu.spatial_tools.get_array_and_mask(errors_1)[0] + assert np.array_equal(errors_1_arr, errors_2_arr, equal_nan=True) + + # Check that errors are raised with wrong input + with pytest.raises(ValueError, match='The dvalues must be a Raster or NumPy array.'): + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues='not_an_array', + stable_mask=~mask_glacier.squeeze(), + list_var=[slope_arr]) + with pytest.raises(ValueError, match='The stable mask must be a Vector, GeoDataFrame or NumPy array.'): + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, + stable_mask='not_a_vector_or_array', + list_var=[slope_arr]) + with pytest.raises(ValueError, match='The unstable mask must be a Vector, GeoDataFrame or NumPy array.'): + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, + unstable_mask='not_a_vector_or_array', + list_var=[slope_arr]) + + with pytest.raises(ValueError, match='The stable mask can only passed as a Vector or GeoDataFrame if the input ' + 'dvalues is a Raster.'): + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff_arr, stable_mask=vector_glacier, + list_var=[slope_arr]) def test_plot_binning(self): diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 7138def1..608dc27f 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -367,6 +367,7 @@ def estimate_model_heteroscedasticity(dvalues: np.ndarray, list_var: Iterable[np @overload def infer_heteroscedasticy_from_stable(dvalues: np.ndarray , list_var: list[np.ndarray | RasterType], stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, list_var_names: Iterable[str], spread_statistic: Callable, list_var_bins: Optional[Union[int,Iterable[Iterable]]], @@ -379,6 +380,7 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray , list_var: list[np.n @overload def infer_heteroscedasticy_from_stable(dvalues: RasterType , list_var: list[np.ndarray | RasterType], stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, list_var_names: Iterable[str], spread_statistic: Callable, list_var_bins: Optional[Union[int,Iterable[Iterable]]], @@ -389,7 +391,8 @@ def infer_heteroscedasticy_from_stable(dvalues: RasterType , list_var: list[np.n Callable[[tuple[np.ndarray, ...]], np.ndarray]]: ... def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_var: list[np.ndarray | RasterType], - stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, list_var_names: Iterable[str] = None, spread_statistic: Callable = nmad, list_var_bins: Optional[Union[int,Iterable[Iterable]]] = None, @@ -405,9 +408,12 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_va It is a convenience wrapper for `estimate_model_heteroscedasticity` to work on either Raster or array, compute the stable mask and return an error map. + If no stable or unstable mask is provided to mask in or out the values, all terrain is used. 
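+
+    For instance (an illustrative sketch with hypothetical file names), unstable terrain such as glaciers can now be
+    masked out directly:
+
+    >>> dh, slope, glaciers = Raster('dh.tif'), Raster('slope.tif'), Vector('glaciers.shp')
+    >>> err_map, df, err_fun = infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope],
+    ...                                                           unstable_mask=glaciers)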
+ :param dvalues: Proxy values as array or Raster :param list_var: List of size (L) of explanatory variables as array or Raster of same shape as dvalues :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues + :param unstable_mask: Vector shapefile of unstable terrain (if dvalues is Raster), or boolean array of same shape as dvalues :param list_var_names: List of size (L) of names of the explanatory variables, otherwise named var1, var2, etc. :param spread_statistic: Statistic to be computed for the spread; defaults to nmad :param list_var_bins: Count of size (1), or list of size (L) of counts or custom bin edges for the explanatory variables; defaults to 10 bins @@ -422,16 +428,18 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_va # Check inputs if not isinstance(dvalues, (Raster, np.ndarray)): raise ValueError('The dvalues must be a Raster or NumPy array.') - if not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)): + if stable_mask is not None and not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)): raise ValueError('The stable mask must be a Vector, GeoDataFrame or NumPy array.') + if unstable_mask is not None and not isinstance(unstable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)): + raise ValueError('The unstable mask must be a Vector, GeoDataFrame or NumPy array.') # Check that input stable mask can only be a georeferenced vector if the proxy values are a Raster to project onto - if isinstance(dvalues, Raster) and isinstance(stable_mask, (Vector, gpd.GeoDataFrame)): + if not isinstance(dvalues, Raster) and (isinstance(stable_mask, (Vector, gpd.GeoDataFrame)) or isinstance(unstable_mask, (Vector, gpd.GeoDataFrame))): raise ValueError('The stable mask can only passed as a Vector or GeoDataFrame if the input dvalues is a Raster.') # Create placeholder variables names if those don't exist if list_var_names is None: - list_var_names = ['var'+str(i) for i in range(len(list_var))] + list_var_names = ['var'+str(i+1) for i in range(len(list_var))] # Get the arrays for proxy values and explanatory variables if isinstance(dvalues, Raster): @@ -442,7 +450,9 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_va for var in list_var if isinstance(var, Raster)] # If the stable mask is not an array, create it - if not isinstance(stable_mask, np.ndarray): + if stable_mask is None: + stable_mask_arr = np.ones(np.shape(dvalues_arr), dtype=bool) + elif not isinstance(stable_mask, np.ndarray): # If the stable mask is a geopandas dataframe, wrap it in a Vector object if isinstance(stable_mask, gpd.GeoDataFrame): @@ -456,6 +466,25 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_va else: stable_mask_arr = stable_mask + # If the unstable mask is not an array, create it + if unstable_mask is None: + unstable_mask_arr = np.zeros(np.shape(dvalues_arr), dtype=bool) + elif not isinstance(unstable_mask, np.ndarray): + + # If the unstable mask is a geopandas dataframe, wrap it in a Vector object + if isinstance(unstable_mask, gpd.GeoDataFrame): + unstable_vector = Vector(unstable_mask) + else: + unstable_vector = unstable_mask + + # Create the mask + unstable_mask_arr = unstable_vector.create_mask(dvalues) + # If the mask is already an array, just pass it + else: + unstable_mask_arr = unstable_mask + + stable_mask_arr = np.logical_and(stable_mask_arr, ~unstable_mask_arr).squeeze() + # Get the subsets on stable terrain 
dvalues_stable_arr = dvalues_arr[stable_mask_arr] list_var_stable_arr = [var_arr[stable_mask_arr] for var_arr in list_var_arr] @@ -471,9 +500,9 @@ def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_va # Return the right type, depending on dvalues input if isinstance(dvalues, Raster): - return dvalues.copy(new_array=error) + return dvalues.copy(new_array=error), df, fun else: - return error + return error, df, fun def _create_circular_mask(shape: Union[int, Sequence[int]], center: Optional[list[float]] = None, @@ -889,17 +918,19 @@ def sample_empirical_variogram(values: Union[np.ndarray, RasterType], gsd: float elif isinstance(values, (np.ndarray, np.ma.masked_array)): values, mask = get_array_and_mask(values) else: - raise TypeError('Values must be of type np.ndarray, np.ma.masked_array or Raster subclass.') + raise ValueError('Values must be of type np.ndarray, np.ma.masked_array or Raster subclass.') values = values.squeeze() # Then, check if the logic between values, coords and gsd is respected if (gsd is not None or subsample_method in ['cdist_equidistant', 'pdist_disk','pdist_ring']) and values.ndim == 1: - raise TypeError('Values array must be 2D when using any of the "cdist_equidistant", "pdist_disk" and ' + raise ValueError('Values array must be 2D when using any of the "cdist_equidistant", "pdist_disk" and ' '"pdist_ring" methods, or providing a ground sampling distance instead of coordinates.') elif coords is not None and values.ndim != 1: - raise TypeError('Values array must be 1D when providing coordinates.') + raise ValueError('Values array must be 1D when providing coordinates.') elif coords is not None and (coords.shape[0] != 2 and coords.shape[1] != 2): - raise TypeError('The coordinates array must have one dimension with length equal to 2') + raise ValueError('The coordinates array must have one dimension with length equal to 2') + elif values.ndim == 2 and gsd is None: + raise ValueError('The ground sampling distance must be defined when passing a 2D values array.') # Check the subsample method provided exists, otherwise list options if subsample_method not in ['cdist_equidistant','cdist_point','pdist_point','pdist_disk','pdist_ring']: @@ -1311,8 +1342,9 @@ def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], l return empirical_variogram, params_variogram_model, spatial_correlation_func def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, - stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, list_models: list[str | Callable], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, errors: np.ndarray | RasterType = None, estimator = 'dowd', gsd: float = None, coords: np.ndarray = None, subsample: int = 1000, subsample_method: str = 'cdist_equidistant', @@ -1330,12 +1362,15 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, It is a convenience wrapper for `estimate_model_spatial_correlation` to work on either Raster or array and compute the stable mask. + If no stable or unstable mask is provided to mask in or out the values, all terrain is used. + :param dvalues: Proxy values as array or Raster - :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues - :param errors: Error values to account for heteroscedasticity (ignored if None). 
:param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be
        a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a spherical
        model "Sph", "Spherical" or skgstat.models.spherical).
+    :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues
+    :param unstable_mask: Vector shapefile of unstable terrain (if dvalues is Raster), or boolean array of same shape as dvalues
+    :param errors: Error values to account for heteroscedasticity (ignored if None).
     :param estimator: Estimator for the empirical variogram; default to Dowd's variogram (see skgstat.Variogram for
         the list of available estimators).
     :param gsd: Ground sampling distance
@@ -1355,22 +1390,28 @@
     # Check inputs
     if not isinstance(dvalues, (Raster, np.ndarray)):
         raise ValueError('The dvalues must be a Raster or NumPy array.')
-    if not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)):
+    if stable_mask is not None and not isinstance(stable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)):
         raise ValueError('The stable mask must be a Vector, GeoDataFrame or NumPy array.')
+    if unstable_mask is not None and not isinstance(unstable_mask, (np.ndarray, Vector, gpd.GeoDataFrame)):
+        raise ValueError('The unstable mask must be a Vector, GeoDataFrame or NumPy array.')

     # Check that input stable mask can only be a georeferenced vector if the proxy values are a Raster to project onto
-    if isinstance(dvalues, Raster) and isinstance(stable_mask, (Vector, gpd.GeoDataFrame)):
+    if not isinstance(dvalues, Raster) and isinstance(stable_mask, (Vector, gpd.GeoDataFrame)):
         raise ValueError(
             'The stable mask can only passed as a Vector or GeoDataFrame if the input dvalues is a Raster.')

     # Get array if input is a Raster
     if isinstance(dvalues, Raster):
         dvalues_arr = get_array_and_mask(dvalues)[0]
+        gsd = dvalues.res[0]
     else:
         dvalues_arr = dvalues

     # If the stable mask is not an array, create it
-    if not isinstance(stable_mask, np.ndarray):
+    if stable_mask is None:
+        stable_mask_arr = np.ones(np.shape(dvalues_arr), dtype=bool)
+    elif not isinstance(stable_mask, np.ndarray):

         # If the stable mask is a geopandas dataframe, wrap it in a Vector object
         if isinstance(stable_mask, gpd.GeoDataFrame):
             stable_vector = Vector(stable_mask)
         else:
             stable_vector = stable_mask

         # Create the mask
         stable_mask_arr = stable_vector.create_mask(dvalues)
     # If the mask is already an array, just pass it
     else:
         stable_mask_arr = stable_mask

@@ -1384,8 +1425,28 @@
     else:
         stable_mask_arr = stable_mask

-    # Get the subsets on stable terrain
-    dvalues_stable_arr = dvalues_arr[stable_mask_arr]
+    # If the unstable mask is not an array, create it
+    if unstable_mask is None:
+        unstable_mask_arr = np.zeros(np.shape(dvalues_arr), dtype=bool)
+    elif not isinstance(unstable_mask, np.ndarray):
+
+        # If the unstable mask is a geopandas dataframe, wrap it in a Vector object
+        if isinstance(unstable_mask, gpd.GeoDataFrame):
+            unstable_vector = Vector(unstable_mask)
+        else:
+            unstable_vector = unstable_mask
+
+        # Create the mask
+        unstable_mask_arr = unstable_vector.create_mask(dvalues)
+    # If the mask is already an array, just pass it
+    else:
+        unstable_mask_arr = unstable_mask
+
+    stable_mask_arr = np.logical_and(stable_mask_arr, ~unstable_mask_arr).squeeze()
+
+    # Need to preserve the shape, so setting as NaNs all points not on stable terrain
+    dvalues_stable_arr = dvalues_arr.copy()
+    dvalues_stable_arr[~stable_mask_arr] = np.nan

     # Perform standardization if
error array is provided if errors is not None: @@ -1393,14 +1454,13 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, errors_arr = get_array_and_mask(errors)[0] else: errors_arr = errors - errors_stable_arr = errors_arr[stable_mask_arr] # Standardize - dvalues_stable_arr /= errors_stable_arr + dvalues_stable_arr /= errors_arr # Estimate and model spatial correlations empirical_variogram, params_variogram_model, spatial_correlation_func = estimate_model_spatial_correlation( - values=dvalues_stable_arr, list_models=list_models, estimator=estimator, gsd=gsd, coords=coords, + dvalues=dvalues_stable_arr, list_models=list_models, estimator=estimator, gsd=gsd, coords=coords, subsample=subsample, subsample_method=subsample_method, n_variograms=n_variograms, n_jobs=n_jobs, verbose=verbose, random_state=random_state, bounds=bounds, p0=p0, **kwargs) From 30496720ff398a5d008885683466fba0f0263304 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Sun, 21 Aug 2022 17:25:00 +0200 Subject: [PATCH 07/21] Add two basic gallery examples for heteroscedasticity and spatial correlation of errors --- examples/error_map.py | 56 ++++++++++++++++++ ...or.py => heterosc_estimation_modelling.py} | 0 examples/spatial_correlation.py | 59 +++++++++++++++++++ ...r.py => variogram_estimation_modelling.py} | 0 xdem/spatialstats.py | 11 +++- 5 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 examples/error_map.py rename examples/{plot_nonstationary_error.py => heterosc_estimation_modelling.py} (100%) create mode 100644 examples/spatial_correlation.py rename examples/{plot_vgm_error.py => variogram_estimation_modelling.py} (100%) diff --git a/examples/error_map.py b/examples/error_map.py new file mode 100644 index 00000000..987d9ed1 --- /dev/null +++ b/examples/error_map.py @@ -0,0 +1,56 @@ +""" +Elevation error map +=================== + +Digital elevation models have a precision that can vary with terrain and instrument-related variables. Here, we apply +the framework of `Hugonnet et al. (2022) `_ to estimate and model this +variability in elevation error, using terrain slope and maximum curvature as explanatory variables and stable terrain +as an error proxy for moving terrain. + +**References**: `Hugonnet et al. (2022) `_. See in particular Figure 4. +Errors in elevation difference can be converted in elevation errors following Equation 7 (equal if other source of much +higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of same precision). +""" +# sphinx_gallery_thumbnail_number = 1 +import xdem +import geoutils as gu + +# %% +# We load a difference of DEMs at Longyearbyen, already coregistered using :ref:`coregistration_nuthkaab` as shown in +# the :ref:`sphx_glr_auto_examples_plot_nuth_kaab.py` example. We also load the reference DEM to derive terrain +# attributes and the glacier outlines here corresponding to moving terrain. +dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) +ref_dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem")) +glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) + +# %% +# We derive the terrain slope and maximum curvature from the reference DEM. 
+slope, maximum_curvature = xdem.terrain.get_terrain_attribute(ref_dem, attribute=['slope', 'maximum_curvature'])
+
+# %%
+# Then, we run the pipeline for inference of elevation heteroscedasticity from stable terrain:
+error_map, df_binning, error_function = \
+    xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope, maximum_curvature],
+                                                         list_var_names=['slope', 'maxc'],
+                                                         unstable_mask=glacier_outlines)
+
+# %%
+# The first output corresponds to the error map for the DEM (1-sigma):
+error_map.show(vmin=2, vmax=7, cmap='Reds', cb_title='Elevation error (1$\sigma$)')
+
+# %%
+# The second output is the dataframe of 2D binning with slope and maximum curvature:
+df_binning
+
+# %%
+# The third output is the 2D binning interpolant, i.e. an error function with the slope and maximum curvature
+# (*Note: below we divide the maximum curvature by 100 to convert it to m-1*):
+print('Error for a slope of {:.0f} degrees and {:.0f} m-1 max. curvature: {:.1f} m'.format(0, 0, error_function((0, 0))))
+print('Error for a slope of {:.0f} degrees and {:.0f} m-1 max. curvature: {:.1f} m'.format(40, 0, error_function((40, 0))))
+print('Error for a slope of {:.0f} degrees and {:.2f} m-1 max. curvature: {:.1f} m'.format(0, 5/100, error_function((0, 5))))
+
+# %%
+# This pipeline will not always work optimally with default parameters: spread estimates can be affected by skewed
+# distributions, the binning by extreme range of values, some DEMs do not have any error variability with terrain (e.g.,
+# terrestrial photogrammetry). **To learn how to tune more parameters and use the subfunctions, see the gallery example:**
+# :ref:`sphx_glr_auto_examples_heterosc_estimation_modelling.py`!
\ No newline at end of file
diff --git a/examples/plot_nonstationary_error.py b/examples/heterosc_estimation_modelling.py
similarity index 100%
rename from examples/plot_nonstationary_error.py
rename to examples/heterosc_estimation_modelling.py
diff --git a/examples/spatial_correlation.py b/examples/spatial_correlation.py
new file mode 100644
index 00000000..6d21446e
--- /dev/null
+++ b/examples/spatial_correlation.py
@@ -0,0 +1,59 @@
+"""
+Spatial correlation of errors
+=============================
+
+Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we apply
+the framework of `Hugonnet et al. (2022) `_ to estimate and model this
+spatial correlation in elevation error, using a sum of variogram forms to model this correlation, and stable terrain
+as an error proxy for moving terrain.
+
+**References**: `Hugonnet et al. (2022) `_. See in particular Figure 5.
+"""
+# sphinx_gallery_thumbnail_number = 1
+import xdem
+import geoutils as gu
+
+# %%
+# We load a difference of DEMs at Longyearbyen, already coregistered using :ref:`coregistration_nuthkaab` as shown in
+# the :ref:`sphx_glr_auto_examples_plot_nuth_kaab.py` example. We also load the glacier outlines here corresponding to
+# moving terrain.
+dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem"))
+glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines"))
+
+# %%
+# Then, we run the pipeline for inference of the spatial correlation of elevation errors from stable terrain (*Note: we pass a
+# random_state argument to ensure a fixed random subsampling in this example, useful for result reproducibility*):
+df_empirical_variogram, df_model_params, spatial_corr_function = \
+    xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=dh, list_models=['Gaussian', 'Spherical'],
+                                                            unstable_mask=glacier_outlines, random_state=42)
+
+# %%
+# The first output corresponds to the dataframe of the empirical variogram, by default estimated using Dowd's estimator
+# and the circular sampling scheme of `skgstat.RasterEquidistantMetricSpace` (Fig. S13 of Hugonnet et al. (2022)):
+df_empirical_variogram
+
+# %%
+# The second output is the dataframe of optimized model parameters for a sum of Gaussian and spherical models:
+df_model_params
+
+# %%
+# The third output is the spatial correlation function with spatial lags, derived from the variogram:
+print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(0)*100, 0))
+print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(100)*100, 100))
+print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(1000)*100, 1000))
+print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(10000)*100, 10000))
+print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(30000)*100, 30000))
+
+# %%
+# We can plot the empirical variogram and its model on a non-linear X-axis to identify the multi-scale correlations.
+xdem.spatialstats.plot_variogram(df=df_empirical_variogram,
+                                 list_fit_fun=[xdem.spatialstats.get_variogram_model_func(df_model_params)],
+                                 xlabel='Spatial lag (m)',
+                                 ylabel='Variance of\nelevation differences (m²)',
+                                 xscale_range_split=[100, 1000])
+
+# %%
+# This pipeline will not always work optimally with default parameters: variogram sampling is more robust with a lot of
+# samples but takes long computing times, and the fitting might require multiple tries for forms and possibly bounds
+# and first guesses to help the least-squares optimization. **To learn how to tune more parameters and use the
+# subfunctions, see the gallery example:** :ref:`sphx_glr_auto_examples_variogram_estimation_modelling.py`!
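As a pointer, the `df_model_params` dataframe above already follows the `params_variogram_model` format expected by the `xdem.spatialstats.number_effective_samples` wrapper added in PATCH 02, so the fitted correlations can feed error propagation directly. A minimal sketch, assuming variogram ranges in metres and an arbitrary 1 km² disk:

# Sketch only: number of uncorrelated samples implied by the fitted model for a
# disk-shaped area of 10**6 m2 (units must match the variogram ranges)
neff_disk = xdem.spatialstats.number_effective_samples(area=10**6, params_variogram_model=df_model_params)
print('Number of effective samples in a 1 km2 disk: {:.1f}'.format(neff_disk))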
\ No newline at end of file diff --git a/examples/plot_vgm_error.py b/examples/variogram_estimation_modelling.py similarity index 100% rename from examples/plot_vgm_error.py rename to examples/variogram_estimation_modelling.py diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 608dc27f..0f75960a 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -1333,6 +1333,8 @@ def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], l subsample=subsample, subsample_method=subsample_method, n_variograms=n_variograms, n_jobs=n_jobs, verbose=verbose, random_state=random_state, **kwargs) + # TODO: prevent this last bin with few samples to be returned by `sample_empirical_variogram` + empirical_variogram = empirical_variogram[:-1] params_variogram_model = fit_sum_model_variogram(list_models=list_models, empirical_variogram=empirical_variogram, bounds=bounds, p0=p0)[1] @@ -2059,7 +2061,7 @@ def patches_method(values: np.ndarray, gsd: float, area: float, mask: Optional[n return df_all -def plot_variogram(df: pd.DataFrame, list_fit_fun: Optional[list[Callable[[float], float]]] = None, +def plot_variogram(df: pd.DataFrame, list_fit_fun: Optional[list[Callable[[np.ndarray], np.ndarray]]] = None, list_fit_fun_label: Optional[list[str]] = None, ax: matplotlib.axes.Axes | None = None, xscale='linear', xscale_range_split: Optional[list] = None, xlabel = None, ylabel = None, xlim = None, ylim = None): @@ -2210,12 +2212,15 @@ def plot_variogram(df: pd.DataFrame, list_fit_fun: Optional[list[Callable[[float if ylim is not None: ax1.set_ylim(ylim) else: - ax1.set_ylim((0, np.nanmax(df.exp)+np.nanmean(df.err_exp))) + if np.all(np.isnan(df.err_exp)): + ax1.set_ylim((0, 1.05*np.nanmax(df.exp))) + else: + ax1.set_ylim((0, np.nanmax(df.exp)+np.nanmean(df.err_exp))) if k == int(nb_subpanels/2): ax1.set_xlabel(xlabel) if k == nb_subpanels - 1: - ax1.legend(loc='best') + ax1.legend(loc='lower right') if k == 0: ax1.set_ylabel(ylabel) else: From a82cd1c84314dc0bd295f70ca9a673c5c2d80142 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Sun, 21 Aug 2022 18:19:52 +0200 Subject: [PATCH 08/21] Add spatial error propagation function and tests --- tests/test_spatialstats.py | 30 +++++++++++++++++++++ xdem/spatialstats.py | 53 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index f2749b6e..209b944c 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -515,6 +515,36 @@ def test_number_effective_samples(self): xdem.spatialstats.number_effective_samples(area=outlines_brom, params_variogram_model=params_variogram_model, rasterize_resolution=(10, 10)) + def test_spatial_error_propagation(self): + """Test that the spatial error propagation wrapper function runs properly""" + + ref, diff, mask_glacier, vector_glacier = load_ref_and_diff() + + # Get the error map and variogram model with standardization + slope, maxc = xdem.terrain.get_terrain_attribute(ref, attribute=['slope', 'maximum_curvature']) + errors = xdem.spatialstats.infer_heteroscedasticy_from_stable( + dvalues=diff, list_var=[slope, maxc], unstable_mask=vector_glacier)[0] + # Standardize the differences + zscores = diff / errors + params_variogram_model = xdem.spatialstats.infer_spatial_correlation_from_stable( + dvalues=zscores, list_models=['Gaussian', 'Spherical'], unstable_mask=vector_glacier, subsample=100)[1] + + # Run the function with vector areas + areas_vector = 
[vector_glacier.ds[vector_glacier.ds['NAME']=='Brombreen'],
+                        vector_glacier.ds[vector_glacier.ds['NAME']=='Medalsbreen']]
+
+        list_stderr_vec = xdem.spatialstats.spatial_error_propagation(areas=areas_vector, errors=errors,
+                                                                      params_variogram_model=params_variogram_model)
+
+        # Run the function with numeric areas (sum needed for Medalsbreen that has two separate polygons)
+        areas_numeric = [np.sum(area_vec.area.values) for area_vec in areas_vector]
+        list_stderr = xdem.spatialstats.spatial_error_propagation(areas=areas_numeric, errors=errors,
+                                                                  params_variogram_model=params_variogram_model)
+
+        # Check that the outputs are consistent: the numeric method should give smaller neff
+        for i in range(2):
+            assert list_stderr_vec[i] > list_stderr[i]
+

 class TestSubSampling:

diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py
index 0f75960a..37d5e39d 100644
--- a/xdem/spatialstats.py
+++ b/xdem/spatialstats.py
@@ -1884,6 +1884,59 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame,

     return neff

+def spatial_error_propagation(areas: list[float | VectorType | gpd.GeoDataFrame],
+                              errors: RasterType,
+                              params_variogram_model: pd.DataFrame,
+                              **kwargs) -> list[float]:
+    """
+    Spatial propagation of elevation errors to an area using the estimated heteroscedasticity and spatial correlations.
+
+    This function is based on the `number_effective_samples` function to estimate uncorrelated samples. If given a
+    vector area, it uses Equation 18 of Hugonnet et al. (2022), https://doi.org/10.1109/jstars.2022.3188922. If given
+    a numeric area, it uses a generalization of Rolstad et al. (2009), http://dx.doi.org/10.3189/002214309789470950.
+
+    The standard error SE (1-sigma) is then computed as SE = mean(SD) / sqrt(Neff), where mean(SD) is the mean of
+    errors in the area of interest which accounts for heteroscedasticity, and Neff is the number of effective samples.
+
+
+    :param areas: List of areas of interest, each either as a numeric value of surface in the same unit as the
+        variogram ranges (will assume a circular shape), or as a vector (shapefile) of the area
+    :param errors: Errors from heteroscedasticity estimation and modelling, as an array or Raster
+    :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types
+        (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial
+        sills (e.g., [0.8, 0.2]) and "smooth" for the smoothness parameter if it exists for this model (e.g.,
+        [None, 0.2]).
+    :param kwargs: Keyword argument to pass to the `neff_hugonnet_approx` function.
+ + :return: List of standard errors (1-sigma) for the input areas + """ + + standard_errors = [] + errors_arr = get_array_and_mask(errors)[0] + for area in areas: + # We estimate the number of effective samples in the area + neff = number_effective_samples(area=area, params_variogram_model=params_variogram_model, + rasterize_resolution=errors, **kwargs) + + # We compute the average error in this area + # If the area is only a value, take the average error over the entire Raster + if isinstance(area, float): + average_spread = np.nanmean(errors_arr) + else: + if isinstance(area, gpd.GeoDataFrame): + area_vector = Vector(area) + else: + area_vector = area + area_mask = area_vector.create_mask(errors).squeeze() + + average_spread = np.nanmean(errors_arr[area_mask]) + + # Compute the standard error from those two values + standard_error = average_spread / np.sqrt(neff) + standard_errors.append(standard_error) + + return standard_errors + def _std_err_finite(std: float, neff_tot: float, neff: float) -> float: """ From 3a9b5b2620702b2f7462cf03406513f4171bbec1 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Sun, 21 Aug 2022 19:01:08 +0200 Subject: [PATCH 09/21] Add spatial propagation gallery example --- examples/{error_map.py => infer_heterosc.py} | 5 +- ...lation.py => infer_spatial_correlation.py} | 3 +- examples/spatial_error_propagation.py | 66 +++++++++++++++++++ tests/test_spatialstats.py | 4 +- 4 files changed, 74 insertions(+), 4 deletions(-) rename examples/{error_map.py => infer_heterosc.py} (96%) rename examples/{spatial_correlation.py => infer_spatial_correlation.py} (98%) create mode 100644 examples/spatial_error_propagation.py diff --git a/examples/error_map.py b/examples/infer_heterosc.py similarity index 96% rename from examples/error_map.py rename to examples/infer_heterosc.py index 987d9ed1..b4a07e9d 100644 --- a/examples/error_map.py +++ b/examples/infer_heterosc.py @@ -8,6 +8,7 @@ as an error proxy for moving terrain. **References**: `Hugonnet et al. (2022) `_. See in particular Figure 4. + Errors in elevation difference can be converted in elevation errors following Equation 7 (equal if other source of much higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of same precision). """ @@ -29,14 +30,14 @@ # %% # Then, we run the pipeline for inference of elevation heteroscedasticity from stable terrain: -error_map, df_binning, error_function = \ +errors, df_binning, error_function = \ xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], list_var_names=['slope', 'maxc'], unstable_mask=glacier_outlines) # %% # The first output corresponds to the error map for the DEM (1-sigma): -error_map.show(vmin=2, vmax=7, cmap='Reds', cb_title='Elevation error (1$\sigma$)') +errors.show(vmin=2, vmax=7, cmap='Reds', cb_title='Elevation error (1$\sigma$)') # %% # The second output is the dataframe of 2D binning with slope and maximum curvature: diff --git a/examples/spatial_correlation.py b/examples/infer_spatial_correlation.py similarity index 98% rename from examples/spatial_correlation.py rename to examples/infer_spatial_correlation.py index 6d21446e..72f72d70 100644 --- a/examples/spatial_correlation.py +++ b/examples/infer_spatial_correlation.py @@ -7,7 +7,8 @@ spatial correlation in elevation error, using a sum of variogram forms to model this correlation, and stable terrain as an error proxy for moving terrain. -**References**: `Hugonnet et al. (2022) `_. See in particular Figure 5. +**References**: `Hugonnet et al. 
(2022) `_. See in particular Figure 5, +Equation 13–16. """ # sphinx_gallery_thumbnail_number = 1 import xdem diff --git a/examples/spatial_error_propagation.py b/examples/spatial_error_propagation.py new file mode 100644 index 00000000..64bb4b16 --- /dev/null +++ b/examples/spatial_error_propagation.py @@ -0,0 +1,66 @@ +""" +Spatial propagation of errors +============================= + +Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we apply +the framework of `Hugonnet et al. (2022) `_ to estimate and model this +spatial correlation in elevation error, using a sum of variogram forms to model this correlation, and stable terrain +as an error proxy for moving terrain. + +**References**: `Hugonnet et al. (2022) `_. Figure S16, Equations 17–19, +`Rolstad et al. (2009) `_, Equation 8. +""" +import numpy as np + +import xdem +import geoutils as gu +import matplotlib.pyplot as plt + +# %% +# We load the same data, and perform the same calculations on heteroscedasticity and spatial correlations of errors as +# in the :ref:`sphx_glr_auto_examples_infer_heterosc.py` and :ref:`sphx_glr_auto_examples_infer_spatial_correlation.py` +# examples. + +dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) +ref_dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem")) +glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) +slope, maximum_curvature = xdem.terrain.get_terrain_attribute(ref_dem, attribute=['slope', 'maximum_curvature']) +errors, df_binning, error_function = \ + xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], + list_var_names=['slope', 'maxc'], + unstable_mask=glacier_outlines) + +# %% +# We use the error map to standardize the elevation differences before variogram estimation, as in Equation 12 of +# Hugonnet et al. (2022), which is more robust as it removes the variance variability due to heteroscedasticity. +zscores = dh / errors +emp_variogram, params_variogram_model, spatial_corr_function = \ + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=zscores, list_models=['Gaussian', 'Spherical'], + unstable_mask=glacier_outlines, random_state=42) + +# %% +# With our estimated heteroscedasticity and spatial correlation, we can now perform the spatial propagation of errors. +# We select two glaciers intersecting this elevation change map in Svalbard. The best estimation of their standard error +# is done by directly providing the shapefile, which relies on Equation 18 of Hugonnet et al. (2022). + +areas = [glacier_outlines.ds[glacier_outlines.ds['NAME'] == 'Brombreen'], + glacier_outlines.ds[glacier_outlines.ds['NAME'] == 'Medalsbreen']] +stderr_glaciers = xdem.spatialstats.spatial_error_propagation(areas=areas, errors=errors, + params_variogram_model=params_variogram_model) + +# When passing a numerical area value, we compute an approximation with disk shape from Equation 8 of Rolstad et al. (2009). +# This approximation is practical to visualize changes in elevation error when averaging over different area sizes, +# but is less accurate to estimate the standard error of a certain area shape. 
+areas = 10**np.linspace(1, 12) +stderrs = xdem.spatialstats.spatial_error_propagation(areas=areas, errors=errors, + params_variogram_model=params_variogram_model) +plt.plot(areas / 10**6 , stderrs) +plt.xlabel('Averaging area (km²)') +plt.ylabel('Standard error (m)') +plt.vlines(x=[np.pi*params_variogram_model['range'].values[0]**2 / 10**6, + np.pi*params_variogram_model['range'].values[1]**2 / 10**6], + ymin=np.min(stderrs), ymax=np.max(stderrs), colors='red', linestyles='dashed', + label='Area of disk with radius the correlation range') +plt.xscale('log') +plt.legend() + diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index 209b944c..1bb735d7 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -541,9 +541,11 @@ def test_spatial_error_propagation(self): list_stderr = xdem.spatialstats.spatial_error_propagation(areas=areas_numeric, errors=errors, params_variogram_model=params_variogram_model) - # Check that the outputs are consistent: the numeric method should give smaller neff + # Check that the outputs are consistent: the numeric method should always give smaller neff, but not too far + # off (20% relative) for those two glaciers as their shape is not too different from a disk for i in range(2): assert list_stderr_vec[i] > list_stderr[i] + assert list_stderr_vec[i] == pytest.approx(list_stderr[i], rel=0.2) class TestSubSampling: From b9e6a38740c6255954aa00648d2f68a6f5977224 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 12:28:11 +0200 Subject: [PATCH 10/21] Incremental commit on updating the documentation --- ... => plot_heterosc_estimation_modelling.py} | 36 +++++++++---------- ...fer_heterosc.py => plot_infer_heterosc.py} | 12 +++---- ...n.py => plot_infer_spatial_correlation.py} | 14 ++++---- ...n.py => plot_spatial_error_propagation.py} | 22 ++++++------ ...=> plot_variogram_estimation_modelling.py} | 7 ++-- 5 files changed, 43 insertions(+), 48 deletions(-) rename examples/{heterosc_estimation_modelling.py => plot_heterosc_estimation_modelling.py} (87%) rename examples/{infer_heterosc.py => plot_infer_heterosc.py} (84%) rename examples/{infer_spatial_correlation.py => plot_infer_spatial_correlation.py} (82%) rename examples/{spatial_error_propagation.py => plot_spatial_error_propagation.py} (78%) rename examples/{variogram_estimation_modelling.py => plot_variogram_estimation_modelling.py} (98%) diff --git a/examples/heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py similarity index 87% rename from examples/heterosc_estimation_modelling.py rename to examples/plot_heterosc_estimation_modelling.py index bc764aec..7d6f63c3 100644 --- a/examples/heterosc_estimation_modelling.py +++ b/examples/plot_heterosc_estimation_modelling.py @@ -1,23 +1,21 @@ """ -Non-stationarity of elevation measurement errors -================================================ - -Digital elevation models have a precision that can vary with terrain and instrument-related variables. However, quantifying -this precision is complex and non-stationarities, i.e. variability of the measurement error, has rarely been -accounted for, with only some studies that used arbitrary filtering thresholds on the slope or other variables (see :ref:`intro`). 
-
-Quantifying the non-stationarities in elevation measurement errors is essential to use stable terrain as a proxy for
-assessing the precision on other types of terrain (Hugonnet et al., in prep) and allows to standardize the measurement
-errors to reach a stationary variance, an assumption necessary for spatial statistics (see :ref:`spatialstats`).
-
-Here, we show an example in which we identify terrain-related non-stationarities for a DEM difference at Longyearbyen.
-We quantify those non-stationarities by `binning `_ robustly
-in N-dimension using :func:`xdem.spatialstats.nd_binning` and applying a N-dimensional interpolation
-:func:`xdem.spatialstats.interp_nd_binning` to estimate a numerical function of the measurement error and derive the spatial
-distribution of elevation measurement errors of the difference of DEMs.
-
-**Reference**: `Hugonnet et al. (2021) `_, applied to the terrain slope
-and quality of stereo-correlation (Equation 1, Extended Data Fig. 3a).
+Estimate and model elevation heteroscedasticity
+===============================================
+
+Digital elevation models have a precision that can vary with terrain and instrument-related variables. This variability
+in variance is called `heteroscedasticity `_,
+and is rarely accounted for in DEM studies (see :ref:`intro`). Quantifying elevation heteroscedasticity is essential to
+use stable terrain as an error proxy for moving terrain, and standardize data towards a stationary variance, necessary
+to apply spatial statistics (see :ref:`spatialstats`).
+
+Here, we show an advanced example in which we look for terrain-dependent explanatory variables to explain the
+heteroscedasticity for a DEM difference at Longyearbyen. We use `data binning `_
+and robust statistics in N-dimension with :func:`xdem.spatialstats.nd_binning`, apply a N-dimensional interpolation with
+:func:`xdem.spatialstats.interp_nd_binning`, and scale our interpolant function by :func:`xdem.spatialstats.two_step_standardization`
+to produce an elevation error function.
+
+**References**: `Hugonnet et al. (2021) `_, Equation 1, Extended Data Fig.
+3a and `Hugonnet et al. (2022) `_, Figs. 4 and S6–S9.
 """
 # sphinx_gallery_thumbnail_number = 8
 import matplotlib.pyplot as plt
diff --git a/examples/infer_heterosc.py b/examples/plot_infer_heterosc.py
similarity index 84%
rename from examples/infer_heterosc.py
rename to examples/plot_infer_heterosc.py
index b4a07e9d..3cbd9cd0 100644
--- a/examples/infer_heterosc.py
+++ b/examples/plot_infer_heterosc.py
@@ -2,12 +2,12 @@
 Elevation error map
 ===================

-Digital elevation models have a precision that can vary with terrain and instrument-related variables. Here, we apply
-the framework of `Hugonnet et al. (2022) `_ to estimate and model this
-variability in elevation error, using terrain slope and maximum curvature as explanatory variables and stable terrain
-as an error proxy for moving terrain.
+Digital elevation models have a precision that can vary with terrain and instrument-related variables. Here, we
+rely on a non-stationary spatial statistics framework to estimate and model this variability in elevation error,
+using terrain slope and maximum curvature as explanatory variables, with stable terrain as an error proxy for moving
+terrain.

-**References**: `Hugonnet et al. (2022) `_. See in particular Figure 4.
+**Reference**: `Hugonnet et al. (2022) `_, Figs. 4 and S6–S9.
 Errors in elevation difference can be converted in elevation errors following Equation 7 (equal if other source of much
 higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of same precision).
@@ -54,4 +54,4 @@
 # This pipeline will not always work optimally with default parameters: spread estimates can be affected by skewed
 # distributions, the binning by extreme range of values, some DEMs do not have any error variability with terrain (e.g.,
 # terrestrial photogrammetry). **To learn how to tune more parameters and use the subfunctions, see the gallery example:**
-# :ref:`sphx_glr_auto_examples_heterosc_estimation_modelling.py`!
\ No newline at end of file
+# :ref:`sphx_glr_auto_examples_plot_heterosc_estimation_modelling.py`!
\ No newline at end of file
diff --git a/examples/infer_spatial_correlation.py b/examples/plot_infer_spatial_correlation.py
similarity index 82%
rename from examples/infer_spatial_correlation.py
rename to examples/plot_infer_spatial_correlation.py
index 72f72d70..b6980901 100644
--- a/examples/infer_spatial_correlation.py
+++ b/examples/plot_infer_spatial_correlation.py
@@ -2,13 +2,11 @@
 Spatial correlation of errors
 =============================

-Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we apply
-the framework of `Hugonnet et al. (2022) `_ to estimate and model this
-spatial correlation in elevation error, using a sum of variogram forms to model this correlation, and stable terrain
-as an error proxy for moving terrain.
+Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we
+rely on a non-stationary spatial statistics framework to estimate and model spatial correlations in elevation error.
+We use a sum of variogram forms to model this correlation, with stable terrain as an error proxy for moving terrain.

-**References**: `Hugonnet et al. (2022) `_. See in particular Figure 5,
-Equation 13–16.
+**Reference**: `Hugonnet et al. (2022) `_, Figure 5 and Equations 13–16.
 """
 # sphinx_gallery_thumbnail_number = 1
 import xdem
@@ -23,7 +21,7 @@

 # %%
 # Then, we run the pipeline for inference of the spatial correlation of elevation errors from stable terrain (*Note: we pass a
-# random_state argument to ensure a fixed random subsampling in this example, useful for result reproducibility*):
+# random_state argument to ensure a fixed, reproducible random subsampling in this example*):
 df_empirical_variogram, df_model_params, spatial_corr_function = \
     xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=dh, list_models=['Gaussian', 'Spherical'],
                                                             unstable_mask=glacier_outlines, random_state=42)
@@ -57,4 +55,4 @@
 # This pipeline will not always work optimally with default parameters: variogram sampling is more robust with a lot of
 # samples but takes long computing times, and the fitting might require multiple tries for forms and possibly bounds
 # and first guesses to help the least-squares optimization. **To learn how to tune more parameters and use the
-# subfunctions, see the gallery example:** :ref:`sphx_glr_auto_examples_variogram_estimation_modelling.py`!
+# subfunctions, see the gallery example:** :ref:`sphx_glr_auto_examples_plot_variogram_estimation_modelling.py`!
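Two quick sanity checks on the fitted sum of models can help before using it further: the correlation should be 1 at a zero lag, and the partial sills should sum to the correlated variance resolved by the model. A minimal sketch, assuming `numpy` is imported (this example file does not import it itself):

import numpy as np

# Sketch only: correlation is expected to be exactly 1 at a zero lag (no nugget here)
print('Correlation at a zero spatial lag: {:.2f}'.format(spatial_corr_function(0)))
# The 'psill' column holds the partial sill of each summed model, in m² for elevation differences
print('Sum of modelled partial sills: {:.2f} m²'.format(np.sum(df_model_params['psill'].values)))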
\ No newline at end of file diff --git a/examples/spatial_error_propagation.py b/examples/plot_spatial_error_propagation.py similarity index 78% rename from examples/spatial_error_propagation.py rename to examples/plot_spatial_error_propagation.py index 64bb4b16..c846dc69 100644 --- a/examples/spatial_error_propagation.py +++ b/examples/plot_spatial_error_propagation.py @@ -1,15 +1,16 @@ """ -Spatial propagation of errors -============================= +Spatial propagation of elevation errors +======================================= -Digital elevation models have errors that are spatially correlated due to instrument or processing effects. Here, we apply -the framework of `Hugonnet et al. (2022) `_ to estimate and model this -spatial correlation in elevation error, using a sum of variogram forms to model this correlation, and stable terrain -as an error proxy for moving terrain. +Propagating elevation errors spatially accounting for heteroscedasticity and spatial correlation is complex. It +requires computing the pairwise correlations between all points of an area of interest (be it for a sum, mean, or +other operation), which is computationally intensive. Here, we rely on published formulations to perform +computationally-efficient spatial propagation for the mean of elevation (or elevation differences) in an area. **References**: `Hugonnet et al. (2022) `_. Figure S16, Equations 17–19, `Rolstad et al. (2009) `_, Equation 8. """ +# sphinx_gallery_thumbnail_number = 1 import numpy as np import xdem @@ -31,7 +32,7 @@ unstable_mask=glacier_outlines) # %% -# We use the error map to standardize the elevation differences before variogram estimation, as in Equation 12 of +# We use the error map to standardize the elevation differences before variogram estimation, following Equation 12 of # Hugonnet et al. (2022), which is more robust as it removes the variance variability due to heteroscedasticity. zscores = dh / errors emp_variogram, params_variogram_model, spatial_corr_function = \ @@ -42,15 +43,14 @@ # With our estimated heteroscedasticity and spatial correlation, we can now perform the spatial propagation of errors. # We select two glaciers intersecting this elevation change map in Svalbard. The best estimation of their standard error # is done by directly providing the shapefile, which relies on Equation 18 of Hugonnet et al. (2022). - areas = [glacier_outlines.ds[glacier_outlines.ds['NAME'] == 'Brombreen'], glacier_outlines.ds[glacier_outlines.ds['NAME'] == 'Medalsbreen']] stderr_glaciers = xdem.spatialstats.spatial_error_propagation(areas=areas, errors=errors, params_variogram_model=params_variogram_model) -# When passing a numerical area value, we compute an approximation with disk shape from Equation 8 of Rolstad et al. (2009). -# This approximation is practical to visualize changes in elevation error when averaging over different area sizes, -# but is less accurate to estimate the standard error of a certain area shape. +# When passing a numerical area value, we compute an approximation with disk shape from Equation 8 of Rolstad et al. +# (2009). This approximation is practical to visualize changes in elevation error when averaging over different area +# sizes, but is less accurate to estimate the standard error of a certain area shape. 
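The numeric-area branch can also be reproduced by hand from the pieces above, as the mean error over the whole raster divided by the square root of the number of effective samples, following the `spatial_error_propagation` docstring of PATCH 08. A minimal sketch, assuming an arbitrary 1 km² disk, before the plot below:

# Manual cross-check of SE = mean(SD) / sqrt(Neff) for a single numeric area (sketch only)
errors_arr = gu.spatial_tools.get_array_and_mask(errors)[0]
neff_check = xdem.spatialstats.number_effective_samples(area=10**6, params_variogram_model=params_variogram_model)
print('Standard error for a 1 km2 disk: {:.3f} m'.format(np.nanmean(errors_arr) / np.sqrt(neff_check)))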
 areas = 10**np.linspace(1, 12)
 stderrs = xdem.spatialstats.spatial_error_propagation(areas=areas, errors=errors,
                                                       params_variogram_model=params_variogram_model)
 plt.plot(areas / 10**6 , stderrs)
 plt.xlabel('Averaging area (km²)')
 plt.ylabel('Standard error (m)')
 plt.vlines(x=[np.pi*params_variogram_model['range'].values[0]**2 / 10**6,
               np.pi*params_variogram_model['range'].values[1]**2 / 10**6],
            ymin=np.min(stderrs), ymax=np.max(stderrs), colors='red', linestyles='dashed',
            label='Area of disk with radius the correlation range')
 plt.xscale('log')
 plt.legend()

diff --git a/examples/variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py
similarity index 98%
rename from examples/variogram_estimation_modelling.py
rename to examples/plot_variogram_estimation_modelling.py
index 44569cda..90047561 100644
--- a/examples/variogram_estimation_modelling.py
+++ b/examples/plot_variogram_estimation_modelling.py
@@ -86,8 +86,7 @@
 # large grid data in `scikit-gstat `_, which are encapsulated
 # conveniently by :func:`xdem.spatialstats.sample_empirical_variogram`:

-df = xdem.spatialstats.sample_empirical_variogram(
-    values=dh.data, gsd=dh.res[0], subsample=300, n_variograms=10, random_state=42)
+df = xdem.spatialstats.sample_empirical_variogram(values=dh.data, gsd=dh.res[0], subsample=100, n_variograms=10, random_state=42)

 # %%
 # *Note: in this example, we add a* ``random_state`` *argument to yield a reproducible random sampling of pixels within
@@ -169,7 +168,7 @@
 for area_emp in areas_emp:

     # First, sample intensively circular patches of a given area, and derive the mean elevation differences
-    df_patches = xdem.spatialstats.patches_method(dh.data.data, gsd=dh.res[0], area=area_emp, n_patches=200, random_state=42)
+    df_patches = xdem.spatialstats.patches_method(dh.data.data, gsd=dh.res[0], area=area_emp, n_patches=100, random_state=42)
     # Second, estimate the dispersion of the means of each patch, i.e. the standard error of the mean
     stderr_empirical = np.nanstd(df_patches['nanmedian'].values)
     list_stderr_empirical.append(stderr_empirical)
@@ -219,7 +218,7 @@
 for area in areas:

     # For a double-range model
-    neff_doublerange = xdem.spatialstats.neff_circular_approx_numerical(area = area, params_variogram_model = params_vgm2)
+    neff_doublerange = xdem.spatialstats.neff_circular_approx_numerical(area=area, params_variogram_model=params_vgm2)

     # About 5% of the variance might be fully correlated, the other 95% has the random part that we quantified
     stderr_fullycorr = np.sqrt(0.05*np.nanvar(dh.data))

From 198026d06549bd7e8545ab1100890eab688e20f7 Mon Sep 17 00:00:00 2001
From: rhugonne
Date: Mon, 22 Aug 2022 13:24:40 +0200
Subject: [PATCH 11/21] Move nd column in nd_binning output to first position

---
 xdem/spatialstats.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py
index 37d5e39d..717c8fa9 100644
--- a/xdem/spatialstats.py
+++ b/xdem/spatialstats.py
@@ -98,15 +98,15 @@ def nd_binning(values: np.ndarray, list_var: Iterable[np.ndarray], list_var_name
     list_df_1d = []
     for i, var in enumerate(list_var):
         df_stats_1d = pd.DataFrame()
-        # get statistics
+        # Get statistics
         for j, statistic in enumerate(statistics):
            stats_binned_1d, bedges_1d = binned_statistic(var,values,statistic=statistic,bins=list_var_bins[i],range=list_ranges)[:2]
-           # save in a dataframe
+           # Save in a dataframe
            df_stats_1d[statistics_name[j]] = stats_binned_1d
-           # we need to get the middle of the bins from the edges, to get the same dimension length
+           # We need to get the middle of the bins from the edges, to get the same dimension length
            df_stats_1d[list_var_names[i]] = pd.IntervalIndex.from_breaks(bedges_1d,closed='left')
-           # report number of dimensions used
-           df_stats_1d['nd'] = 1
+           # Report number of dimensions used
+           df_stats_1d.insert(0, 'nd', 1)

         list_df_1d.append(df_stats_1d)

@@ -116,22 +116,22 @@ def nd_binning(values: 
np.ndarray, list_var: Iterable[np.ndarray], list_var_name combs = list(itertools.combinations(list_var_names, 2)) for i, comb in enumerate(combs): var1_name, var2_name = comb - # corresponding variables indexes + # Corresponding variables indexes i1, i2 = list_var_names.index(var1_name), list_var_names.index(var2_name) df_stats_2d = pd.DataFrame() for j, statistic in enumerate(statistics): stats_binned_2d, bedges_var1, bedges_var2 = binned_statistic_2d(list_var[i1],list_var[i2],values,statistic=statistic ,bins=[list_var_bins[i1],list_var_bins[i2]] ,range=list_ranges)[:3] - # get statistics + # Get statistics df_stats_2d[statistics_name[j]] = stats_binned_2d.flatten() - # derive interval indexes and convert bins into 2d indexes + # Derive interval indexes and convert bins into 2d indexes ii1 = pd.IntervalIndex.from_breaks(bedges_var1,closed='left') ii2 = pd.IntervalIndex.from_breaks(bedges_var2,closed='left') df_stats_2d[var1_name] = [i1 for i1 in ii1 for i2 in ii2] df_stats_2d[var2_name] = [i2 for i1 in ii1 for i2 in ii2] - # report number of dimensions used - df_stats_2d['nd'] = 2 + # Report number of dimensions used + df_stats_2d.insert(0, 'nd', 2) list_df_2d.append(df_stats_2d) @@ -153,7 +153,7 @@ def nd_binning(values: np.ndarray, list_var: Iterable[np.ndarray], list_var_name df_stats_nd[var_name] = iind[i].flatten() # Report number of dimensions used - df_stats_nd['nd'] = len(list_var_names) + df_stats_nd.insert(0, 'nd', len(list_var_names)) # Concatenate everything list_all_dfs = list_df_1d + list_df_2d + [df_stats_nd] From dbc3d309b4faf03453c3b8cb239721c646ffd26b Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 14:04:54 +0200 Subject: [PATCH 12/21] Corrections and update of previous non-stationarity example --- .../plot_heterosc_estimation_modelling.py | 110 +++++++++++------- examples/plot_infer_heterosc.py | 11 +- examples/plot_infer_spatial_correlation.py | 8 +- .../plot_variogram_estimation_modelling.py | 4 +- 4 files changed, 77 insertions(+), 56 deletions(-) diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py index 7d6f63c3..ccd47cb5 100644 --- a/examples/plot_heterosc_estimation_modelling.py +++ b/examples/plot_heterosc_estimation_modelling.py @@ -11,11 +11,15 @@ Here, we show an advanced example in which we look for terrain-dependent explanatory variables to explain the heteroscedasticity for a DEM difference at Longyearbyen. We use `data binning `_ and robust statistics in N-dimension with :func:`xdem.spatialstats.nd_binning`, apply a N-dimensional interpolation with -:func:`xdem.spatialstats.interp_nd_binning`, and scale our interpolant function by :func:`xdem.spatialstats.two_step_standardization` -to produce an elevation error function. +:func:`xdem.spatialstats.interp_nd_binning`, and scale our interpolant function with a two-step standardization + :func:`xdem.spatialstats.two_step_standardization` to produce the final elevation error function. **References**: `Hugonnet et al. (2021) `_, Equation 1, Extended Data Fig. 3a and `Hugonnet et al. (2022) `_, Figs. 4 and S6–S9. + +Errors in elevation difference can be converted in elevation errors following Equation 7 (equal if other source of much +higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of same precision). Below we consider errors +in elevation differences. 
""" # sphinx_gallery_thumbnail_number = 8 import matplotlib.pyplot as plt @@ -24,10 +28,8 @@ import geoutils as gu # %% -# We start by loading example files including a difference of DEMs at Longyearbyen, the reference DEM later used to derive -# several terrain attributes, and the outlines to rasterize a glacier mask. -# Prior to differencing, the DEMs were aligned using :ref:`coregistration_nuthkaab` as shown in -# the :ref:`sphx_glr_auto_examples_plot_nuth_kaab.py` example. We later refer to those elevation differences as *dh*. +# Here, we detail the steps used by ``xdem.spatialstats.infer_heteroscedasticity_from_stable`` exemplified in +# :ref:`sphx_glr_auto_examples_plot_infer_heterosc.py`. First, we load example files and create a glacier mask. ref_dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem")) dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) @@ -35,17 +37,15 @@ mask_glacier = glacier_outlines.create_mask(dh) # %% -# We use the reference DEM to derive terrain variables such as slope, aspect, curvature (see :ref:`sphx_glr_auto_examples_plot_terrain_attributes.py`) -# that we'll use to explore potential non-stationarities in elevation measurement error - -# We compute the slope, aspect, and both plan and profile curvatures: +# We derive terrain attributes from the reference DEM (see :ref:`sphx_glr_auto_examples_plot_terrain_attributes.py`), +# which we will use to explore the variability in elevation error. slope, aspect, planc, profc = \ xdem.terrain.get_terrain_attribute(dem=ref_dem.data, attribute=['slope','aspect', 'planform_curvature', 'profile_curvature'], resolution=ref_dem.res) # %% -# We remove values on unstable terrain +# We keep only stable terrain for the analysis of variability dh_arr = dh.data[~mask_glacier] slope_arr = slope[~mask_glacier] aspect_arr = aspect[~mask_glacier] @@ -77,21 +77,19 @@ # using :func:`xdem.spatialstats.plot_1d_binning`. # We can start with the slope that has been long known to be related to the elevation measurement error (e.g., # `Toutin (2002) `_). -xdem.spatialstats.plot_1d_binning(df, 'slope', 'nmad', 'Slope (degrees)', 'NMAD of dh (m)') +xdem.spatialstats.plot_1d_binning(df, var_name='slope', statistic_name='nmad', + label_var='Slope (degrees)', label_statistic='NMAD of dh (m)') # %% # We identify a clear variability, with the dispersion estimated from the NMAD increasing from ~2 meters for nearly flat # slopes to above 12 meters for slopes steeper than 50°. -# In statistical terms, such a variability of `variance `_ is referred as -# `heteroscedasticity `_. Here we observe heteroscedastic elevation -# differences due to a non-stationarity of variance with the terrain slope. # # What about the aspect? xdem.spatialstats.plot_1d_binning(df, 'aspect', 'nmad', 'Aspect (degrees)', 'NMAD of dh (m)') # %% -# There is no variability with the aspect which shows a dispersion averaging 2-3 meters, i.e. that of the complete sample. +# There is no variability with the aspect that shows a dispersion averaging 2-3 meters, i.e. that of the complete sample. # # What about the plan curvature? @@ -101,40 +99,40 @@ # The relation with the plan curvature remains ambiguous. # We should better define our bins to avoid sampling bins with too many or too few samples. For this, we can partition # the data in quantiles in :func:`xdem.spatialstats.nd_binning`. -# Note: we need a higher number of bins to work with quantiles and still resolve the edges of the distribution. 
Thus, as -# with many dimensions the N dimensional bin size increases exponentially, we avoid binning all variables at the same -# time and instead bin one at a time. +# *Note: we need a higher number of bins to work with quantiles and still resolve the edges of the distribution. As +# with many dimensions the ND bin size increases exponentially, we avoid binning all variables at the same +# time and instead bin one at a time.* # We define 1000 quantile bins of size 0.001 (equivalent to 0.1% percentile bins) for the profile curvature: df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[profc_arr], list_var_names=['profc'], statistics=['count', np.nanmedian, xdem.spatialstats.nmad], - list_var_bins=[np.nanquantile(profc_arr,np.linspace(0,1,1000))]) + list_var_bins=[np.nanquantile(profc_arr, np.linspace(0,1,1000))]) xdem.spatialstats.plot_1d_binning(df, 'profc', 'nmad', 'Profile curvature (100 m$^{-1}$)', 'NMAD of dh (m)') # %% -# We now clearly identify the variability with the profile curvature, from 2 meters for low curvatures to above 4 meters +# We clearly identify a variability with the profile curvature, from 2 meters for low curvatures to above 4 meters # for higher positive or negative curvature. +# # What about the role of the plan curvature? df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[planc_arr], list_var_names=['planc'], statistics=['count', np.nanmedian, xdem.spatialstats.nmad], - list_var_bins=[np.nanquantile(planc_arr,np.linspace(0,1,1000))]) + list_var_bins=[np.nanquantile(planc_arr, np.linspace(0,1,1000))]) xdem.spatialstats.plot_1d_binning(df, 'planc', 'nmad', 'Planform curvature (100 m$^{-1}$)', 'NMAD of dh (m)') # %% # The plan curvature shows a similar relation. Those are symmetrical with 0, and almost equal for both types of curvature. # To simplify the analysis, we here combine those curvatures into the maximum absolute curvature: -# Derive maximum absolute curvature maxc_arr = np.maximum(np.abs(planc_arr),np.abs(profc_arr)) df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[maxc_arr], list_var_names=['maxc'], statistics=['count', np.nanmedian, xdem.spatialstats.nmad], - list_var_bins=[np.nanquantile(maxc_arr,np.linspace(0,1,1000))]) + list_var_bins=[np.nanquantile(maxc_arr, np.linspace(0,1,1000))]) xdem.spatialstats.plot_1d_binning(df, 'maxc', 'nmad', 'Maximum absolute curvature (100 m$^{-1}$)', 'NMAD of dh (m)') # %% # Here's our simplified relation! We now have both slope and maximum absolute curvature with clear variability of -# the elevation measurement error. +# the elevation error. 
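The next paragraph asks whether the slope and curvature dependencies are one and the same; a quick, hypothetical way to gauge their coupling beforehand is a rank correlation. A minimal sketch, assuming the NaN-filled arrays defined above and SciPy (an existing xdem dependency):

from scipy.stats import spearmanr

# Rank correlation between the two explanatory variables on stable terrain (sketch only)
valid = np.isfinite(slope_arr) & np.isfinite(maxc_arr)
rho = spearmanr(slope_arr[valid], maxc_arr[valid])[0]
print('Spearman correlation between slope and maximum curvature: {:.2f}'.format(rho))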
# # **But, one might wonder: high curvatures might occur more often around steep slopes than flat slope, # so what if those two dependencies are actually one and the same?** @@ -145,7 +143,10 @@ statistics=['count', np.nanmedian, xdem.spatialstats.nmad], list_var_bins=30) -xdem.spatialstats.plot_2d_binning(df, 'slope', 'maxc', 'nmad', 'Slope (degrees)', 'Maximum absolute curvature (100 m$^{-1}$)', 'NMAD of dh (m)') +xdem.spatialstats.plot_2d_binning(df, var_name_1='slope', var_name_2='maxc', statistic_name='nmad', + label_var_name_1='Slope (degrees)', + label_var_name_2='Maximum absolute curvature (100 m$^{-1}$)', + label_statistic='NMAD of dh (m)') # %% # We can see that part of the variability seems to be independent, but with the uniform bins it is hard to tell much @@ -153,18 +154,19 @@ # # If we use custom quantiles for both binning variables, and adjust the plot scale: -custom_bin_slope = np.unique(np.concatenate([np.nanquantile(slope_arr,np.linspace(0,0.95,20)), - np.nanquantile(slope_arr,np.linspace(0.96,0.99,5)), - np.nanquantile(slope_arr,np.linspace(0.991,1,10))])) +custom_bin_slope = np.unique(np.concatenate([np.nanquantile(slope_arr, np.linspace(0,0.95,20)), + np.nanquantile(slope_arr, np.linspace(0.96,0.99,5)), + np.nanquantile(slope_arr, np.linspace(0.991,1,10))])) -custom_bin_curvature = np.unique(np.concatenate([np.nanquantile(maxc_arr,np.linspace(0,0.95,20)), - np.nanquantile(maxc_arr,np.linspace(0.96,0.99,5)), - np.nanquantile(maxc_arr,np.linspace(0.991,1,10))])) +custom_bin_curvature = np.unique(np.concatenate([np.nanquantile(maxc_arr, np.linspace(0,0.95,20)), + np.nanquantile(maxc_arr, np.linspace(0.96,0.99,5)), + np.nanquantile(maxc_arr, np.linspace(0.991,1,10))])) df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[slope_arr, maxc_arr], list_var_names=['slope', 'maxc'], statistics=['count', np.nanmedian, xdem.spatialstats.nmad], list_var_bins=[custom_bin_slope,custom_bin_curvature]) -xdem.spatialstats.plot_2d_binning(df, 'slope', 'maxc', 'nmad', 'Slope (degrees)', 'Maximum absolute curvature (100 m$^{-1}$)', 'NMAD of dh (m)', scale_var_2='log', vmin=2, vmax=10) +xdem.spatialstats.plot_2d_binning(df, 'slope', 'maxc', 'nmad', 'Slope (degrees)', + 'Maximum absolute curvature (100 m$^{-1}$)', 'NMAD of dh (m)', scale_var_2='log', vmin=2, vmax=10) # %% @@ -180,22 +182,35 @@ # approximation i.e. a piecewise linear interpolation/extrapolation based on the binning results. # To ensure that only robust statistic values are used in the interpolation, we set a ``min_count`` value at 30 samples. -slope_curv_to_dh_err = xdem.spatialstats.interp_nd_binning(df, list_var_names=['slope', 'maxc'], statistic='nmad', min_count=30) +unscaled_dh_err_fun = xdem.spatialstats.interp_nd_binning(df, list_var_names=['slope', 'maxc'], + statistic='nmad', min_count=30) # %% -# The output is an interpolant function of slope and curvature that we can use to estimate the elevation measurement -# error at any point. +# The output is an interpolant function of slope and curvature that predicts the elevation error at any point. 
However,
+# this predicted error might have a spread slightly off from that of the data:
 #
-# For instance:
+# We compare the spread of the elevation difference on stable terrain and the average predicted error:
+dh_err_stable = unscaled_dh_err_fun((slope_arr, maxc_arr))
+
+print('The spread of elevation difference is {:.2f} '
+      'compared to a mean predicted elevation error of {:.2f}.'.format(xdem.spatialstats.nmad(dh_arr),
+                                                                       np.nanmean(dh_err_stable)))
+
+# %%
+# Thus, we rescale the function to exactly match the spread on stable terrain using the
+# ``xdem.spatialstats.two_step_standardization`` function, and get our final error function.
+
+dh_err_fun = xdem.spatialstats.two_step_standardization(dh_arr, list_var=[slope_arr, maxc_arr],
+                                                        unscaled_error_fun=unscaled_dh_err_fun)
 
-for s, c in [(0.,0.1), (50.,0.1), (0.,20.), (50.,20.)]:
+for s, c in [(0., 0.1), (50., 0.1), (0., 20.), (50., 20.)]:
     print('Elevation measurement error for slope of {0:.0f} degrees, '
-          'curvature of {1:.2f} m-1: {2:.1f}'.format(s, c/100, slope_curv_to_dh_err((s,c)))+ ' meters.')
+          'curvature of {1:.2f} m-1: {2:.1f}'.format(s, c/100, dh_err_fun((s, c)))+ ' meters.')
 
 # %%
-# The same function can be used to estimate the spatial distribution of the elevation measurement error over the area:
+# This function can be used to estimate the spatial distribution of the elevation error on the extent of our DEMs:
 maxc = np.maximum(np.abs(profc), np.abs(planc))
-dh_err = slope_curv_to_dh_err((slope, maxc))
+errors = dh_err_fun((slope, maxc))
 
 plt.figure(figsize=(8, 5))
 plt_extent = [
@@ -204,7 +219,14 @@
     ref_dem.bounds.bottom,
     ref_dem.bounds.top,
 ]
-plt.imshow(dh_err.squeeze(), cmap="Reds", vmin=2, vmax=8, extent=plt_extent)
+plt.imshow(errors.squeeze(), cmap="Reds", vmin=2, vmax=8, extent=plt_extent)
 cbar = plt.colorbar()
-cbar.set_label('Elevation measurement error (m)')
-plt.show()
\ No newline at end of file
+cbar.set_label('Elevation error ($1\sigma$, m)')
+plt.show()
+
+# %%
+# These three steps can be done in one go using the ``xdem.spatialstats.estimate_model_heteroscedasticity`` function, which
+# wraps those three functions, expecting ``np.ndarray`` inputs subset to stable terrain.
+
+df, dh_err_fun = xdem.spatialstats.estimate_model_heteroscedasticity(dvalues=dh_arr, list_var=[slope_arr, maxc_arr],
+                                                                     list_var_names=['slope', 'maxc'])
diff --git a/examples/plot_infer_heterosc.py b/examples/plot_infer_heterosc.py
index 3cbd9cd0..8f920c95 100644
--- a/examples/plot_infer_heterosc.py
+++ b/examples/plot_infer_heterosc.py
@@ -10,7 +10,8 @@
 **Reference**: `Hugonnet et al. (2022) `_, Figs. 4 and S6–S9. Errors in
 elevation difference can be converted into elevation errors following Equation 7 (equal if other source of much
-higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of same precision).
+higher precision) or Equation 8 (divided by sqrt(2) if the two sources are of the same precision). Below we consider errors
+in elevation differences.
 """
 # sphinx_gallery_thumbnail_number = 1
 import xdem
@@ -37,7 +38,7 @@
 
 # %%
 # The first output corresponds to the error map for the DEM (1-sigma):
-errors.show(vmin=2, vmax=7, cmap='Reds', cb_title='Elevation error (1$\sigma$)')
+errors.show(vmin=2, vmax=7, cmap='Reds', cb_title='Elevation error (1$\sigma$, m)')
 
 # %%
 # The second output is the dataframe of 2D binning with slope and maximum curvature:
 
 # %%
 # The third output is the 2D binning interpolant, i.e. 
an error function with the slope and maximum curvature # (*Note: below we multiply the maximum curvature by 100 to convert it in m-1*): -print('Error for a slope of {:.0f} degrees and {:.0f} m-1 max. curvature: {:.1f} m'.format(0, 0, error_function((0, 0)))) -print('Error for a slope of {:.0f} degrees and {:.0f} m-1 max. curvature: {:.1f} m'.format(40, 0, error_function((40, 0)))) -print('Error for a slope of {:.0f} degrees and {:.0f} m-1 max. curvature: {:.1f} m'.format(0, 100*5, error_function((0, 5)))) +for slope, maxc in [(0, 0), (40, 0), (0, 5), (40, 5)]: + print('Error for a slope of {:.0f} degrees and' + ' {:.0f} m-1 max. curvature: {:.1f} m'.format(slope, maxc * 100., error_function((slope, maxc)))) # %% # This pipeline will not always work optimally with default parameters: spread estimates can be affected by skewed diff --git a/examples/plot_infer_spatial_correlation.py b/examples/plot_infer_spatial_correlation.py index b6980901..0f0d6c71 100644 --- a/examples/plot_infer_spatial_correlation.py +++ b/examples/plot_infer_spatial_correlation.py @@ -37,11 +37,9 @@ # %% # The third output is the spatial correlation function with spatial lags, derived from the variogram: -print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(0)*100, 0)) -print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(100)*100, 100)) -print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(1000)*100, 1000)) -print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(10000)*100, 10000)) -print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'.format(spatial_corr_function(30000)*100, 30000)) +for spatial_lag in [0, 100, 1000, 10000, 30000]: + print('Errors are correlated at {:.1f}% for a {:,.0f} m spatial lag'. + format(spatial_corr_function(spatial_lag)*100, spatial_lag)) # %% # We can plot the empirical variogram and its model on a non-linear X-axis to identify the multi-scale correlations. diff --git a/examples/plot_variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py index 90047561..09e6ab48 100644 --- a/examples/plot_variogram_estimation_modelling.py +++ b/examples/plot_variogram_estimation_modelling.py @@ -1,6 +1,6 @@ """ -Spatial correlation of elevation measurement errors -=================================================== +Estimate and model spatial variograms +===================================== Digital elevation models have elevation measurement errors that can vary with terrain or instrument-related variables (see :ref:`sphx_glr_auto_examples_plot_nonstationary_error.py`), but those measurement errors are also often From 5ee882c0d7f10ac706fcaa9ca2b4a88884742915 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 14:43:36 +0200 Subject: [PATCH 13/21] Update spatial correlation advanced example --- examples/plot_icp_coregistration.py | 4 +- examples/plot_nuth_kaab.py | 4 +- .../plot_variogram_estimation_modelling.py | 59 ++++++++----------- 3 files changed, 27 insertions(+), 40 deletions(-) diff --git a/examples/plot_icp_coregistration.py b/examples/plot_icp_coregistration.py index 0d8b4caf..c1d3495b 100644 --- a/examples/plot_icp_coregistration.py +++ b/examples/plot_icp_coregistration.py @@ -1,6 +1,6 @@ """ -Iterative Closest Point (ICP) coregistration. 
-=============================================
+Iterative Closest Point coregistration
+======================================
 
 Some DEMs may for one or more reasons be erroneously rotated in the X, Y or Z directions.
 Established coregistration approaches like :ref:`coregistration_nuthkaab` work great for X, Y and Z *translations*, but rotation is not accounted for at all.
diff --git a/examples/plot_nuth_kaab.py b/examples/plot_nuth_kaab.py
index edfa4fc6..4de9a509 100644
--- a/examples/plot_nuth_kaab.py
+++ b/examples/plot_nuth_kaab.py
@@ -1,6 +1,6 @@
 """
-Nuth & Kääb (2011) coregistration
-=================================
+Nuth and Kääb coregistration
+============================
 
 Nuth and Kääb (`2011 `_) coregistration allows for horizontal and vertical
 shifts to be estimated and corrected for. In ``xdem``, this approach is implemented through the :class:`xdem.coreg.NuthKaab` class.
diff --git a/examples/plot_variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py
index 09e6ab48..542549b4 100644
--- a/examples/plot_variogram_estimation_modelling.py
+++ b/examples/plot_variogram_estimation_modelling.py
@@ -2,29 +2,21 @@
 Estimate and model spatial variograms
 =====================================
 
-Digital elevation models have elevation measurement errors that can vary with terrain or instrument-related variables
-(see :ref:`sphx_glr_auto_examples_plot_nonstationary_error.py`), but those measurement errors are also often
-`correlated in space `_.
-While many DEM studies have been using short-range `variogram `_ to
+Digital elevation models have errors that are often `correlated in space `_.
+While many DEM studies have used solely short-range `variograms `_ to
 estimate the correlation of elevation measurement errors (e.g., `Howat et al. (2008) `_
 , `Wang and Kääb (2015) `_), recent studies show that variograms of multiple ranges
-provide larger, more reliable estimates of spatial correlation for DEMs (e.g., `Dehecq et al. (2020) `_
-, `Hugonnet et al. (2021) `_).
-
-Quantifying the spatial correlation in elevation measurement errors is essential to integrate measurement errors over
-an area of interest (e.g, to estimate the error of a mean or sum of samples). Once the spatial correlations are quantified,
-several methods exist to derive the related measurement error integrated in space (`Rolstad et al. (2009) `_
-, Hugonnet et al. (in prep)). More details are available in :ref:`spatialstats`.
-
-Here, we show an example in which we estimate spatially integrated elevation measurement errors for a DEM difference at
-Longyearbyen, demonstrated in :ref:`sphx_glr_auto_examples_plot_nuth_kaab.py`. We first quantify the spatial
-correlations using :func:`xdem.spatialstats.sample_empirical_variogram` based on routines of `scikit-gstat
-`_. We then model the empirical variogram using a sum of variogram
-models using :func:`xdem.spatialstats.fit_sum_model_variogram`.
-Finally, we integrate the variogram models for varying surface areas to estimate the spatially integrated elevation
-measurement errors using :func:`xdem.spatialstats.neff_circ`, and empirically validate the improved robustness of
-our results using :func:`xdem.spatialstats.patches_method`, an intensive Monte-Carlo sampling approach.
+provide larger, more reliable estimates of spatial correlation for DEMs.
+Here, we show an example in which we estimate the spatial correlation for a DEM difference at Longyearbyen, and its
+impact on the standard error with averaging area. 
We first estimate an empirical variogram with
+:func:`xdem.spatialstats.sample_empirical_variogram` based on routines of `scikit-gstat
+`_. We then fit the empirical variogram with a sum of variogram
+models using :func:`xdem.spatialstats.fit_sum_model_variogram`. Finally, we perform spatial propagation for a range of
+averaging areas using :func:`xdem.spatialstats.number_effective_samples`, and empirically validate the improved
+robustness of our results using :func:`xdem.spatialstats.patches_method`, an intensive Monte-Carlo sampling approach.
+
+**Reference:** `Hugonnet et al. (2022) `_, Figure 5 and Equations 13–16.
 """
 # sphinx_gallery_thumbnail_number = 6
 import matplotlib.pyplot as plt
@@ -33,23 +25,19 @@
 import geoutils as gu
 
 # %%
-# We start by loading example files including a difference of DEMs at Longyearbyen and the outlines to rasterize
-# a glacier mask.
-# Prior to differencing, the DEMs were aligned using :ref:`coregistration_nuthkaab` as shown in
-# the :ref:`sphx_glr_auto_examples_plot_nuth_kaab.py` example. We later refer to those elevation differences as *dh*.
+# We load example files.
 
 dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem"))
 glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines"))
 mask_glacier = glacier_outlines.create_mask(dh)
 
 # %%
-# We remove values on glacier terrain in order to isolate stable terrain, our proxy for elevation measurement errors.
+# We exclude values on glacier terrain in order to isolate stable terrain, our proxy for elevation errors.
 
 dh.data[mask_glacier] = np.nan
 
 # %%
-# We estimate the average per-pixel elevation measurement error on stable terrain, using both the standard deviation
-# and normalized median absolute deviation. For this example, we do not account for the non-stationarity in elevation
-# measurement errors quantified in :ref:`sphx_glr_auto_examples_plot_nonstationary_error.py`.
+# We estimate the average per-pixel elevation error on stable terrain, using both the standard deviation
+# and normalized median absolute deviation. For this example, we do not account for elevation heteroscedasticity.
 
 print('STD: {:.2f} meters.'.format(np.nanstd(dh.data)))
 print('NMAD: {:.2f} meters.'.format(xdem.spatialstats.nmad(dh.data)))
@@ -64,16 +52,16 @@
 
 # %%
 # We clearly see that the residual elevation differences on stable terrain are not random. The positive and negative
 # differences (blue and red, respectively) appear correlated over large distances. These correlated errors are what
-# we aim to quantify.
+# we want to estimate and model.
 
 # %%
 # Additionally, we notice that the elevation differences are still polluted by unrealistically large elevation
-# differences near glaciers, probably because the glacier inventory is more recent than the data, and the outlines are too small.
+# differences near glaciers, probably because the glacier inventory is more recent than the data, hence with outlines that are too small.
 # To remedy this, we filter large elevation differences outside 4 NMAD.
 dh.data[np.abs(dh.data) > 4 * xdem.spatialstats.nmad(dh.data)] = np.nan
 
 # %%
-# We plot the elevation differences after filtering to check that we successively removed the reminaing glacier signals.
+# We plot the elevation differences after filtering to check that we successfully removed glacier signals. 
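For reference, a minimal sketch of the NMAD estimator behind this 4-NMAD filter, assuming a plain NumPy array (the pipeline above uses ``xdem.spatialstats.nmad``), illustrating why it is preferred over the standard deviation on outlier-contaminated differences:

import numpy as np

def nmad_sketch(values):
    # Normalized median absolute deviation: equals the standard deviation for
    # normally distributed data, but is barely affected by gross outliers
    return 1.4826 * np.nanmedian(np.abs(values - np.nanmedian(values)))

dh_sample = np.array([0.5, -0.3, 0.2, 30.0, -0.1])  # one unfiltered glacier signal
print('NMAD: {:.2f}, STD: {:.2f}'.format(nmad_sketch(dh_sample), np.nanstd(dh_sample)))
# NMAD stays ~0.4 m while the STD jumps to ~12 m because of the single outlier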
plt.figure(figsize=(8, 5))
 _ = dh.show(ax=plt.gca(), cmap='RdYlBu', vmin=-4, vmax=4, cb_title='Elevation differences (m)')
 
@@ -159,10 +147,9 @@
     list_stderr_doublerange.append(stderr_doublerange)
 
 # %%
-# We add an empirical error based on intensive Monte-Carlo sampling ("patches" method) to validate our results
-# (Dehecq et al. (2020), Hugonnet et al., in prep). This method is implemented in :func:`xdem.spatialstats.patches_method`.
-# Here, we sample fewer areas to avoid for the patches method to run over long processing times, increasing from areas
-# of 5 pixels to areas of 10000 pixels exponentially.
+# We add an empirical error based on intensive Monte-Carlo sampling ("patches" method) to validate our results.
+# This method is implemented in :func:`xdem.spatialstats.patches_method`. Here, we sample fewer areas to avoid long
+# processing times with the patches method, increasing from areas of 5 pixels to areas of 10000 pixels exponentially.
 
 areas_emp = [10 * 400 * 2 ** i for i in range(10)]
 for area_emp in areas_emp:
@@ -186,7 +173,7 @@
 
 # %%
 # *Note: in this example, we add a* ``random_state`` *argument to the patches method to yield a reproducible random
-# sampling, and set* ``n_patches`` *to limit computing time.*
+# sampling, and set* ``n_patches`` *to reduce computing time.*
 
 # %%
 # Using a single-range variogram highly underestimates the measurement error integrated over an area, by over a factor

From 39cb70878d1b84502a15552d74e13d1d29ade04f Mon Sep 17 00:00:00 2001
From: rhugonne 
Date: Mon, 22 Aug 2022 15:37:15 +0200
Subject: [PATCH 14/21] Fix sphinx syntax

---
 .../plot_heterosc_estimation_modelling.py     |  8 ++---
 examples/plot_infer_heterosc.py               |  2 +-
 examples/plot_spatial_error_propagation.py    |  5 ++-
 examples/plot_standardization.py              | 31 +++++++++----------
 xdem/coreg.py                                 |  5 +--
 xdem/spatialstats.py                          | 13 ++++----
 6 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py
index ccd47cb5..e60eed56 100644
--- a/examples/plot_heterosc_estimation_modelling.py
+++ b/examples/plot_heterosc_estimation_modelling.py
@@ -12,7 +12,7 @@
 heteroscedasticity for a DEM difference at Longyearbyen. We use `data binning `_
 and robust statistics in N-dimension with :func:`xdem.spatialstats.nd_binning`, apply a N-dimensional interpolation with
 :func:`xdem.spatialstats.interp_nd_binning`, and scale our interpolant function with a two-step standardization
- :func:`xdem.spatialstats.two_step_standardization` to produce the final elevation error function.
+:func:`xdem.spatialstats.two_step_standardization` to produce the final elevation error function.
 
 **References**: `Hugonnet et al. (2021) `_, Equation 1, Extended Data Fig. 3a
 and `Hugonnet et al. (2022) `_, Figs. 4 and S6–S9.
@@ -55,7 +55,7 @@
 # %%
 # We use :func:`xdem.spatialstats.nd_binning` to perform N-dimensional binning on all those terrain variables, with uniform
 # bin length divided by 30. We use the NMAD as a robust measure of `statistical dispersion `_
-# (see :ref:`robuststats_meanstd`). 
+# (see :ref:`robuststats_meanstd`).
 
 df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[slope_arr, aspect_arr, planc_arr, profc_arr],
                                   list_var_names=['slope','aspect','planc','profc'],
@@ -124,7 +124,7 @@
 # The plan curvature shows a similar relation. Those are symmetrical with 0, and almost equal for both types of curvature. 
# To simplify the analysis, we here combine those curvatures into the maximum absolute curvature: -maxc_arr = np.maximum(np.abs(planc_arr),np.abs(profc_arr)) +maxc_arr = np.maximum(np.abs(planc_arr), np.abs(profc_arr)) df = xdem.spatialstats.nd_binning(values=dh_arr, list_var=[maxc_arr], list_var_names=['maxc'], statistics=['count', np.nanmedian, xdem.spatialstats.nmad], list_var_bins=[np.nanquantile(maxc_arr, np.linspace(0,1,1000))]) @@ -200,7 +200,7 @@ # Thus, we rescale the function to exactly match the spread on stable terrain using the # ``xdem.spatialstats.two_step_standardization`` function, and get our final error function. -dh_err_fun = xdem.spatialstats.two_step_standardization(dh_arr, list_var=[slope_arr, maxc_arr], +zscores, dh_err_fun = xdem.spatialstats.two_step_standardization(dh_arr, list_var=[slope_arr, maxc_arr], unscaled_error_fun=unscaled_dh_err_fun) for s, c in [(0., 0.1), (50., 0.1), (0., 20.), (50., 20.)]: diff --git a/examples/plot_infer_heterosc.py b/examples/plot_infer_heterosc.py index 8f920c95..f89e4f54 100644 --- a/examples/plot_infer_heterosc.py +++ b/examples/plot_infer_heterosc.py @@ -49,7 +49,7 @@ # (*Note: below we multiply the maximum curvature by 100 to convert it in m-1*): for slope, maxc in [(0, 0), (40, 0), (0, 5), (40, 5)]: print('Error for a slope of {:.0f} degrees and' - ' {:.0f} m-1 max. curvature: {:.1f} m'.format(slope, maxc * 100., error_function((slope, maxc)))) + ' {:.0f} m-1 max. curvature: {:.1f} m'.format(slope, maxc/100, error_function((slope, maxc)))) # %% # This pipeline will not always work optimally with default parameters: spread estimates can be affected by skewed diff --git a/examples/plot_spatial_error_propagation.py b/examples/plot_spatial_error_propagation.py index c846dc69..85b1dc14 100644 --- a/examples/plot_spatial_error_propagation.py +++ b/examples/plot_spatial_error_propagation.py @@ -7,19 +7,18 @@ other operation), which is computationally intensive. Here, we rely on published formulations to perform computationally-efficient spatial propagation for the mean of elevation (or elevation differences) in an area. -**References**: `Hugonnet et al. (2022) `_. Figure S16, Equations 17–19, +**References**: `Hugonnet et al. (2022) `_, Figure S16, Equations 17–19 and `Rolstad et al. (2009) `_, Equation 8. """ # sphinx_gallery_thumbnail_number = 1 import numpy as np - import xdem import geoutils as gu import matplotlib.pyplot as plt # %% # We load the same data, and perform the same calculations on heteroscedasticity and spatial correlations of errors as -# in the :ref:`sphx_glr_auto_examples_infer_heterosc.py` and :ref:`sphx_glr_auto_examples_infer_spatial_correlation.py` +# in the :ref:`sphx_glr_auto_examples_plot_infer_heterosc.py` and :ref:`sphx_glr_auto_examples_plot_infer_spatial_correlation.py` # examples. dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) diff --git a/examples/plot_standardization.py b/examples/plot_standardization.py index adf32fbd..ebad94f0 100644 --- a/examples/plot_standardization.py +++ b/examples/plot_standardization.py @@ -1,20 +1,17 @@ """ -Standardization for stable terrain as proxy -=========================================== +Standardization for stable terrain as error proxy +================================================= Digital elevation models have both a precision that can vary with terrain or instrument-related variables, and -a spatial correlation of measurement errors that can be due to effects of resolution, processing or instrument noise. 
-Accouting for non-stationarities in elevation measurement errors is essential to use stable terrain as a proxy to
-infer the precision on other types of terrain (Hugonnet et al., in prep) and reliably use spatial statistics (see
-:ref:`spatialstats`).
-
-Here, we show an example to use standardization of the data based on the terrain-dependent nonstationarity in measurement
-error (see :ref:`sphx_glr_auto_examples_plot_nonstationary_error.py`) and combine it with an analysis of spatial
-correlation (see :ref:`sphx_glr_auto_examples_plot_vgm_error.py`) to derive spatially integrated errors for specific
-spatial ensembles.
-
-**Reference**: `Hugonnet et al. (2021) `_, applied to the terrain slope
-and quality of stereo-correlation (Equation 1, Extended Data Fig. 3a).
+a spatial correlation of errors that can be due to effects of resolution, processing or instrument noise.
+Accounting for non-stationarities in elevation errors is essential to use stable terrain as a proxy to infer the
+precision on other types of terrain and reliably use spatial statistics (see :ref:`spatialstats`).
+
+Here, we show an example of standardization of the data based on terrain-dependent explanatory variables
+(see :ref:`sphx_glr_auto_examples_plot_infer_heterosc.py`) and combine it with an analysis of spatial correlation
+(see :ref:`sphx_glr_auto_examples_plot_infer_spatial_correlation.py`).
+
+**Reference**: `Hugonnet et al. (2022) `_, Equation 12.
 """
 # sphinx_gallery_thumbnail_number = 4
 import matplotlib.pyplot as plt
@@ -24,8 +21,8 @@
 from xdem.spatialstats import nmad
 
 # %%
-# We start by estimating the non-stationarities and deriving a terrain-dependent measurement error as a function of both
-# slope and maximum curvature, as shown in the :ref:`sphx_glr_auto_examples_plot_nonstationary_error.py` example.
+# We start by estimating the elevation heteroscedasticity and deriving a terrain-dependent measurement error as a function of both
+# slope and maximum curvature, as shown in the :ref:`sphx_glr_auto_examples_plot_infer_heterosc.py` example.
 
 # Load the data
 ref_dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem"))
@@ -105,7 +102,7 @@
 plt.show()
 
 # %%
-# Now, we can perform an analysis of spatial correlation as shown in the :ref:`sphx_glr_auto_examples_plot_vgm_error.py`
+# Now, we can perform an analysis of spatial correlation as shown in the :ref:`sphx_glr_auto_examples_plot_variogram_estimation_modelling.py`
 # example, by estimating a variogram and fitting a sum of two models.
 df_vgm = xdem.spatialstats.sample_empirical_variogram(values=z_dh.data.squeeze(), gsd=dh.res[0], subsample=300,
                                                       n_variograms=10, random_state=42)
diff --git a/xdem/coreg.py b/xdem/coreg.py
index 97ee133a..0d418e6c 100644
--- a/xdem/coreg.py
+++ b/xdem/coreg.py
@@ -969,8 +969,9 @@ def __init__(self, degree: int = 1, subsample: Union[int, float] = 5e5):
 
         :param degree: The polynomial degree to estimate. degree=0 is a simple bias correction.
         :param subsample: Factor for subsampling the input raster for speed-up.
-        If <= 1, will be considered a fraction of valid pixels to extract.
-        If > 1 will be considered the number of pixels to extract.
+ """ self.degree = degree self.subsample = subsample diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 717c8fa9..43edc049 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -1368,13 +1368,13 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, :param dvalues: Proxy values as array or Raster :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be - a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a - spherical model "Sph", "Spherical" or skgstat.models.spherical). + a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a + spherical model "Sph", "Spherical" or skgstat.models.spherical). :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues :param unstable_mask: Vector shapefile of unstable terrain (if dvalues is Raster), or boolean array of same shape as dvalues :param errors: Error values to account for heteroscedasticity (ignored if None). :param estimator: Estimator for the empirical variogram; default to Dowd's variogram (see skgstat.Variogram for - the list of available estimators). + the list of available estimators). :param gsd: Ground sampling distance :param coords: Coordinates :param subsample: Number of samples to randomly draw from the values @@ -1821,7 +1821,7 @@ def number_effective_samples(area: float | int | VectorType | gpd.GeoDataFrame, range to ensure a sufficiently fine grid for propagation of the shortest range. :param area: Area of interest either as a numeric value of surface in the same unit as the variogram ranges (will - assume a circular shape), or as a vector (shapefile) of the area + assume a circular shape), or as a vector (shapefile) of the area :param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial sills (e.g., [0.8, 0.2]) and "smooth" for the smoothness parameter if it exists for this model (e.g., @@ -1898,10 +1898,9 @@ def spatial_error_propagation(areas: list[float | VectorType | gpd.GeoDataFrame] The standard error SE (1-sigma) is then computed as SE = mean(SD) / Neff, where mean(SD) is the mean of errors in the area of interest which accounts for heteroscedasticity, and Neff is the number of effective samples. - :param areas: Area of interest either as a numeric value of surface in the same unit as the variogram ranges (will - assume a circular shape), or as a vector (shapefile) of the area - :param errors: Errors from heteroscedasticity estimation and modelling, as an array or Raster + assume a circular shape), or as a vector (shapefile) of the area. + :param errors: Errors from heteroscedasticity estimation and modelling, as an array or Raster. 
:param params_variogram_model: Dataframe of variogram models to sum with three to four columns, "model" for the model types (e.g., ["spherical", "matern"]), "range" for the correlation ranges (e.g., [2, 100]), "psill" for the partial sills (e.g., [0.8, 0.2]) and "smooth" for the smoothness parameter if it exists for this model (e.g., From e6a072fa7c637ec353737c749669ef2f92a89701 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 16:06:45 +0200 Subject: [PATCH 15/21] Update documentation page on spatial statistics --- ...lope.py => spatialstats_heterosc_slope.py} | 0 docs/source/intro_accuracy_precision.rst | 6 ++- docs/source/spatialstats.rst | 41 +++++++++---------- .../plot_heterosc_estimation_modelling.py | 4 +- .../plot_variogram_estimation_modelling.py | 4 +- 5 files changed, 28 insertions(+), 27 deletions(-) rename docs/source/code/{spatialstats_nonstationarity_slope.py => spatialstats_heterosc_slope.py} (100%) diff --git a/docs/source/code/spatialstats_nonstationarity_slope.py b/docs/source/code/spatialstats_heterosc_slope.py similarity index 100% rename from docs/source/code/spatialstats_nonstationarity_slope.py rename to docs/source/code/spatialstats_heterosc_slope.py diff --git a/docs/source/intro_accuracy_precision.rst b/docs/source/intro_accuracy_precision.rst index 86466b65..9bcf0a8c 100644 --- a/docs/source/intro_accuracy_precision.rst +++ b/docs/source/intro_accuracy_precision.rst @@ -31,6 +31,10 @@ for when analyzing DEMs: .. figure:: imgs/precision_accuracy.png :width: 80% +*Note: sometimes "accuracy" is also used to describe both types of errors, and "trueness" systematic errors, following +`ISO 5725-1 `_. Here, we used accuracy for systematic errors +as, to our knowledge, it is more common in remote sensing applications." + Source: `antarcticglaciers.org `_, accessed 29.06.21. @@ -96,6 +100,6 @@ The tools for quantifying DEM precision are described in :ref:`spatialstats`. Functions that are used in several examples create duplicate examples intead of being merged into the list. Circumventing manually by selecting functions used only once in each example for now. -.. minigallery:: xdem.spatialstats.neff_circ xdem.spatialstats.plot_1d_binning +.. minigallery:: xdem.spatialstats.infer_heteroscedasticity_from_stable xdem.spatialstats.infer_spatial_correlation_from_stable :add-heading: Examples that use spatial statistics functions diff --git a/docs/source/spatialstats.rst b/docs/source/spatialstats.rst index dc844a43..2f828dc1 100644 --- a/docs/source/spatialstats.rst +++ b/docs/source/spatialstats.rst @@ -6,16 +6,15 @@ Spatial statistics Spatial statistics, also referred to as `geostatistics `_, are essential for the analysis of observations distributed in space. To analyze DEMs, xDEM integrates spatial statistics tools specific to DEMs described in recent literature, -in particular in `Rolstad et al. (2009) `_, -`Dehecq et al. (2020) `_ and -`Hugonnet et al. (2021) `_. The implementation of these methods relies -partly on the package `scikit-gstat `_. +in particular in `Hugonnet et al. (2022) `_ and +`Rolstad et al. (2009) `_. The implementation of these methods relies +partially on the package `scikit-gstat `_. The spatial statistics tools can be used to assess the precision of DEMs (see the definition of precision in :ref:`intro`). 
In particular, these tools help to:
 
-    - account for non-stationarities of elevation measurement errors (e.g., varying precision of DEMs with terrain slope),
-    - quantify the spatial correlation of measurement errors in DEMs (e.g., native spatial resolution, instrument noise),
+    - account for elevation heteroscedasticity (e.g., varying precision with terrain slope),
+    - quantify the spatial correlation of errors in DEMs (e.g., native spatial resolution, instrument noise),
     - estimate robust errors for observations integrated in space (e.g., average or sum of samples),
     - propagate errors between spatial ensembles at different scales (e.g., sum of glacier volume changes).
 
@@ -73,7 +72,7 @@ Due to the sparsity of synchronous acquisitions, elevation data cannot be easily
 compared at different times. Thus, stable terrain is used as a proxy to assess the precision of a DEM on all its terrain,
 including moving terrain that is generally of greater interest for analysis.
 
-As shown in Hugonnet et al. (in prep), accounting for :ref:`spatialstats_nonstationarity` is needed to reliably
+As shown in `Hugonnet et al. (2022) `_, accounting for :ref:`spatialstats_heterosc` is needed to reliably
 use stable terrain as a proxy for other types of terrain.
 
 .. _spatialstats_metrics:
@@ -99,7 +98,7 @@ To estimate the pixel-wise measurement error for elevation data, two issues aris
 1. The dispersion :math:`\sigma_{dh}` cannot be estimated directly on changing terrain,
 2. The dispersion :math:`\sigma_{dh}` can show important non-stationarities.
 
-The section :ref:`spatialstats_nonstationarity` describes how to quantify the measurement error as a function of
+The section :ref:`spatialstats_heterosc` describes how to quantify the measurement error as a function of
 several explanatory variables by using stable terrain as a proxy.
 
 Spatially-integrated elevation measurement error
@@ -129,23 +128,21 @@ and use those to integrate and propagate measurement errors in space.
 Workflow for DEM precision estimation
 -------------------------------------
 
-.. _spatialstats_nonstationarity:
+.. _spatialstats_heterosc:
 
-Non-stationarity in elevation measurement errors
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Elevation heteroscedasticity
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Elevation data contains significant non-stationarities in elevation measurement errors.
+Elevation data contains significant variability in measurement errors.
 
-xDEM provides tools to **quantify** these non-stationarities along several explanatory variables,
-**model** those numerically to estimate an elevation measurement error, and **standardize** them for further analysis.
+xDEM provides tools to **quantify** this variability using explanatory variables, **model** those numerically to
+estimate a function predicting elevation error, and **standardize** data for further analysis.
 
-Quantify and model non-stationarites
-""""""""""""""""""""""""""""""""""""
+Quantify and model heteroscedasticity
+"""""""""""""""""""""""""""""""""""""
 
-Non-stationarities in elevation measurement errors correspond to a variability of the precision in the elevation
-observations with certain explanatory variables that can be terrain- or instrument-related.
-In statistical terms, it corresponds to an `heteroscedasticity `_
-of elevation observations.
+Elevation `heteroscedasticity `_ corresponds to a variability in
+precision of elevation observations that is linked to terrain or instrument variables.
 
.. 
math:: \sigma_{dh} = \sigma_{dh}(\textrm{var}_{1},\textrm{var}_{2}, \textrm{...}) \neq \textrm{constant} @@ -158,7 +155,7 @@ Owing to the large number of samples of elevation data, we can easily estimate t :lines: 18-19 :language: python -.. plot:: code/spatialstats_nonstationarity_slope.py +.. plot:: code/spatialstats_heterosc_slope.py :width: 90% The most common explanatory variables are: @@ -167,7 +164,7 @@ The most common explanatory variables are: - the quality of stereo-correlation that can explain a large part of the measurement error of DEMs generated by stereophotogrammetry, - the interferometric coherence that can explain a large part of the measurement error of DEMs generated by `InSAR `_. -Once quantified, the non-stationarities can be modelled numerically by linear interpolation across several +Once quantified, elevation heteroscedasticity can be modelled numerically by linear interpolation across several variables using :func:`xdem.spatialstats.interp_nd_binning`. .. literalinclude:: code/spatialstats.py diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py index e60eed56..26e6697a 100644 --- a/examples/plot_heterosc_estimation_modelling.py +++ b/examples/plot_heterosc_estimation_modelling.py @@ -1,6 +1,6 @@ """ -Estimate and model elevation heteroscedasticity -=============================================== +Estimation and modelling of elevation heteroscedasticity +======================================================== Digital elevation models have a precision that can vary with terrain and instrument-related variables. This variability in variance is called `heteroscedasticy `_, diff --git a/examples/plot_variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py index 542549b4..4483e61d 100644 --- a/examples/plot_variogram_estimation_modelling.py +++ b/examples/plot_variogram_estimation_modelling.py @@ -1,6 +1,6 @@ """ -Estimate and model spatial variograms -===================================== +Estimation and modelling of spatial variograms +============================================== Digital elevation models have errors that are often `correlated in space `_. While many DEM studies used solely short-range `variogram `_ to From 2e9d0f66c4d78484c9c103be468637b82392942d Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 16:34:56 +0200 Subject: [PATCH 16/21] Fix tests --- docs/source/spatialstats.rst | 6 +++--- tests/test_spatialstats.py | 11 ++++++----- xdem/spatialstats.py | 6 ++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/source/spatialstats.rst b/docs/source/spatialstats.rst index 2f828dc1..c3218110 100644 --- a/docs/source/spatialstats.rst +++ b/docs/source/spatialstats.rst @@ -216,8 +216,8 @@ average measurement error of the pixels in the subsample, evaluated through the Estimating the standard error of the mean of the standardized data :math:`\sigma_{\overline{z_{dh}}}\vert_{\mathbb{S}}` requires an analysis of spatial correlation and a spatial integration of this correlation, described in the next sections. -.. minigallery:: xdem.spatialstats.nd_binning - :add-heading: Examples that deal with non-stationarities +.. minigallery:: xdem.spatialstats.nd_binning xdem.spatialstats.infer_heteroscedasticity_from_stable + :add-heading: Examples that deal with elevation heteroscedasticity :heading-level: " .. 
_spatialstats_corr: @@ -307,7 +307,7 @@ This can be performed through the function :func:`xdem.spatialstats.fit_sum_mode :lines: 31 :language: python -.. minigallery:: xdem.spatialstats.sample_empirical_variogram +.. minigallery:: xdem.spatialstats.sample_empirical_variogram xdem.spatialstats.infer_spatial_correlation_from_stable :add-heading: Examples that deal with spatial correlations :heading-level: " diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index 1bb735d7..b65e8adc 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -527,24 +527,25 @@ def test_spatial_error_propagation(self): # Standardize the differences zscores = diff / errors params_variogram_model = xdem.spatialstats.infer_spatial_correlation_from_stable( - dvalues=zscores, list_models=['Gaussian', 'Spherical'], unstable_mask=vector_glacier, subsample=100)[1] + dvalues=zscores, list_models=['Gaussian', 'Spherical'], unstable_mask=vector_glacier, subsample=100, + random_state=42)[1] # Run the function with vector areas areas_vector = [vector_glacier.ds[vector_glacier.ds['NAME']=='Brombreen'], vector_glacier.ds[vector_glacier.ds['NAME']=='Medalsbreen']] list_stderr_vec = xdem.spatialstats.spatial_error_propagation(areas=areas_vector, errors=errors, - params_variogram_model=params_variogram_model) + params_variogram_model=params_variogram_model, + random_state=42) # Run the function with numeric areas (sum needed for Medalsbreen that has two separate polygons) areas_numeric = [np.sum(area_vec.area.values) for area_vec in areas_vector] list_stderr = xdem.spatialstats.spatial_error_propagation(areas=areas_numeric, errors=errors, params_variogram_model=params_variogram_model) - # Check that the outputs are consistent: the numeric method should always give smaller neff, but not too far - # off (20% relative) for those two glaciers as their shape is not too different from a disk + # Check that the outputs are consistent: the numeric method should always give a neff that is almost the same + # (20% relative) for those two glaciers as their shape is not too different from a disk for i in range(2): - assert list_stderr_vec[i] > list_stderr[i] assert list_stderr_vec[i] == pytest.approx(list_stderr[i], rel=0.2) diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 43edc049..95e28d4c 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -1063,6 +1063,10 @@ def sample_empirical_variogram(values: Union[np.ndarray, RasterType], gsd: float df_mean['count'] = df_count['count'] df = df_mean + # Remove the last spatial lag bin which is always undersampled + # TODO: Solve this problem at the root: how the spatial lag binning is defined, probably? 
+ df.drop(df.tail(1).index, inplace=True) + return df def _get_skgstat_variogram_model_name(model: str | Callable) -> str: @@ -1333,8 +1337,6 @@ def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], l subsample=subsample, subsample_method=subsample_method, n_variograms=n_variograms, n_jobs=n_jobs, verbose=verbose, random_state=random_state, **kwargs) - # TODO: prevent this last bin with few samples to be returned by `sample_empirical_variogram` - empirical_variogram = empirical_variogram[:-1] params_variogram_model = fit_sum_model_variogram(list_models=list_models, empirical_variogram=empirical_variogram, bounds=bounds, p0=p0)[1] From f4066a2bed78793f978155ebe90ec2c730c9662e Mon Sep 17 00:00:00 2001 From: rhugonne Date: Mon, 22 Aug 2022 17:07:27 +0200 Subject: [PATCH 17/21] Fix typo in infer_heteroscedasticity function name --- docs/source/spatialstats.rst | 4 +- examples/plot_infer_heterosc.py | 6 +-- examples/plot_spatial_error_propagation.py | 6 +-- tests/test_spatialstats.py | 30 ++++++------ xdem/spatialstats.py | 54 +++++++++++----------- 5 files changed, 50 insertions(+), 50 deletions(-) diff --git a/docs/source/spatialstats.rst b/docs/source/spatialstats.rst index c3218110..6e9ed127 100644 --- a/docs/source/spatialstats.rst +++ b/docs/source/spatialstats.rst @@ -216,7 +216,7 @@ average measurement error of the pixels in the subsample, evaluated through the Estimating the standard error of the mean of the standardized data :math:`\sigma_{\overline{z_{dh}}}\vert_{\mathbb{S}}` requires an analysis of spatial correlation and a spatial integration of this correlation, described in the next sections. -.. minigallery:: xdem.spatialstats.nd_binning xdem.spatialstats.infer_heteroscedasticity_from_stable +.. minigallery:: xdem.spatialstats.infer_heteroscedasticity_from_stable xdem.spatialstats.nd_binning :add-heading: Examples that deal with elevation heteroscedasticity :heading-level: " @@ -307,7 +307,7 @@ This can be performed through the function :func:`xdem.spatialstats.fit_sum_mode :lines: 31 :language: python -.. minigallery:: xdem.spatialstats.sample_empirical_variogram xdem.spatialstats.infer_spatial_correlation_from_stable +.. 
minigallery:: xdem.spatialstats.infer_spatial_correlation_from_stable xdem.spatialstats.sample_empirical_variogram :add-heading: Examples that deal with spatial correlations :heading-level: " diff --git a/examples/plot_infer_heterosc.py b/examples/plot_infer_heterosc.py index f89e4f54..ed602754 100644 --- a/examples/plot_infer_heterosc.py +++ b/examples/plot_infer_heterosc.py @@ -32,9 +32,9 @@ # %% # Then, we run the pipeline for inference of elevation heteroscedasticity from stable terrain: errors, df_binning, error_function = \ - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], - list_var_names=['slope', 'maxc'], - unstable_mask=glacier_outlines) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], + list_var_names=['slope', 'maxc'], + unstable_mask=glacier_outlines) # %% # The first output corresponds to the error map for the DEM (1-sigma): diff --git a/examples/plot_spatial_error_propagation.py b/examples/plot_spatial_error_propagation.py index 85b1dc14..6e1fc66c 100644 --- a/examples/plot_spatial_error_propagation.py +++ b/examples/plot_spatial_error_propagation.py @@ -26,9 +26,9 @@ glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) slope, maximum_curvature = xdem.terrain.get_terrain_attribute(ref_dem, attribute=['slope', 'maximum_curvature']) errors, df_binning, error_function = \ - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], - list_var_names=['slope', 'maxc'], - unstable_mask=glacier_outlines) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=dh, list_var=[slope, maximum_curvature], + list_var_names=['slope', 'maxc'], + unstable_mask=glacier_outlines) # %% # We use the error map to standardize the elevation differences before variogram estimation, following Equation 12 of diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index b65e8adc..4fe34189 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -522,7 +522,7 @@ def test_spatial_error_propagation(self): # Get the error map and variogram model with standardization slope, maxc = xdem.terrain.get_terrain_attribute(ref, attribute=['slope', 'maximum_curvature']) - errors = xdem.spatialstats.infer_heteroscedasticy_from_stable( + errors = xdem.spatialstats.infer_heteroscedasticity_from_stable( dvalues=diff, list_var=[slope, maxc], unstable_mask=vector_glacier)[0] # Standardize the differences zscores = diff / errors @@ -853,9 +853,9 @@ def test_estimate_model_heteroscedasticity_and_infer_from_stable(self): # Test infer function errors_1, df_binning_1, err_fun_1 = \ - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, - list_var=[slope, maximum_curv], - unstable_mask=vector_glacier) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=diff, + list_var=[slope, maximum_curv], + unstable_mask=vector_glacier) # Test this gives the same results as when using the base functions diff_arr = gu.spatial_tools.get_array_and_mask(diff)[0] @@ -881,22 +881,22 @@ def test_estimate_model_heteroscedasticity_and_infer_from_stable(self): # Check that errors are raised with wrong input with pytest.raises(ValueError, match='The dvalues must be a Raster or NumPy array.'): - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues='not_an_array', - stable_mask=~mask_glacier.squeeze(), - list_var=[slope_arr]) + 
xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues='not_an_array', + stable_mask=~mask_glacier.squeeze(), + list_var=[slope_arr]) with pytest.raises(ValueError, match='The stable mask must be a Vector, GeoDataFrame or NumPy array.'): - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, - stable_mask='not_a_vector_or_array', - list_var=[slope_arr]) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=diff, + stable_mask='not_a_vector_or_array', + list_var=[slope_arr]) with pytest.raises(ValueError, match='The unstable mask must be a Vector, GeoDataFrame or NumPy array.'): - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff, - unstable_mask='not_a_vector_or_array', - list_var=[slope_arr]) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=diff, + unstable_mask='not_a_vector_or_array', + list_var=[slope_arr]) with pytest.raises(ValueError, match='The stable mask can only passed as a Vector or GeoDataFrame if the input ' 'dvalues is a Raster.'): - xdem.spatialstats.infer_heteroscedasticy_from_stable(dvalues=diff_arr, stable_mask=vector_glacier, - list_var=[slope_arr]) + xdem.spatialstats.infer_heteroscedasticity_from_stable(dvalues=diff_arr, stable_mask=vector_glacier, + list_var=[slope_arr]) def test_plot_binning(self): diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 95e28d4c..16c08602 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -365,40 +365,40 @@ def estimate_model_heteroscedasticity(dvalues: np.ndarray, list_var: Iterable[np @overload -def infer_heteroscedasticy_from_stable(dvalues: np.ndarray , list_var: list[np.ndarray | RasterType], - stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, - unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, - list_var_names: Iterable[str], - spread_statistic: Callable, - list_var_bins: Optional[Union[int,Iterable[Iterable]]], - min_count: Optional[int], - factor_spread_exclude_outliers: float | None, - ) -> tuple[np.ndarray, +def infer_heteroscedasticity_from_stable(dvalues: np.ndarray, list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + list_var_names: Iterable[str], + spread_statistic: Callable, + list_var_bins: Optional[Union[int,Iterable[Iterable]]], + min_count: Optional[int], + factor_spread_exclude_outliers: float | None, + ) -> tuple[np.ndarray, pd.DataFrame, Callable[[tuple[np.ndarray, ...]], np.ndarray]]: ... @overload -def infer_heteroscedasticy_from_stable(dvalues: RasterType , list_var: list[np.ndarray | RasterType], - stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, - unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, - list_var_names: Iterable[str], - spread_statistic: Callable, - list_var_bins: Optional[Union[int,Iterable[Iterable]]], - min_count: Optional[int], - factor_spread_exclude_outliers: float | None, - ) -> tuple[RasterType, +def infer_heteroscedasticity_from_stable(dvalues: RasterType, list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame, + list_var_names: Iterable[str], + spread_statistic: Callable, + list_var_bins: Optional[Union[int,Iterable[Iterable]]], + min_count: Optional[int], + factor_spread_exclude_outliers: float | None, + ) -> tuple[RasterType, pd.DataFrame, Callable[[tuple[np.ndarray, ...]], np.ndarray]]: ... 
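For readers unfamiliar with this pattern, a minimal sketch of how ``typing.overload`` lets the array and Raster signatures coexist with a single runtime implementation (simplified, hypothetical types standing in for the real ones):

from __future__ import annotations
from typing import overload
import numpy as np

@overload
def double(values: np.ndarray) -> np.ndarray: ...
@overload
def double(values: float) -> float: ...

def double(values: np.ndarray | float) -> np.ndarray | float:
    # Single runtime implementation: type-checkers use the stubs above to
    # infer that the output type matches the input type
    return values * 2

print(double(2.5))                    # float in, float out
print(double(np.array([1.0, 2.0])))   # array in, array out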
-def infer_heteroscedasticy_from_stable(dvalues: np.ndarray | RasterType, list_var: list[np.ndarray | RasterType], - stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, - unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, - list_var_names: Iterable[str] = None, - spread_statistic: Callable = nmad, - list_var_bins: Optional[Union[int,Iterable[Iterable]]] = None, - min_count: Optional[int] = 100, - fac_spread_outliers: float | None = 7, - ) -> tuple[np.ndarray | RasterType, +def infer_heteroscedasticity_from_stable(dvalues: np.ndarray | RasterType, list_var: list[np.ndarray | RasterType], + stable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, + unstable_mask: np.ndarray | VectorType | gpd.GeoDataFrame = None, + list_var_names: Iterable[str] = None, + spread_statistic: Callable = nmad, + list_var_bins: Optional[Union[int,Iterable[Iterable]]] = None, + min_count: Optional[int] = 100, + fac_spread_outliers: float | None = 7, + ) -> tuple[np.ndarray | RasterType, pd.DataFrame, Callable[[tuple[np.ndarray, ...]], np.ndarray]]: """ From 2565aac6034ea8d1892ae6d251f67c85065370d2 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 23 Aug 2022 14:20:26 +0200 Subject: [PATCH 18/21] Minor corrections for sphinx display --- examples/plot_heterosc_estimation_modelling.py | 4 ++-- examples/plot_infer_heterosc.py | 2 +- examples/plot_spatial_error_propagation.py | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py index 26e6697a..b44e9e82 100644 --- a/examples/plot_heterosc_estimation_modelling.py +++ b/examples/plot_heterosc_estimation_modelling.py @@ -1,6 +1,6 @@ """ -Estimation and modelling of elevation heteroscedasticity -======================================================== +Estimation and modelling of heteroscedasticity +============================================== Digital elevation models have a precision that can vary with terrain and instrument-related variables. This variability in variance is called `heteroscedasticy `_, diff --git a/examples/plot_infer_heterosc.py b/examples/plot_infer_heterosc.py index ed602754..e5628105 100644 --- a/examples/plot_infer_heterosc.py +++ b/examples/plot_infer_heterosc.py @@ -46,7 +46,7 @@ # %% # The third output is the 2D binning interpolant, i.e. an error function with the slope and maximum curvature -# (*Note: below we multiply the maximum curvature by 100 to convert it in m-1*): +# (*Note: below we divide the maximum curvature by 100 to convert it in m-1*): for slope, maxc in [(0, 0), (40, 0), (0, 5), (40, 5)]: print('Error for a slope of {:.0f} degrees and' ' {:.0f} m-1 max. curvature: {:.1f} m'.format(slope, maxc/100, error_function((slope, maxc)))) diff --git a/examples/plot_spatial_error_propagation.py b/examples/plot_spatial_error_propagation.py index 6e1fc66c..7770c74c 100644 --- a/examples/plot_spatial_error_propagation.py +++ b/examples/plot_spatial_error_propagation.py @@ -47,6 +47,10 @@ stderr_glaciers = xdem.spatialstats.spatial_error_propagation(areas=areas, errors=errors, params_variogram_model=params_variogram_model) +for glacier_name, stderr_gla in [('Brombreen', stderr_glaciers[0]), ('Medalsbreen', stderr_glaciers[1])]: + print('The error in mean elevation change for {} is {:.2f} meters (1-sigma).'.format(glacier_name, stderr_gla)) + +# %% # When passing a numerical area value, we compute an approximation with disk shape from Equation 8 of Rolstad et al. # (2009). 
This approximation is practical to visualize changes in elevation error when averaging over different area # sizes, but is less accurate to estimate the standard error of a certain area shape. @@ -56,10 +60,14 @@ plt.plot(areas / 10**6 , stderrs) plt.xlabel('Averaging area (km²)') plt.ylabel('Standard error (m)') -plt.vlines(x=[np.pi*params_variogram_model['range'].values[0]**2 / 10**6, - np.pi*params_variogram_model['range'].values[1]**2 / 10**6], +plt.vlines(x=np.pi*params_variogram_model['range'].values[0]**2 / 10**6, ymin=np.min(stderrs), ymax=np.max(stderrs), colors='red', linestyles='dashed', - label='Area of disk with radius the correlation range') + label='Disk area with radius the\n1st correlation range of {:,.0f} meters' + .format(params_variogram_model['range'].values[0])) +plt.vlines(x=np.pi*params_variogram_model['range'].values[1]**2 / 10**6, + ymin=np.min(stderrs), ymax=np.max(stderrs), colors='blue', linestyles='dashed', + label='Disk area with radius the\n2nd correlation range of {:,.0f} meters' + .format(params_variogram_model['range'].values[1])) plt.xscale('log') plt.legend() - +plt.show() From a67fcc2e19b023cc65b7d22908f091917e6b7dea Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 24 Aug 2022 11:37:03 +0200 Subject: [PATCH 19/21] Account for comments of amaury: fixes, descriptions clarity, typos, etc --- docs/source/coregistration.rst | 2 +- docs/source/intro_accuracy_precision.rst | 19 +++++++----- docs/source/spatialstats.rst | 2 +- .../plot_heterosc_estimation_modelling.py | 17 +++++----- examples/plot_icp_coregistration.py | 4 ++- examples/plot_infer_spatial_correlation.py | 12 +++++-- .../plot_variogram_estimation_modelling.py | 11 +++---- tests/test_spatialstats.py | 18 +++++------ xdem/coreg.py | 1 + xdem/spatialstats.py | 31 ++++++++++--------- 10 files changed, 66 insertions(+), 51 deletions(-) diff --git a/docs/source/coregistration.rst b/docs/source/coregistration.rst index 5eb5b803..c250ba73 100644 --- a/docs/source/coregistration.rst +++ b/docs/source/coregistration.rst @@ -153,7 +153,7 @@ ICP - **Does not support weights** - **Recommended for:** Data with low noise and a high relative rotation. -Iterative Closest Point (ICP) coregistration works by iteratively moving the data until it fits the reference as well as possible. +Iterative Closest Point (ICP) coregistration, which is based on `Besl and McKay (1992) `_, works by iteratively moving the data until it fits the reference as well as possible. The DEMs are read as point clouds; collections of points with X/Y/Z coordinates, and a nearest neighbour analysis is made between the reference and the data to be aligned. After the distances are calculated, a rigid transform is estimated to minimise them. The transform is attempted, and then distances are calculated again. diff --git a/docs/source/intro_accuracy_precision.rst b/docs/source/intro_accuracy_precision.rst index 9bcf0a8c..863361bb 100644 --- a/docs/source/intro_accuracy_precision.rst +++ b/docs/source/intro_accuracy_precision.rst @@ -28,15 +28,14 @@ for when analyzing DEMs: - the **accuracy** (systematic error) of a DEM describes how close a DEM is to the true location of measured elevations on the Earth's surface, - the **precision** (random error) of a DEM describes the typical spread of its error in measurement, independently of a possible bias from the true positioning. -.. 
figure:: imgs/precision_accuracy.png
-   :width: 80%
-
 *Note: sometimes "accuracy" is also used to describe both types of errors, and "trueness" systematic errors, following
 `ISO 5725-1 `_. Here, we used accuracy for systematic errors
-as, to our knowledge, it is more common in remote sensing applications."
+as, to our knowledge, it is more common in remote sensing applications.*

-Source: `antarcticglaciers.org `_, accessed 29.06.21.
+.. figure:: imgs/precision_accuracy.png
+   :width: 80%
+
+   Source: `antarcticglaciers.org `_, accessed 29.06.21.

 Absolute or relative accuracy
 -----------------------------
@@ -51,6 +50,12 @@
 TODO: Add another little schematic!

 Optimizing DEM absolute accuracy
 --------------------------------
+.. figure:: https://github.com/rhugonnet/dem_error_study/blob/main/figures/fig_2.png?raw=true
+   :witdh: 100%
+
+   Source: `Hugonnet et al. (2022) `_.
+
+
 Shifts due to poor absolute accuracy are common in elevation datasets, and can be easily corrected by performing a DEM
 co-registration to precise and accurate, quality-controlled elevation data such as `ICESat `_ and `ICESat-2 `_.
@@ -100,6 +105,6 @@
 The tools for quantifying DEM precision are described in :ref:`spatialstats`.

 Functions that are used in several examples create duplicate examples instead of being merged into the list.
 Circumventing manually by selecting functions used only once in each example for now.

-.. minigallery:: xdem.spatialstats.infer_heteroscedasticity_from_stable xdem.spatialstats.infer_spatial_correlation_from_stable
+.. minigallery:: xdem.spatialstats.infer_heteroscedasticity_from_stable xdem.spatialstats.get_variogram_model_func xdem.spatialstats.sample_empirical_variogram
 :add-heading: Examples that use spatial statistics functions

diff --git a/docs/source/spatialstats.rst b/docs/source/spatialstats.rst
index 6e9ed127..f6a1b351 100644
--- a/docs/source/spatialstats.rst
+++ b/docs/source/spatialstats.rst
@@ -15,7 +15,7 @@
 In particular, these tools help to:

 - account for elevation heteroscedasticity (e.g., varying precision with terrain slope),
 - quantify the spatial correlation of errors in DEMs (e.g., native spatial resolution, instrument noise),
- - estimate robust errors for observations integrated in space (e.g., average or sum of samples),
+ - estimate robust errors for observations analyzed in space (e.g., average or sum of elevation, or of elevation changes),
 - propagate errors between spatial ensembles at different scales (e.g., sum of glacier volume changes).

 .. contents:: Contents
diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py
index b44e9e82..58a86bae 100644
--- a/examples/plot_heterosc_estimation_modelling.py
+++ b/examples/plot_heterosc_estimation_modelling.py
@@ -40,17 +40,16 @@
 # We derive terrain attributes from the reference DEM (see :ref:`sphx_glr_auto_examples_plot_terrain_attributes.py`),
 # which we will use to explore the variability in elevation error.
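# %%
# (A hedged sketch, not from the patch itself: assuming the API targeted by the change just below, a single
# attribute can also be requested on its own, with one output returned per attribute name in the list.)
slope_only = xdem.terrain.get_terrain_attribute(dem=ref_dem, attribute=['slope'])[0]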
slope, aspect, planc, profc = \
- xdem.terrain.get_terrain_attribute(dem=ref_dem.data,
- attribute=['slope','aspect', 'planform_curvature', 'profile_curvature'],
- resolution=ref_dem.res)
+ xdem.terrain.get_terrain_attribute(dem=ref_dem,
+ attribute=['slope','aspect', 'planform_curvature', 'profile_curvature'])

 # %%
-# We keep only stable terrain for the analysis of variability
-dh_arr = dh.data[~mask_glacier]
-slope_arr = slope[~mask_glacier]
-aspect_arr = aspect[~mask_glacier]
-planc_arr = planc[~mask_glacier]
-profc_arr = profc[~mask_glacier]
+# We convert to arrays and keep only stable terrain for the analysis of variability
+dh_arr = gu.spatial_tools.get_array_and_mask(dh)[0][~mask_glacier]
+slope_arr = gu.spatial_tools.get_array_and_mask(slope)[0][~mask_glacier]
+aspect_arr = gu.spatial_tools.get_array_and_mask(aspect)[0][~mask_glacier]
+planc_arr = gu.spatial_tools.get_array_and_mask(planc)[0][~mask_glacier]
+profc_arr = gu.spatial_tools.get_array_and_mask(profc)[0][~mask_glacier]

 # %%
 # We use :func:`xdem.spatialstats.nd_binning` to perform N-dimensional binning on all those terrain variables, with uniform
diff --git a/examples/plot_icp_coregistration.py b/examples/plot_icp_coregistration.py
index c1d3495b..1b77fe34 100644
--- a/examples/plot_icp_coregistration.py
+++ b/examples/plot_icp_coregistration.py
@@ -4,9 +4,11 @@
 Some DEMs may for one or more reasons be erroneously rotated in the X, Y or Z directions.
 Established coregistration approaches like :ref:`coregistration_nuthkaab` work great for X, Y and Z *translations*, but rotation is not accounted for at all.
-ICP is one method that takes both rotation and translation into account.
+Iterative Closest Point (ICP) is one method that takes both rotation and translation into account.
 It is however not as good as :ref:`coregistration_nuthkaab` when it comes to sub-pixel accuracy.
 Fortunately, ``xdem`` provides the best of both worlds by allowing a combination of the two.
+
+**Reference**: `Besl and McKay (1992) `_.
 """
 # sphinx_gallery_thumbnail_number = 2
 import matplotlib.pyplot as plt
diff --git a/examples/plot_infer_spatial_correlation.py b/examples/plot_infer_spatial_correlation.py
index 0f0d6c71..3e56198d 100644
--- a/examples/plot_infer_spatial_correlation.py
+++ b/examples/plot_infer_spatial_correlation.py
@@ -21,18 +21,24 @@
 # %%
 # Then, we run the pipeline for inference of the spatial correlation of elevation errors from stable terrain (*Note: we pass a
-# random_state argument to ensure a fixed, reproducible random subsampling in this example*):
+# random_state argument to ensure a fixed, reproducible random subsampling in this example*). We ask for a fit with
+# a Gaussian model for short range (as it is passed first), and Spherical for long range (as it is passed second):
 df_empirical_variogram, df_model_params, spatial_corr_function = \
 xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=dh, list_models=['Gaussian', 'Spherical'],
 unstable_mask=glacier_outlines, random_state=42)

 # %%
 # The first output corresponds to the dataframe of the empirical variogram, by default estimated using Dowd's estimator
-# and the circular sampling scheme of `skgstat.RasterEquidistantMetricSpace` (Fig. S13 of Hugonnet et al. (2022)):
+# and the circular sampling scheme of `skgstat.RasterEquidistantMetricSpace` (Fig. S13 of Hugonnet et al. (2022)).
The
+# ``lags`` column is the upper bound of spatial lag bins (lower bound of first bin being 0), the ``exp`` column is the
+# "experimental" variance value of the variogram in that bin, the ``count`` the number of pairwise samples, and
+# ``err_exp`` the 1-sigma error of the "experimental" variance, if more than one variogram is estimated with the
+# ``n_variograms`` parameter.
 df_empirical_variogram

 # %%
-# The second output is the dataframe of optimized model parameters for a sum of gaussian and spherical models:
+# The second output is the dataframe of optimized model parameters (``range``, ``sill``, and possibly ``smoothness``)
+# for a sum of gaussian and spherical models:
 df_model_params

 # %%
diff --git a/examples/plot_variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py
index 4483e61d..add44442 100644
--- a/examples/plot_variogram_estimation_modelling.py
+++ b/examples/plot_variogram_estimation_modelling.py
@@ -78,8 +78,7 @@
 # %%
 # *Note: in this example, we add a* ``random_state`` *argument to yield a reproducible random sampling of pixels within
-# the grid, and a* ``runs`` *argument to reduce the computing time of* ``sgstat.MetricSpace.RasterEquidistantMetricSpace``
-# *which, by default, samples more data for robustness.*
+# the grid.*

 # %%
 # We plot the empirical variogram:
@@ -88,7 +87,7 @@
 # %%
 # With this plot, it is hard to conclude anything! Properly visualizing the empirical variogram is one of the most
 # important steps. With grid data, we expect short-range correlations close to the resolution of the grid (~20-200
-# meters), but also possibly longer range correlation due to instrument noise or alignment issues (~1-50 km) (Hugonnet et al., in prep).
+# meters), but also possibly longer range correlation due to instrument noise or alignment issues (~1-50 km).
 #
 # To better visualize the variogram, we can change the axis to log-scale, but this might make it more difficult
 # to later compare to variogram models.
 # Another solution is to split the variogram plot into subpanels, each with
@@ -113,7 +112,7 @@
 # is based on `scipy.optimize.curve_fit `_:
 func_sum_vgm1, params_vgm1 = xdem.spatialstats.fit_sum_model_variogram(list_models = ['Spherical'], empirical_variogram=df)
-func_sum_vgm2, params_vgm2 = xdem.spatialstats.fit_sum_model_variogram(list_models = ['Gaussian', 'Spherical'], empirical_variogram=df)
+func_sum_vgm2, params_vgm2 = xdem.spatialstats.fit_sum_model_variogram(list_models = ['Spherical', 'Spherical'], empirical_variogram=df)
 xdem.spatialstats.plot_variogram(df, list_fit_fun=[func_sum_vgm1, func_sum_vgm2],
 list_fit_fun_label=['Single-range model', 'Double-range model'],
@@ -123,11 +122,11 @@
 # The sum of two spherical models fits better, accounting for the small partial sill at longer ranges. Yet this longer
 # range partial sill (correlated variance) is quite small...
 #
-# **So one could ask himself: is it really important to account for this small additional "bump" in the variogram?**
+# **So one could wonder: is it really important to account for this small additional "bump" in the variogram?**
 #
 # To answer this, we compute the precision of the DEM integrated over a certain surface area based on spatial integration of the
 # variogram models using :func:`xdem.spatialstats.neff_circ`, with areas varying from pixel size to grid size.
-# Numerical and exact integration of variogram is fast, allowing us to estimate errors for a wide range of areas radidly.
+# Numerical and exact integration of variogram is fast, allowing us to estimate errors for a wide range of areas rapidly. areas = np.linspace(20**2, 10000**2, 1000) diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py index 4fe34189..9e9b26db 100644 --- a/tests/test_spatialstats.py +++ b/tests/test_spatialstats.py @@ -40,7 +40,7 @@ def test_sample_multirange_variogram_default(self): """Verify that the default function runs, and its basic output""" # Load data - diff, mask = load_ref_and_diff()[1:3] + diff = load_ref_and_diff()[1] # Check the variogram output is consistent for a random state df0 = xdem.spatialstats.sample_empirical_variogram( @@ -83,7 +83,7 @@ def test_sample_multirange_variogram_methods(self, subsample_method): """Verify that all other methods run""" # Load data - diff, mask = load_ref_and_diff()[1:3] + diff = load_ref_and_diff()[1] # Check the variogram estimation runs for several methods df = xdem.spatialstats.sample_empirical_variogram( @@ -106,7 +106,7 @@ def test_sample_multirange_variogram_args(self): """Verify that optional parameters run only for their specific method, raise warning otherwise""" # Load data - diff, mask = load_ref_and_diff()[1:3] + diff = load_ref_and_diff()[1] pdist_args = {'pdist_multi_ranges':[0, diff.res[0]*5, diff.res[0]*10]} cdist_args = {'ratio_subsample': 0.5, 'samples': 50, 'runs': 10} @@ -288,7 +288,7 @@ def test_estimate_model_spatial_correlation_and_infer_from_stable(self): # Run wrapper infer from stable function with a Raster and the mask, and check the consistency there as well emp_vgm_3, params_model_vgm_3, _ = \ - xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable, stable_mask=~mask_glacier.squeeze(), + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff, stable_mask=~mask_glacier.squeeze(), list_models=['Gau', 'Sph'], subsample=100, random_state=42) pd.testing.assert_frame_equal(emp_vgm_1, emp_vgm_3) pd.testing.assert_frame_equal(params_model_vgm_1, params_model_vgm_3) @@ -296,7 +296,7 @@ def test_estimate_model_spatial_correlation_and_infer_from_stable(self): # Run again with array instead of Raster as input diff_on_stable_arr = gu.spatial_tools.get_array_and_mask(diff_on_stable)[0] emp_vgm_4, params_model_vgm_4, _ = \ - xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable_arr, gsd=diff_on_stable.res[0], + xdem.spatialstats.infer_spatial_correlation_from_stable(dvalues=diff_on_stable_arr, gsd=diff.res[0], stable_mask=~mask_glacier.squeeze(), list_models=['Gau', 'Sph'], subsample=100, random_state=42) @@ -461,8 +461,7 @@ def test_neff_exact_and_approx_hugonnet(self): def test_number_effective_samples(self): """Test that the wrapper function for neff functions behaves correctly and that output values are robust""" - raster = load_ref_and_diff()[0] - outlines = load_ref_and_diff()[3] + raster, _, outlines = load_ref_and_diff()[1:] # The function should return the same result as neff_circular_approx_numerical when using a numerical area area = 10000 @@ -518,7 +517,7 @@ def test_number_effective_samples(self): def test_spatial_error_propagation(self): """Test that the spatial error propagation wrapper function runs properly""" - ref, diff, mask_glacier, vector_glacier = load_ref_and_diff() + ref, diff, vector_glacier = load_ref_and_diff() # Get the error map and variogram model with standardization slope, maxc = xdem.terrain.get_terrain_attribute(ref, attribute=['slope', 'maximum_curvature']) @@ -823,7 +822,8 @@ def 
test_two_step_standardization(self): scale_fac = xdem.spatialstats.nmad(zscores) assert scale_fac != 1 - # Filter with a factor of 5 and the standard deviation and check the function outputs the exact same array + # Filter with a factor of 3 and the standard deviation (not default values) and check the function outputs + # the exact same array zscores[np.abs(zscores) > 3*np.nanstd(zscores)] = np.nan scale_fac_std = np.nanstd(zscores) zscores /= scale_fac_std diff --git a/xdem/coreg.py b/xdem/coreg.py index 0d418e6c..0e5db317 100644 --- a/xdem/coreg.py +++ b/xdem/coreg.py @@ -869,6 +869,7 @@ def _to_matrix_func(self) -> np.ndarray: class ICP(Coreg): """ Iterative Closest Point DEM coregistration. + Based on 3D registration of Besl and McKay (1992), https://doi.org/10.1117/12.57955. Estimates a rigid transform (rotation + translation) between two DEMs. diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py index 16c08602..834aab84 100644 --- a/xdem/spatialstats.py +++ b/xdem/spatialstats.py @@ -291,7 +291,7 @@ def two_step_standardization(dvalues: np.ndarray, list_var: Iterable[np.ndarray] Standardize the proxy differenced values using the modelled heteroscedasticity, re-scaled to the spread statistic, and generate the final standardization function. - :param dvalues: Proxy values array of size (N,) + :param dvalues: Proxy values as array of size (N,) (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) :param list_var: List of size (L) of explanatory variables array of size (N,) :param unscaled_error_fun: Function of the spread with explanatory variables not yet re-scaled :param spread_statistic: Statistic to be computed for the spread; defaults to nmad @@ -339,7 +339,7 @@ def estimate_model_heteroscedasticity(dvalues: np.ndarray, list_var: Iterable[np The functions used are `nd_binning`, `interp_nd_binning` and `two_step_standardization`. - :param dvalues: Proxy values array of size (N,) + :param dvalues: Proxy values as array of size (N,) (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) :param list_var: List of size (L) of explanatory variables array of size (N,) :param list_var_names: List of size (L) of names of the explanatory variables :param spread_statistic: Statistic to be computed for the spread; defaults to nmad @@ -410,7 +410,7 @@ def infer_heteroscedasticity_from_stable(dvalues: np.ndarray | RasterType, list_ If no stable or unstable mask is provided to mask in or out the values, all terrain is used. 
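    A minimal usage sketch (hedged: the Longyearbyen example data and the slope as single explanatory variable are
    illustrative assumptions, not part of the original docstring):

    >>> import xdem
    >>> ref = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem"))
    >>> dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem"))
    >>> slope = xdem.terrain.slope(ref)
    >>> # Returns the error map, the binning dataframe and the error function of the explanatory variables
    >>> errors, df_binning, error_function = xdem.spatialstats.infer_heteroscedasticity_from_stable(
    ...     dvalues=dh, list_var=[slope], list_var_names=['slope'])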
- :param dvalues: Proxy values as array or Raster + :param dvalues: Proxy values as array or Raster (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) :param list_var: List of size (L) of explanatory variables as array or Raster of same shape as dvalues :param stable_mask: Vector shapefile of stable terrain (if dvalues is Raster), or boolean array of same shape as dvalues :param unstable_mask: Vector shapefile of unstable terrain (if dvalues is Raster), or boolean array of same shape as dvalues @@ -904,7 +904,7 @@ def sample_empirical_variogram(values: Union[np.ndarray, RasterType], gsd: float :param coords: Coordinates :param subsample: Number of samples to randomly draw from the values :param subsample_method: Spatial subsampling method - :param n_variograms: Number of independent empirical variogram estimations + :param n_variograms: Number of independent empirical variogram estimations (to estimate empirical variogram spread) :param n_jobs: Number of processing cores :param verbose: Print statements during processing :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing) @@ -1310,10 +1310,13 @@ def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], l Estimate and model the spatial correlation of the input variable by empirical variogram sampling and fitting of a sum of variogram model. + The spatial correlation is returned as a function of spatial lags (in units of the input coordinates) which gives a + correlation value between 0 and 1. + This function samples an empirical variogram using skgstat.Variogram, then optimizes by weighted least-squares the sum of a defined number of models, using the functions `sample_empirical_variogram` and `fit_sum_model_variogram`. - :param dvalues: Proxy values of studied variable + :param dvalues: Proxy values as array or Raster (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a spherical model "Sph", "Spherical" or skgstat.models.spherical). @@ -1323,14 +1326,14 @@ def estimate_model_spatial_correlation(dvalues: Union[np.ndarray, RasterType], l :param coords: Coordinates :param subsample: Number of samples to randomly draw from the values :param subsample_method: Spatial subsampling method - :param n_variograms: Number of independent empirical variogram estimations + :param n_variograms: Number of independent empirical variogram estimations (to estimate empirical variogram spread) :param n_jobs: Number of processing cores :param verbose: Print statements during processing :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing) :param bounds: Bounds of range and sill parameters for each model (shape K x 4 = K x range lower, range upper, sill lower, sill upper). :param p0: Initial guess of ranges and sills each model (shape K x 2 = K x range first guess, sill first guess). 
- :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Spatial correlation function + :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Function of spatial correlation (0 to 1) with spatial lags """ empirical_variogram = sample_empirical_variogram(values=dvalues, estimator=estimator, gsd=gsd, coords=coords, @@ -1353,8 +1356,8 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, estimator = 'dowd', gsd: float = None, coords: np.ndarray = None, subsample: int = 1000, subsample_method: str = 'cdist_equidistant', n_variograms: int = 1, n_jobs: int = 1, verbose = False, - random_state: None | np.random.RandomState | np.random.Generator | int = None, bounds: list[tuple[float, float]] = None, p0: list[float] = None, + random_state: None | np.random.RandomState | np.random.Generator | int = None, **kwargs ) -> tuple[pd.DataFrame, pd.DataFrame, Callable[[np.ndarray], np.ndarray]]: """ @@ -1362,13 +1365,14 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, as a sum. This function returns a dataframe of the empirical variogram, a dataframe of optimized model parameters, and a - spatial correlation function. + spatial correlation function. The spatial correlation is returned as a function of spatial lags + (in units of the input coordinates) which gives a correlation value between 0 and 1. It is a convenience wrapper for `estimate_model_spatial_correlation` to work on either Raster or array and compute the stable mask. If no stable or unstable mask is provided to mask in or out the values, all terrain is used. - :param dvalues: Proxy values as array or Raster + :param dvalues: Proxy values as array or Raster (i.e., differenced values where signal should be zero such as elevation differences on stable terrain) :param list_models: List of K variogram models to sum for the fit in order from short to long ranges. Can either be a 3-letter string, full string of the variogram name or SciKit-GStat model function (e.g., for a spherical model "Sph", "Spherical" or skgstat.models.spherical). @@ -1381,14 +1385,14 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, :param coords: Coordinates :param subsample: Number of samples to randomly draw from the values :param subsample_method: Spatial subsampling method - :param n_variograms: Number of independent empirical variogram estimations + :param n_variograms: Number of independent empirical variogram estimations (to estimate empirical variogram spread) :param n_jobs: Number of processing cores :param verbose: Print statements during processing - :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing) :param bounds: Bounds of range and sill parameters for each model (shape K x 4 = K x range lower, range upper, sill lower, sill upper). :param p0: Initial guess of ranges and sills each model (shape K x 2 = K x range first guess, sill first guess). 
+ :param random_state: Random state or seed number to use for calculations (to fix random sampling during testing) - :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Spatial correlation function + :return: Dataframe of empirical variogram, Dataframe of optimized model parameters, Function of spatial correlation (0 to 1) with spatial lags """ # Check inputs @@ -1411,7 +1415,6 @@ def infer_spatial_correlation_from_stable(dvalues: np.ndarray | RasterType, else: dvalues_arr = dvalues - # If the stable mask is not an array, create it # If the stable mask is not an array, create it if stable_mask is None: stable_mask_arr = np.ones(np.shape(dvalues_arr), dtype=bool) From 175b35bf728d4ed9f589c7de4b869e3cd8e8199d Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 24 Aug 2022 13:13:31 +0200 Subject: [PATCH 20/21] Fix sphinx syntax issues --- docs/source/intro_accuracy_precision.rst | 2 +- examples/plot_heterosc_estimation_modelling.py | 2 +- examples/plot_infer_spatial_correlation.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/intro_accuracy_precision.rst b/docs/source/intro_accuracy_precision.rst index 863361bb..eb823e58 100644 --- a/docs/source/intro_accuracy_precision.rst +++ b/docs/source/intro_accuracy_precision.rst @@ -51,7 +51,7 @@ Optimizing DEM absolute accuracy -------------------------------- .. figure:: https://github.com/rhugonnet/dem_error_study/blob/main/figures/fig_2.png?raw=true - :witdh: 100% + :width: 100% Source: `Hugonnet et al. (2022) `_. diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py index 58a86bae..77a6aff4 100644 --- a/examples/plot_heterosc_estimation_modelling.py +++ b/examples/plot_heterosc_estimation_modelling.py @@ -34,7 +34,7 @@ ref_dem = xdem.DEM(xdem.examples.get_path("longyearbyen_ref_dem")) dh = xdem.DEM(xdem.examples.get_path("longyearbyen_ddem")) glacier_outlines = gu.Vector(xdem.examples.get_path("longyearbyen_glacier_outlines")) -mask_glacier = glacier_outlines.create_mask(dh) +mask_glacier = glacier_outlines.create_mask(dh).squeeze() # %% # We derive terrain attributes from the reference DEM (see :ref:`sphx_glr_auto_examples_plot_terrain_attributes.py`), diff --git a/examples/plot_infer_spatial_correlation.py b/examples/plot_infer_spatial_correlation.py index 3e56198d..89547d7d 100644 --- a/examples/plot_infer_spatial_correlation.py +++ b/examples/plot_infer_spatial_correlation.py @@ -29,7 +29,7 @@ # %% # The first output corresponds to the dataframe of the empirical variogram, by default estimated using Dowd's estimator -# and the circular sampling scheme of `skgstat.RasterEquidistantMetricSpace` (Fig. S13 of Hugonnet et al. (2022)). The +# and the circular sampling scheme of ``skgstat.RasterEquidistantMetricSpace`` (Fig. S13 of Hugonnet et al. (2022)). 
The
 # ``lags`` column is the upper bound of spatial lag bins (lower bound of first bin being 0), the ``exp`` column is the
 # "experimental" variance value of the variogram in that bin, the ``count`` the number of pairwise samples, and
 # ``err_exp`` the 1-sigma error of the "experimental" variance, if more than one variogram is estimated with the
 # ``n_variograms`` parameter.

From 2f3145eceebae24e338124e9e9e9ff9c217d6b2f Mon Sep 17 00:00:00 2001
From: rhugonne
Date: Wed, 24 Aug 2022 14:19:51 +0200
Subject: [PATCH 21/21] Fixes on warnings, wrong edits when accounting for comments

---
 docs/source/intro_accuracy_precision.rst | 33 ++++++++++++-------
 .../plot_heterosc_estimation_modelling.py | 32 +++++------------
 .../plot_variogram_estimation_modelling.py | 4 +--
 tests/test_spatialstats.py | 2 +-
 xdem/spatialstats.py | 2 +-
 5 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/docs/source/intro_accuracy_precision.rst b/docs/source/intro_accuracy_precision.rst
index eb823e58..6c9fc1ae 100644
--- a/docs/source/intro_accuracy_precision.rst
+++ b/docs/source/intro_accuracy_precision.rst
@@ -22,21 +22,34 @@
 calculations consistent, reproducible, and easy.

 Accuracy and precision
 ----------------------

-Both `accuracy and precision `_ are important factors to account
-for when analyzing DEMs:
+`Accuracy and precision `_ describe systematic and random errors,
+respectively.

-- the **accuracy** (systematic error) of a DEM describes how close a DEM is to the true location of measured elevations on the Earth's surface,
-- the **precision** (random error) of a DEM describes the typical spread of its error in measurement, independently of a possible bias from the true positioning.
-
-*Note: sometimes "accuracy" is also used to describe both types of errors, and "trueness" systematic errors, following
-`ISO 5725-1 `_. Here, we used accuracy for systematic errors
-as, to our knowledge, it is more common in remote sensing applications.*
+*Note: sometimes "accuracy" is also used to describe both types of errors, and "trueness" systematic errors, as defined
+in* `ISO 5725-1 `_ *. Here, we use accuracy for systematic
+errors as, to our knowledge, it is the more common terminology in remote sensing applications.*

 .. figure:: imgs/precision_accuracy.png
 :width: 80%

 Source: `antarcticglaciers.org `_, accessed 29.06.21.

+For DEMs, we thus have:
+
+- **DEM accuracy** (systematic error) describes how close a DEM is to the true location of measured elevations on the Earth's surface,
+- **DEM precision** (random error) describes the typical spread of its error in measurement, independently of a possible bias from the true positioning.
+
+The spatial structure of DEMs complicates the notion of accuracy and precision, however. Spatially structured
+systematic errors are often related to the gridded nature of DEMs, creating **affine biases**, while other, **specific
+biases** exist at the pixel scale. For random errors, a variability in error magnitude or **heteroscedasticity** exists
+across the DEM, while spatially structured patterns of errors are linked to **spatial correlations**.
+
+.. figure:: https://github.com/rhugonnet/dem_error_study/blob/main/figures/fig_2.png?raw=true
+ :width: 100%
+
+ Source: `Hugonnet et al. (2022) `_.
+
 Absolute or relative accuracy
 -----------------------------
@@ -50,7 +63,6 @@
 TODO: Add another little schematic!

 Optimizing DEM absolute accuracy
 --------------------------------
-..
figure:: https://github.com/rhugonnet/dem_error_study/blob/main/figures/fig_2.png?raw=true
- :width: 100%
-
- Source: `Hugonnet et al. (2022) `_.

 Shifts due to poor absolute accuracy are common in elevation datasets, and can be easily corrected by performing a DEM
diff --git a/examples/plot_heterosc_estimation_modelling.py b/examples/plot_heterosc_estimation_modelling.py
index 77a6aff4..23eedcdb 100644
--- a/examples/plot_heterosc_estimation_modelling.py
+++ b/examples/plot_heterosc_estimation_modelling.py
@@ -177,9 +177,10 @@
 # We also identify that steep slopes (> 40°) only correspond to high curvature, while the opposite is not true, hence
 # the importance of mapping the variability in two dimensions.
 #
-# Now we need to account for the non-stationarities identified. For this, the simplest approach is a numerical
-# approximation i.e. a piecewise linear interpolation/extrapolation based on the binning results.
-# To ensure that only robust statistic values are used in the interpolation, we set a ``min_count`` value at 30 samples.
+# Now we need to account for the heteroscedasticity identified. For this, the simplest approach is a numerical
+# approximation, i.e., a piecewise linear interpolation/extrapolation based on the binning results available through
+# the function :func:`xdem.spatialstats.interp_nd_binning`. To ensure that only robust statistic values are used
+# in the interpolation, we set a ``min_count`` value at 30 samples.
 unscaled_dh_err_fun = xdem.spatialstats.interp_nd_binning(df, list_var_names=['slope', 'maxc'],
 statistic='nmad', min_count=30)
@@ -197,7 +198,7 @@
 # %%
 # Thus, we rescale the function to exactly match the spread on stable terrain using the
-# ``xdem.spatialstats.two_step_standardization`` function, and get our final error function.
+# :func:`xdem.spatialstats.two_step_standardization` function, and get our final error function.
 zscores, dh_err_fun = xdem.spatialstats.two_step_standardization(dh_arr, list_var=[slope_arr, maxc_arr],
 unscaled_error_fun=unscaled_dh_err_fun)
@@ -209,23 +210,6 @@
 # %%
 # This function can be used to estimate the spatial distribution of the elevation error on the extent of our DEMs:
 maxc = np.maximum(np.abs(profc), np.abs(planc))
-errors = dh_err_fun((slope, maxc))
-
-plt.figure(figsize=(8, 5))
-plt_extent = [
- ref_dem.bounds.left,
- ref_dem.bounds.right,
- ref_dem.bounds.bottom,
- ref_dem.bounds.top,
-]
-plt.imshow(errors.squeeze(), cmap="Reds", vmin=2, vmax=8, extent=plt_extent)
-cbar = plt.colorbar()
-cbar.set_label('Elevation error ($1\sigma$, m)')
-plt.show()
-
-# %%
-# These 3 steps can be done in one go using the ``xdem.spatialstats.estimate_model_heteroscedasticity`` function, which
-# wraps those three funtions, expecting ``np.ndarray`` inputs subset to stable terrain.
-
-df, dh_err_fun = xdem.spatialstats.estimate_model_heteroscedasticity(dvalues=dh_arr, list_var=[slope_arr, maxc_arr],
- list_var_names=['slope', 'maxc'])
+errors = dh.copy(new_array=dh_err_fun((slope, maxc)))
+
+errors.show(cmap='Reds', vmin=2, vmax=8, cb_title='Elevation error ($1\sigma$, m)')
diff --git a/examples/plot_variogram_estimation_modelling.py b/examples/plot_variogram_estimation_modelling.py
index add44442..f6b4f4f1 100644
--- a/examples/plot_variogram_estimation_modelling.py
+++ b/examples/plot_variogram_estimation_modelling.py
@@ -33,12 +33,12 @@
 # %%
 # We exclude values on glacier terrain in order to isolate stable terrain, our proxy for elevation errors.
-dh.data[mask_glacier] = np.nan
+dh.set_mask(mask_glacier)

 # %%
 # We estimate the average per-pixel elevation error on stable terrain, using both the standard deviation
 # and the normalized median absolute deviation. For this example, we do not account for elevation heteroscedasticity.
-print('STD: {:.2f} meters.'.format(np.nanstd(dh.data)))
+print('STD: {:.2f} meters.'.format(np.ma.std(dh.data)))
 print('NMAD: {:.2f} meters.'.format(xdem.spatialstats.nmad(dh.data)))

 # %%
diff --git a/tests/test_spatialstats.py b/tests/test_spatialstats.py
index 9e9b26db..51dfd162 100644
--- a/tests/test_spatialstats.py
+++ b/tests/test_spatialstats.py
@@ -517,7 +517,7 @@ def test_number_effective_samples(self):
 def test_spatial_error_propagation(self):
 """Test that the spatial error propagation wrapper function runs properly"""
- ref, diff, vector_glacier = load_ref_and_diff()
+ ref, diff, _, vector_glacier = load_ref_and_diff()

 # Get the error map and variogram model with standardization
 slope, maxc = xdem.terrain.get_terrain_attribute(ref, attribute=['slope', 'maximum_curvature'])
diff --git a/xdem/spatialstats.py b/xdem/spatialstats.py
index 834aab84..73d146c0 100644
--- a/xdem/spatialstats.py
+++ b/xdem/spatialstats.py
@@ -38,7 +38,7 @@ def nmad(data: np.ndarray, nfact: float = 1.4826) -> float:
 Calculate the normalized median absolute deviation (NMAD) of an array.
 Default scaling factor is 1.4826 to scale the median absolute deviation (MAD) to the dispersion of a normal
 distribution (see https://en.wikipedia.org/wiki/Median_absolute_deviation#Relation_to_standard_deviation, and
- e.g. http://dx.doi.org/10.1016/j.isprsjprs.2009.02.003)
+ e.g. Höhle and Höhle (2009), http://dx.doi.org/10.1016/j.isprsjprs.2009.02.003)

 :param data: Input data
 :param nfact: Normalization factor for the data
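    A worked sketch of the statistic described above (illustrative values; for normally distributed data the NMAD
    approaches the standard deviation, here 2):

    >>> import numpy as np
    >>> data = np.random.default_rng(seed=42).normal(0, 2, 100000)
    >>> nmad_manual = 1.4826 * np.nanmedian(np.abs(data - np.nanmedian(data)))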