BREAKING CHANGE: this breaks reproducible POC estimation (see changelog)

AFM-analysis · Aug 9, 2021 · 3b784e7 · 3b784e7
1 parent bceb7f3
commit 3b784e7
Show file tree

Hide file tree

Showing 10 changed files with 87 additions and 197 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,7 +1,18 @@
-2.1.0
+3.0.0
+ - BREAKING CHANGE: The contact point estimation method "scheme_2020"
+   has been removed, although it has been the default for some time.
+   It turns out that it does not perform so well and there are other
+   more stable methods (to be implemented). Furthermore, some of the
+   contact point estimation methods were improved so that basically
+   many tests had to be updated. This will not break your analysis,
+   it just means your contact points will change.
  - feat: implement options for preprocessing methods
  - feat: the "correct_tip_offset" preprocessing method now
    accepts the "method" argument (see new poc submodule)
+ - fix: contact point estimation with gradient-based method
+   "poc_gradient_zero_crossing" did not really work
+ - enh: improve contact point estimation with "fit_constant_line"
+ - enh: speed-up contact point estimation with "deviation_from_baseline"
  - ref: CLI profiles now use JSON format by default
    (old format still supported)
  - ref: move contact point estimation to new 'poc' submodule

diff --git a/nanite/indent.py b/nanite/indent.py
@@ -158,59 +158,10 @@ def estimate_optimal_mindelta(self):
         )
         return dopt
 
-    def estimate_contact_point_index(self, method="scheme_2020"):
-        """Estimate the contact point
-
-        Contact point (CP) estimation involves a preprocessing step
-        where the force data are transformed into gradient space
-        (to account for a slope in the approach curve) and a
-        subsequent analysis with two different methods to determine
-        when the gradient changes significantly enough to qualify for
-        a CP. Of those two methods, the one which yields the smallest
-        index (measured from the beginning of the approach curve)
-        is returned. If one of the methods fail, then a fit function
-        with a constant and linear part is used to determine the CP.
-
-        Preprocessing:
-
-        1. Compute the rolling average of the force
-           (Otherwise the gradient would be too wild)
-        2. Compute the gradient
-           (Converting to gradient space gets rid of linear
-           contributions in the approach part)
-        3. Compute the rolling average of the gradient
-           (Makes the curve to analyze more smooth so that the
-           methods below don't hit the alarm too early)
-
-        Method 1: baseline deviation
-
-        1. Obtain the baseline (initial 10% of the gradient curve)
-        2. Compute average and maximum deviation of the baseline
-        3. The CP is the index of the curve where it exceeds
-           twice of the maximum deviation
-
-        Method 2: sign of gradient
-
-        1. Apply a median filter to the approach curve
-        2. Compute the gradient
-        3. Cut off trailing 10 points from the gradient (noise)
-        4. The CP is the index of the gradient curve when the
-           sign changes, measured from the point of maximal
-           indentation.
-
-        If one of the methods fail, then a combined constant+linear
-        function (max(constant, linear) is fitted to the gradient to
-        determine the contact point. If that fails as well, then
-        the CP defaults to the center of the entire approach curve.
-
-        .. versionchanged:: 1.6.0
-            Add the gradient preprocessing step to circumvent issues
-            with tilted baselines. This feature does not significantly
-            affect fitting results.
-
-        .. versionchanged:: 1.6.1
-            Added max(constant, linear) fit when the other methods
-            fail.
+    def estimate_contact_point_index(self, method="deviation_from_baseline"):
+        """Estimate the contact point index
+
+        See the `poc` submodule for more information.
         """
         idp = poc.compute_poc(force=np.array(self["force"], copy=True),
                               method=method)

diff --git a/nanite/poc.py b/nanite/poc.py
@@ -1,7 +1,7 @@
 """Methods for estimating the point of contact (POC)"""
 import lmfit
 import numpy as np
-import scipy.signal as spsig
+from scipy.ndimage.filters import uniform_filter1d
 
 
 #: List of all methods available for contact point estimation
@@ -17,38 +17,7 @@ def compute_preproc_clip_approach(force):
     return fg
 
 
-def compute_preproc_gradient(force):
-    """Compute the gradient of the force curve
-
-    This method also removes the tilt in the approach part.
-
-    1. Compute the rolling average of the force
-       (Otherwise the gradient would be too wild)
-    2. Compute the gradient
-       (Converting to gradient space gets rid of linear
-       contributions in the approach part)
-    3. Compute the rolling average of the gradient
-       (Makes the curve to analyze more smooth so that the
-      methods below don't hit the alarm too early)
-    """
-    # apply rolling average filter to force
-    p1_fs = min(47, force.size // 2 // 2 * 2 + 1)
-    assert p1_fs % 2 == 1, "must be odd"
-    p1_cumsum_vec = np.cumsum(np.insert(np.copy(force), 0, 0))
-    p1 = (p1_cumsum_vec[p1_fs:] - p1_cumsum_vec[:-p1_fs]) / p1_fs
-    # take the gradient
-    if p1.size > 1:
-        p1g = np.gradient(p1)
-        # apply rolling average filter to the gradient
-        p1g_cumsum_vec = np.cumsum(np.insert(np.copy(p1g), 0, 0))
-        p1gm = (p1g_cumsum_vec[p1_fs:] - p1g_cumsum_vec[:-p1_fs]) / p1_fs
-    else:
-        # fallback for bad data (array with very few elements)
-        p1gm = p1
-    return p1gm
-
-
-def compute_poc(force, method="scheme_2020"):
+def compute_poc(force, method="deviation_from_baseline"):
     """Compute the contact point from force data
 
     If the POC method returns np.nan, then the center of the
@@ -59,8 +28,6 @@ def compute_poc(force, method="scheme_2020"):
         if mfunc.identifier == method:
             if "clip_approach" in mfunc.preprocessing:
                 force = compute_preproc_clip_approach(force)
-            if "gradient" in mfunc.preprocessing:
-                force = compute_preproc_gradient(force)
             cp = mfunc(force)
             break
     else:
@@ -85,7 +52,7 @@ def poc(identifier, name, preprocessing):
         (e.g. "Deviation from baseline")
     preprocessing: list of str
         list of preprocessing methods that should be applied;
-        may contain ["gradient", "clip_approach"].
+        may contain ["clip_approach"].
     """
     def attribute_setter(func):
         """Decorator that sets the necessary attributes
@@ -105,7 +72,7 @@ def attribute_setter(func):
 
 @poc(identifier="deviation_from_baseline",
      name="Deviation from baseline",
-     preprocessing=["gradient", "clip_approach"])
+     preprocessing=["clip_approach"])
 def poc_deviation_from_baseline(force):
     """Deviation from baseline
 
@@ -121,14 +88,16 @@ def poc_deviation_from_baseline(force):
         bl_avg = np.average(baseline)
         bl_rng = np.max(np.abs(baseline - bl_avg)) * 2
         bl_dev = (force - bl_avg) > bl_rng
-        if np.sum(bl_dev):
-            cp = np.where(bl_dev)[0][0]
+        # argmax gets the first largest value
+        maxid = np.argmax(bl_dev)
+        if bl_dev[maxid]:
+            cp = maxid
     return cp
 
 
 @poc(identifier="fit_constant_line",
      name="Piecewise fit with constant and line",
-     preprocessing=["gradient", "clip_approach"])
+     preprocessing=["clip_approach"])
 def poc_fit_constant_line(force):
     """Piecewise fit with constant and line
 
@@ -145,101 +114,61 @@ def residual(params, x, data):
         m = params["m"]
         one = off
         two = m * (x - x0) + off
-        return data - np.maximum(one, two)
+        curve = np.maximum(one, two)
+        return data - curve
 
+    y = np.array(force, copy=True)
     cp = np.nan
-    if force.size > 4:  # 3 fit parameters
-        x = np.arange(force.size)
-
+    if y.size > 4:  # 3 fit parameters
+        ymin, ymax = np.min(y), np.max(y)
+        y = (y - ymin) / (ymax - ymin)
+        x = np.arange(y.size)
+        x0 = poc_deviation_from_baseline(force)
+        if np.isnan(x0):
+            x0 = y.size // 2
         params = lmfit.Parameters()
-        params.add('off', value=np.mean(force[:10]))
-        params.add('x0', value=force.size // 2)
-        params.add('m', value=(force.max() - force.min()) / force.size)
+        params.add('off', value=np.mean(y[:10]))
+        params.add('x0', value=x0)
+        params.add('m', value=1)
 
-        out = lmfit.minimize(residual, params, args=(x, force))
+        out = lmfit.minimize(residual, params, args=(x, y))
         if out.success:
             cp = int(out.params["x0"])
-
     return cp
 
 
 @poc(identifier="gradient_zero_crossing",
      name="Gradient zero-crossing of indentation part",
-     preprocessing=["gradient", "clip_approach"])
+     preprocessing=["clip_approach"])
 def poc_gradient_zero_crossing(force):
     """Gradient zero-crossing of indentation part
 
-    1. Apply a median filter to the curve
+    1. Apply a moving average filter to the curve
     2. Compute the gradient
-    3. Cut off trailing 10 points from the gradient (noise)
-    4. The CP is the index of the gradient curve when the
-       sign changes, measured from the point of maximal
-       indentation.
+    3. Cut off gradient at maxiumum with a 10 point reserve
+    4. Apply a moving average filter to the gradient
+    5. The POC is the index of the averaged gradient curve where
+       the values are below 1% of the gradient maximum.
     """
     cp = np.nan
     # Perform a median filter to smooth the array
-    filtsize = 15
-    y = spsig.medfilt(force, filtsize)
-    # Cut off the trailing 10 points (noise)
-    cutoff = 10
-    if y.size > cutoff + 1:
+    filtsize = max(5, int(force.size*.01))
+    y = uniform_filter1d(force, size=filtsize)
+    if y.size > 1:
+        # Cutoff at maximum plus some reserve
+        cutoff = y.size - np.argmax(y) + 10
         grad = np.gradient(y)[:-cutoff]
-        # Use the point where the gradient becomes positive for the
-        # first time.
-        gradpos = grad > 0
-        if np.sum(gradpos):
-            # The contains positive values.
-            # Flip `gradpos`, because we want the first value from the
-            # end of the array.
-            cp = y.size - np.where(gradpos[::-1])[0][0] - cutoff - 1
-    return cp
-
-
-@poc(identifier="scheme_2020",
-     name="Heuristic analysis pipeline 2020",
-     preprocessing=["gradient", "clip_approach"])
-def poc_scheme_2020(force):
-    """Heuristic analysis pipeline 2020
-
-    This pipeline was first implemented in nanite 1.6.1 in the
-    year of 2020.
-
-    Preprocessing:
-
-    1. Compute the rolling average of the force
-       (Otherwise the gradient would be too wild)
-    2. Compute the gradient
-       (Converting to gradient space gets rid of linear
-       contributions in the approach part)
-    3. Compute the rolling average of the gradient
-       (Makes the curve to analyze more smooth so that the
-       methods below don't hit the alarm too early)
-
-    Method 1: baseline deviation
-
-    1. Obtain the baseline (initial 10% of the gradient curve)
-    2. Compute average and maximum deviation of the baseline
-    3. The CP is the index of the curve where it exceeds
-       twice of the maximum deviation
-
-    Method 2: sign of gradient
-
-    1. Apply a median filter to the approach curve
-    2. Compute the gradient
-    3. Cut off trailing 10 points from the gradient (noise)
-    4. The CP is the index of the gradient curve when the
-       sign changes, measured from the point of maximal
-       indentation.
-
-    If one of the methods fail, then a combined constant+linear
-    function (max(constant, linear) is fitted to the gradient to
-    determine the contact point.
-    """
-    cp1 = poc_deviation_from_baseline(np.array(force, copy=True))
-    cp2 = poc_gradient_zero_crossing(np.array(force, copy=True))
-
-    if np.isnan(cp1) or np.isnan(cp2):
-        cp = poc_fit_constant_line(force)
-    else:
-        cp = min(cp1, cp2)
+        if grad.size > 50:
+            # Use the point where the gradient becomes small enough.
+            gradn = uniform_filter1d(grad, size=filtsize)
+            gradpos = gradn <= 0.01 * np.max(gradn)
+            if np.sum(gradpos):
+                # The gradient contains positive values.
+                # Flip `gradpos`, because we want the first value from the
+                # end of the array.
+                # Weight with two times "filtsize//2", because we actually
+                # want the rolling median filter from the edge and not at the
+                # center of the array (and two times, because we did two
+                # filter operations).
+                cp = y.size - np.where(gradpos[::-1])[0][0] - cutoff + filtsize
     return cp
diff --git a/nanite/preproc.py b/nanite/preproc.py
@@ -189,7 +189,8 @@ def compute_tip_position(apret):
     def correct_force_offset(apret):
         """Correct the force offset with an average baseline value
         """
-        idp = apret.estimate_contact_point_index()
+        idp = poc.compute_poc(force=apret["force"],
+                              method="deviation_from_baseline")
         if idp:
             apret["force"] -= np.average(apret["force"][:idp])
         else:
@@ -207,7 +208,7 @@ def correct_force_offset(apret):
              "choices_human_readable": [p.name for p in poc.POC_METHODS]}
         ]
         )
-    def correct_tip_offset(apret, method="scheme_2020"):
+    def correct_tip_offset(apret, method="deviation_from_baseline"):
         """Correct the offset of the tip position
 
         An estimate of the tip position is used to compute the
@@ -235,8 +236,8 @@ def correct_split_approach_retract(apret):
         x = np.array(apret["tip position"], copy=True)
         y = np.array(apret["force"], copy=True)
 
-        idp = apret.estimate_contact_point_index()
-        if idp:
+        idp = poc.poc_deviation_from_baseline(y)
+        if idp and not np.isnan(idp):
             # Flip and normalize tip position so that maximum is at minimum
             # z-position (set to 1) which coincides with maximum indentation.
             x -= x[idp]

diff --git a/tests/test_fit_ancillary.py b/tests/test_fit_ancillary.py
@@ -49,7 +49,7 @@ def test_simple_ancillary_override():
                        params_initial=params_initial)
         assert idnt.fit_properties["params_initial"]["E"].value == 1580
         assert np.allclose(idnt.fit_properties["params_fitted"]["E"].value,
-                           1572.7685940809245,
+                           1584.8941261696934,
                            atol=0,
                            rtol=1e-5)
 
@@ -75,7 +75,7 @@ def test_simple_ancillary_override_nan():
                        params_initial=params_initial)
         assert idnt.fit_properties["params_initial"]["E"].value == 3000
         assert np.allclose(idnt.fit_properties["params_fitted"]["E"].value,
-                           1584.9805568368859,
+                           1584.8876592662375,
                            atol=0,
                            rtol=1e-5)
 

diff --git a/tests/test_fit_emodulus_search.py b/tests/test_fit_emodulus_search.py
@@ -33,7 +33,7 @@ def test_emodulus_search():
 
     # This assertion might fail when the preprocessing changes
     # or when the search algorithm for the optimal fit changes.
-    dopt = -2.07633035802137e-07
+    dopt = -1.2047321672207138e-07
     assert np.allclose(ar.fit_properties["optimal_fit_delta"], dopt)
 
     if __name__ == "__main__":

diff --git a/tests/test_indent.py b/tests/test_indent.py
@@ -68,7 +68,7 @@ def test_basic():
                               "correct_tip_offset"])
     # This value is subject to change if a better way to estimate the
     # contact point is found:
-    assert idnt["tip position"][0] == 4.507445225784438e-06
+    assert idnt["tip position"][0] == 4.765854684370548e-06
 
 
 def test_export():

diff --git a/tests/test_open_jpk_variation.py b/tests/test_open_jpk_variation.py
@@ -22,7 +22,7 @@ def test_process_flipsign():
     idnt.fit_model(model_key="hertz_para", weight_cp=False,
                    params_initial=params_initial)
     assert np.allclose(idnt.fit_properties["params_fitted"]["E"].value,
-                       5257.047288859021)
+                       5230.008779761989)
 
 
 if __name__ == "__main__":