Skip to content

Commit

Permalink
enh: improve preprocessing ordering
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Aug 16, 2021
1 parent 8330318 commit b753ac5
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 21 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
information about the procedure (currently plottable data to
understand the process)
- fix: spatial smoothing not working in some cases (#15)
- enh: add `steps_optional` in preprocessing to allow fine-grained
control over the order of application
- ref: remove "...smoothed" column data (which was never used anyway);
instead, apply smoothing directly to AFMData subclass
- ref: rename `require_steps` to `steps_required` in preprocessing
decorator
- setup: bump afmformats from 0.16.0 to 0.16.3
3.0.0
- BREAKING CHANGE: The contact point estimation method "scheme_2020"
Expand Down
82 changes: 63 additions & 19 deletions nanite/preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ class CannotSplitWarning(UserWarning):
pass


def preprocessing_step(identifier, name, require_steps=None, options=None):
def preprocessing_step(identifier, name, steps_required=None,
steps_optional=None, options=None):
"""Decorator for Indentation preprocessors
The name and identifier are stored as a property of the wrapped
Expand All @@ -26,9 +27,12 @@ def preprocessing_step(identifier, name, require_steps=None, options=None):
name: str
human-readable name of the preprocessor
(e.g. "Estimate contact point")
require_steps: list of str
steps_required: list of str
list of preprocessing steps that must be added before this
step
steps_optional: list of str
unlike `steps_required`, these steps do not have to be set,
but if they are set, they should come before this step
options: list of dict
if the preprocessor accepts optional keyword arguments,
this list yields valid values or dtypes
Expand All @@ -44,7 +48,8 @@ def attribute_setter(func):
assert isinstance(name, str)
func.name = name
func.options = options
func.require_steps = require_steps
func.steps_required = steps_required
func.steps_optional = steps_optional
return func

return attribute_setter
Expand Down Expand Up @@ -86,11 +91,11 @@ class instance. If you are using this class directly and
for ii, pid in enumerate(identifiers):
if pid in IndentationPreprocessor.available():
meth = IndentationPreprocessor.get_func(pid)
req = meth.require_steps
req = meth.steps_required
act = identifiers[:ii]
if req is not None and ((set(req) & set(act)) != set(req)):
raise ValueError(f"The preprocessing step '{pid}' requires"
f" the steps {meth.require_steps}!")
f" the steps {meth.steps_required}!")
kwargs = options.get(pid, {}) # empty dict if not defined
if "ret_details" in inspect.signature(meth).parameters:
# only set `ret_details` if method accepts it
Expand All @@ -104,20 +109,33 @@ class instance. If you are using this class directly and

@staticmethod
def autosort(identifiers):
    """Automatically sort preprocessing identifiers

    This takes into account `steps_required` and `steps_optional`.

    Parameters
    ----------
    identifiers: list of str
        preprocessing step identifiers in arbitrary order

    Returns
    -------
    sorted_identifiers: list of str
        a new list with the same identifiers in which every required
        step (and every optional step that is present) precedes the
        step that declares it; `identifiers` itself is not modified

    Raises
    ------
    ValueError
        if a required step is not part of `identifiers` or if the
        resulting order does not pass `check_order`
    """
    sorted_identifiers = list(identifiers)
    for pid in identifiers:
        meth = IndentationPreprocessor.get_func(pid)
        steps_precursor = []
        if meth.steps_required is not None:
            steps_precursor += meth.steps_required
        if meth.steps_optional is not None:
            # Optional steps only matter if the user actually set them.
            steps_precursor += [ostep for ostep in meth.steps_optional
                                if ostep in identifiers]
        for step in steps_precursor:
            if step not in sorted_identifiers:
                # Only a required step can be missing here; fail with
                # an explicit message instead of the "x is not in list"
                # error that `list.index` would produce.
                raise ValueError(f"The preprocessing step '{pid}' requires"
                                 f" the steps {meth.steps_required}!")
            # We have a requirement, check whether it is fulfilled
            cix = sorted_identifiers.index(pid)
            rix = sorted_identifiers.index(step)
            if rix > cix:
                # We pop the wrong requirement and insert it before
                # the current pid. Since `step` sits behind `pid`,
                # removing it does not shift `cix`.
                sorted_identifiers.remove(step)
                sorted_identifiers.insert(cix, step)

    # Perform a sanity check
    IndentationPreprocessor.check_order(sorted_identifiers)

    return sorted_identifiers

@staticmethod
Expand All @@ -131,6 +149,25 @@ def available():
av.append(func.identifier)
return sorted(av)

@staticmethod
def check_order(identifiers):
    """Check preprocessing steps for correct order

    Parameters
    ----------
    identifiers: list of str
        preprocessing step identifiers in the order of application

    Raises
    ------
    ValueError
        if a step listed in `steps_required` of any identifier is
        missing or comes after that identifier, or if a step listed
        in `steps_optional` is present but comes after it
    """
    # Position of the first occurrence of each identifier (this is
    # what `list.index` returns), computed once for all lookups.
    first_index = {}
    for ii, pid in enumerate(identifiers):
        first_index.setdefault(pid, ii)

    for cix, pid in enumerate(identifiers):
        meth = IndentationPreprocessor.get_func(pid)

        required = meth.steps_required or []
        if any(req not in first_index for req in required):
            # Explicit message instead of the opaque "x is not in list"
            # error raised by `list.index`.
            raise ValueError(f"The preprocessing step '{pid}' requires"
                             f" the steps {meth.steps_required}!")
        if any(first_index[req] > cix for req in required):
            raise ValueError(
                f"Wrong required step order for {pid}: {identifiers}!")

        # Optional steps are only checked when they are actually set.
        optional = meth.steps_optional or []
        if any(first_index[opt] > cix
               for opt in optional if opt in first_index):
            raise ValueError(
                f"Wrong optional step order for {pid}: {identifiers}!")

@staticmethod
def get_func(identifier):
"""Return preprocessor function for identifier"""
Expand All @@ -148,10 +185,10 @@ def get_name(identifier):
return func.name

@staticmethod
def get_steps_required(identifier):
    """Return requirement identifiers for identifier

    Parameters
    ----------
    identifier: str
        identifier of a preprocessing step

    Returns
    -------
    steps_required: list of str or None
        the `steps_required` attribute that the `preprocessing_step`
        decorator stored on the preprocessing function
    """
    func = IndentationPreprocessor.get_func(identifier)
    return func.steps_required

@staticmethod
@preprocessing_step(identifier="compute_tip_position",
Expand Down Expand Up @@ -207,7 +244,7 @@ def correct_force_offset(apret):
@preprocessing_step(
identifier="correct_tip_offset",
name="contact point estimation",
require_steps=["compute_tip_position"],
steps_required=["compute_tip_position"],
options=[
{"name": "method",
"type": str,
Expand Down Expand Up @@ -235,7 +272,7 @@ def correct_tip_offset(apret, method="deviation_from_baseline",
@staticmethod
@preprocessing_step(identifier="correct_split_approach_retract",
name="segment discovery",
require_steps=["compute_tip_position"])
steps_required=["compute_tip_position"])
def correct_split_approach_retract(apret):
"""Split the approach and retract curves (farthest point method)
Expand Down Expand Up @@ -276,7 +313,14 @@ def correct_split_approach_retract(apret):

@staticmethod
@preprocessing_step(identifier="smooth_height",
name="monotonic height data")
name="monotonic height data",
steps_optional=[
# Otherwise we lose the location of the point
# of deepest indentation:
"correct_split_approach_retract",
# Otherwise it might not be applied to
# "tip position":
"compute_tip_position"])
def smooth_height(apret):
"""Make height data monotonic
Expand Down
53 changes: 51 additions & 2 deletions tests/test_preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,55 @@ def test_autosort():
assert expected == actual


def test_autosort2():
    """Required steps are moved in front of the steps that need them"""
    shuffled = [
        "correct_split_approach_retract",
        "correct_force_offset",
        "correct_tip_offset",
        "compute_tip_position",
    ]
    wanted = [
        "compute_tip_position",
        "correct_split_approach_retract",
        "correct_force_offset",
        "correct_tip_offset",
    ]
    result = IndentationPreprocessor.autosort(shuffled)
    # Both steps below require "compute_tip_position".
    pos_split = result.index("correct_split_approach_retract")
    assert pos_split > result.index("compute_tip_position")
    pos_offset = result.index("correct_tip_offset")
    assert pos_offset > result.index("compute_tip_position")
    assert wanted == result


def test_autosort3():
    """Optional precursors of "smooth_height" are sorted in front of it"""
    shuffled = [
        "smooth_height",
        "correct_split_approach_retract",
        "correct_force_offset",
        "correct_tip_offset",
        "compute_tip_position",
    ]
    wanted = [
        "compute_tip_position",
        "correct_split_approach_retract",
        "smooth_height",
        "correct_force_offset",
        "correct_tip_offset",
    ]
    result = IndentationPreprocessor.autosort(shuffled)
    assert wanted == result
    # Pairwise ordering constraints, spelled out individually.
    pos_split = result.index("correct_split_approach_retract")
    assert pos_split > result.index("compute_tip_position")
    pos_offset = result.index("correct_tip_offset")
    assert pos_offset > result.index("compute_tip_position")
    pos_smooth = result.index("smooth_height")
    assert pos_smooth > result.index("correct_split_approach_retract")


def test_check_order():
    """`check_order` rejects an optional precursor that comes too late"""
    wrong_order = ["smooth_height", "correct_split_approach_retract"]
    with pytest.raises(ValueError, match="Wrong optional step order"):
        IndentationPreprocessor.check_order(wrong_order)


def test_correct_split_approach_retract():
fd = IndentationGroup(data_path / "spot3-0192.jpk-force")[0]

Expand All @@ -37,8 +86,8 @@ def test_correct_split_approach_retract():
assert fd.appr["segment"].size == 2006


def test_get_steps_required():
    """"correct_tip_offset" declares "compute_tip_position" as required"""
    req_act = IndentationPreprocessor.get_steps_required("correct_tip_offset")
    req_exp = ["compute_tip_position"]
    assert req_act == req_exp

Expand Down

0 comments on commit b753ac5

Please sign in to comment.