Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add robust polynomial, sum of sinusoids fitting #151

Merged
merged 39 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
f7f6108
Merge pull request #6 from GlacioHack/main
rhugonnet Mar 27, 2021
9c97f54
Merge branch 'GlacioHack:main' into main
rhugonnet Jun 17, 2021
4c4bfbf
add robust polynomial fit + tests
rhugonnet Jun 22, 2021
54872f8
add array dimension to docs
rhugonnet Jun 22, 2021
bafec5e
streamline test polynomial fit
rhugonnet Jun 22, 2021
204434a
fix polynomial fit tests
rhugonnet Jun 22, 2021
78e2ef7
import scikit-learn as optional dependency
rhugonnet Jun 22, 2021
eb397fa
Merge branch 'main' into angle_binning
rhugonnet Jun 22, 2021
20e8e5e
use new subsample function + small fixes
rhugonnet Jun 22, 2021
3c4ea4a
fix test
rhugonnet Jun 22, 2021
6abe090
add comments
rhugonnet Jun 22, 2021
e6034ea
fixes with Eriks comments
rhugonnet Jun 24, 2021
86d565b
improve tests with Erik comments
rhugonnet Jun 24, 2021
54d4a22
fix test
rhugonnet Jun 24, 2021
cc9398f
add draft for robust scaling using ML methods
rhugonnet Jun 24, 2021
6335fe7
draft robust sum of sin basinhopping
rhugonnet Jun 25, 2021
46f2c08
move nd binning to spatial_tools
rhugonnet Jun 25, 2021
549a734
finish basinhopping for sumfit
rhugonnet Jun 25, 2021
8e589e7
add tests for sum of sins
rhugonnet Jun 25, 2021
c19953b
move tests for nd binning
rhugonnet Jun 25, 2021
1043845
fix typing error
rhugonnet Jun 25, 2021
268e63a
fix tests
rhugonnet Jun 25, 2021
6e7cf23
Merge remote-tracking branch 'upstream/main' into angle_binning
rhugonnet Sep 6, 2021
5cfe2e7
rewrite tests with pytest.approx
rhugonnet Sep 6, 2021
1ca8c4f
use np.polyval instead of writing out the polynomial
rhugonnet Sep 6, 2021
f060cf9
rest of amaury comments
rhugonnet Sep 6, 2021
616bd7e
add fit module, refactor nmad into spatialstats
rhugonnet Sep 6, 2021
700bf85
fix tests
rhugonnet Sep 6, 2021
a2d4acf
finish refactor nmad, fix tests
rhugonnet Sep 7, 2021
dba11f4
increase error margin of test
rhugonnet Sep 7, 2021
28e96ed
try fixing test
rhugonnet Sep 7, 2021
aaf818f
add print statement to check values in CI
rhugonnet Sep 7, 2021
317c58c
move print statement to the right place
rhugonnet Sep 7, 2021
3051384
streamline comments
rhugonnet Sep 7, 2021
9dc9d5f
further streamline comments
rhugonnet Sep 7, 2021
5387422
remove print statement
rhugonnet Sep 7, 2021
917d905
subdivide scipy and sklearn into wrapper functions for reuse and clarity
rhugonnet Sep 7, 2021
6588c01
skip randomly failing test
rhugonnet Sep 7, 2021
0d2e7ec
fix skip syntax
rhugonnet Sep 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/code/coregistration_plot_nuth_kaab.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
ddem_pre = (dem_2009.data - dem_1990.data).filled(np.nan).squeeze()
ddem_post = (dem_2009.data - dem_coreg).filled(np.nan).squeeze()

nmad_pre = xdem.spatial_tools.nmad(ddem_pre[inlier_mask.squeeze()])
nmad_post = xdem.spatial_tools.nmad(ddem_post[inlier_mask.squeeze()])
nmad_pre = xdem.spatialstats.nmad(ddem_pre[inlier_mask.squeeze()])
nmad_post = xdem.spatialstats.nmad(ddem_post[inlier_mask.squeeze()])

vlim = 20
plt.figure(figsize=(8, 5))
Expand Down
4 changes: 2 additions & 2 deletions examples/plot_blockwise_coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,5 @@
# %%
# We can compare the :ref:`spatial_stats_nmad` to validate numerically that there was an improvment:

print(f"Error before: {xdem.spatial_tools.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatial_tools.nmad(diff_after):.2f} m")
print(f"Error before: {xdem.spatialstats.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatialstats.nmad(diff_after):.2f} m")
2 changes: 1 addition & 1 deletion examples/plot_norm_regional_hypso.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@

difference = (ddem_filled - ddem).squeeze()[random_nans].filled(np.nan)
median = np.nanmedian(difference)
nmad = xdem.spatial_tools.nmad(difference)
nmad = xdem.spatialstats.nmad(difference)

plt.title(f"Median: {median:.2f} m, NMAD: {nmad:.2f} m")
plt.hist(difference, bins=np.linspace(-15, 15, 100))
Expand Down
4 changes: 2 additions & 2 deletions examples/plot_nuth_kaab.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
# %%
# We can compare the :ref:`spatial_stats_nmad` to validate numerically that there was an improvment:

print(f"Error before: {xdem.spatial_tools.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatial_tools.nmad(diff_after):.2f} m")
print(f"Error before: {xdem.spatialstats.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatialstats.nmad(diff_after):.2f} m")

# %%
# In the plot above, one may notice a positive (blue) tendency toward the east.
Expand Down
12 changes: 6 additions & 6 deletions tests/test_coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

with warnings.catch_warnings():
warnings.simplefilter("ignore")
from xdem import coreg, examples, spatial_tools, misc
from xdem import coreg, examples, spatial_tools, spatialstats, misc
import xdem


Expand Down Expand Up @@ -698,7 +698,7 @@ def test_apply_matrix():
# Check that the median is very close to zero
assert np.abs(np.nanmedian(diff)) < 0.01
# Check that the NMAD is low
assert spatial_tools.nmad(diff) < 0.01
assert spatialstats.nmad(diff) < 0.01

def rotation_matrix(rotation=30):
rotation = np.deg2rad(rotation)
Expand All @@ -721,7 +721,7 @@ def rotation_matrix(rotation=30):
)
# Make sure that the rotated DEM is way off, but is centered around the same approximate point.
assert np.abs(np.nanmedian(rotated_dem - ref.data.data)) < 1
assert spatial_tools.nmad(rotated_dem - ref.data.data) > 500
assert spatialstats.nmad(rotated_dem - ref.data.data) > 500

# Apply a rotation in the opposite direction
unrotated_dem = coreg.apply_matrix(
Expand Down Expand Up @@ -775,8 +775,8 @@ def rotation_matrix(rotation=30):
# Check that the median is very close to zero
assert np.abs(np.nanmedian(diff)) < 0.5
# Check that the NMAD is low
assert spatial_tools.nmad(diff) < 5
print(np.nanmedian(diff), spatial_tools.nmad(diff))
assert spatialstats.nmad(diff) < 5
print(np.nanmedian(diff), spatialstats.nmad(diff))


def test_warp_dem():
Expand Down Expand Up @@ -874,7 +874,7 @@ def test_warp_dem():
)
# Validate that the DEM is now more or less the same as the original.
# Due to the randomness, the threshold is quite high, but would be something like 10+ if it was incorrect.
assert spatial_tools.nmad(dem - untransformed_dem) < 0.5
assert spatialstats.nmad(dem - untransformed_dem) < 0.5

if False:
import matplotlib.pyplot as plt
Expand Down
143 changes: 143 additions & 0 deletions tests/test_fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""
Functions to test the fitting tools.
"""
import numpy as np
import pandas as pd
import pytest

import xdem

from sklearn.metrics import mean_squared_error, median_absolute_error

class TestRobustFitting:

@pytest.mark.parametrize("pkg_estimator", [('sklearn','Linear'), ('scipy','Linear'), ('sklearn','Theil-Sen'),
('sklearn','RANSAC'),('sklearn','Huber')])
def test_robust_polynomial_fit(self, pkg_estimator: str):

# Define x vector
x = np.linspace(1, 10, 1000)
# Define exact polynomial
true_coefs = [-100, 5, 3, 2]
y = np.polyval(np.flip(true_coefs), x)

# Run fit
coefs, deg = xdem.fit.robust_polynomial_fit(x, y, linear_pkg=pkg_estimator[0], estimator_name=pkg_estimator[1], random_state=42)

# Check coefficients are constrained
assert deg == 3 or deg == 4
error_margins = [100, 5, 2, 1]
for i in range(4):
assert coefs[i] == pytest.approx(true_coefs[i], abs=error_margins[i])

def test_robust_polynomial_fit_noise_and_outliers(self):

np.random.seed(42)

# Define x vector
x = np.linspace(1,10,1000)
# Define an exact polynomial
true_coefs = [-100, 5, 3, 2]
y = np.polyval(np.flip(true_coefs), x)
# Add some noise on top
y += np.random.normal(loc=0, scale=3, size=1000)
# Add some outliers
y[50:75] = 0
y[900:925] = 1000

# Run with the "Linear" estimator
coefs, deg = xdem.fit.robust_polynomial_fit(x,y, estimator_name='Linear', linear_pkg='scipy',
loss='soft_l1', f_scale=0.5)

# Scipy solution should be quite robust to outliers/noise (with the soft_l1 method and f_scale parameter)
# However, it is subject to random processes inside the scipy function (couldn't find how to fix those...)
# It can find a degree 3, or 4 with coefficient close to 0
assert deg in [3, 4]
acceptable_scipy_linear_margins = [3, 3, 1, 1]
for i in range(4):
assert coefs[i] == pytest.approx(true_coefs[i], abs=acceptable_scipy_linear_margins[i])

# The sklearn Linear solution with MSE cost function will not be robust
coefs2, deg2 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Linear', linear_pkg='sklearn',
cost_func=mean_squared_error, margin_improvement=50)
# It won't find the right degree because of the outliers and noise
assert deg2 != 3
# Using the median absolute error should improve the fit
coefs3, deg3 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Linear', linear_pkg='sklearn',
cost_func=median_absolute_error, margin_improvement=50)
# Will find the right degree, but won't find the right coefficients because of the outliers and noise
assert deg3 == 3
sklearn_linear_error = [50, 10, 5, 0.5]
for i in range(4):
assert np.abs(coefs3[i] - true_coefs[i]) > sklearn_linear_error[i]

# Now, the robust estimators
# Theil-Sen should have better coefficients
coefs4, deg4 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Theil-Sen', random_state=42)
assert deg4 == 3
# High degree coefficients should be well constrained
assert coefs4[2] == pytest.approx(true_coefs[2], abs=1)
assert coefs4[3] == pytest.approx(true_coefs[3], abs=1)

# RANSAC is not always optimal, here it does not work well
coefs5, deg5 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='RANSAC', random_state=42)
assert deg5 != 3

# Huber should perform well, close to the scipy robust solution
coefs6, deg6 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Huber')
assert deg6 == 3
for i in range(3):
assert coefs6[i+1] == pytest.approx(true_coefs[i+1], abs=1)

@pytest.mark.skip('This test randomly fails in CI: issue opened.')
def test_robust_sumsin_fit(self):

# Define X vector
x = np.linspace(0, 10, 1000)
# Define exact sum of sinusoid signal
true_coefs = np.array([(5, 1, np.pi),(3, 0.3, 0)]).flatten()
y = xdem.fit._sumofsinval(x, params=true_coefs)

# Check that the function runs
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, random_state=42)

# Check that the estimated sum of sinusoid correspond to the input
for i in range(2):
assert coefs[3*i] == pytest.approx(true_coefs[3*i], abs=0.02)

# Check that using custom arguments does not trigger an error
bounds = [(3,7),(0.1,3),(0,2*np.pi),(1,7),(0.1,1),(0,2*np.pi),(0,1),(0.1,1),(0,2*np.pi)]
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, bounds_amp_freq_phase=bounds, nb_frequency_max=2,
hop_length=0.01, random_state=42)

def test_robust_simsin_fit_noise_and_outliers(self):

# Check robustness to outliers
np.random.seed(42)
# Define X vector
x = np.linspace(0, 10, 1000)
# Define exact sum of sinusoid signal
true_coefs = np.array([(5, 1, np.pi), (3, 0.3, 0)]).flatten()
y = xdem.fit._sumofsinval(x, params=true_coefs)

# Add some noise
y += np.random.normal(loc=0, scale=0.25, size=1000)
# Add some outliers
y[50:75] = -10
y[900:925] = 10

# Define first guess for bounds and run
bounds = [(3, 7), (0.1, 3), (0, 2 * np.pi), (1, 7), (0.1, 1), (0, 2 * np.pi), (0, 1), (0.1, 1), (0, 2 * np.pi)]
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, random_state=42, bounds_amp_freq_phase=bounds)

# Should be less precise, but still on point
# We need to re-order output coefficient to match input
if coefs[3] > coefs[0]:
coefs = np.concatenate((coefs[3:],coefs[0:3]))

# Check values
for i in range(2):
assert coefs[3*i] == pytest.approx(true_coefs[3*i], abs=0.2)
assert coefs[3 * i +1] == pytest.approx(true_coefs[3 * i +1], abs=0.2)
error_phase = min(np.abs(coefs[3 * i + 2] - true_coefs[ 3* i + 2]), np.abs(2* np.pi - (coefs[3 * i + 2] - true_coefs[3* i + 2])))
assert error_phase < 0.2
5 changes: 2 additions & 3 deletions tests/test_spatial_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Functions to test the spatial tools.
"""
from __future__ import annotations

import os
import shutil
import subprocess
Expand All @@ -27,7 +28,6 @@ def test_dem_subtraction():

assert np.nanmean(np.abs(diff.data)) < 100


class TestMerging:
"""
Test cases for stacking and merging DEMs
Expand Down Expand Up @@ -211,7 +211,6 @@ def test_get_array_and_mask(
else:
assert not np.shares_memory(array, arr_view)


class TestSubsample:
"""
Different examples of 1D to 3D arrays with masked values for testing.
Expand Down Expand Up @@ -265,4 +264,4 @@ def test_subsample(self, array):
assert np.ndim(indices) == 2
assert len(indices) == np.ndim(array)
assert np.ndim(array[indices]) == 1
assert np.size(array[indices]) == int(np.size(array) * 0.3)
assert np.size(array[indices]) == int(np.size(array) * 0.3)
2 changes: 1 addition & 1 deletion tests/test_terrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_attribute_functions(self, attribute: str) -> None:
diff = (attr_xdem - attr_gdal).filled(np.nan)
try:
assert np.nanmean(diff) < 5
assert xdem.spatial_tools.nmad(diff) < 5
assert xdem.spatialstats.nmad(diff) < 5
except Exception as exception:

if PLOT:
Expand Down
2 changes: 1 addition & 1 deletion xdem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import coreg, dem, examples, spatial_tools, spatialstats, volume, filters, terrain
from . import coreg, dem, examples, spatial_tools, spatialstats, volume, filters, fit, terrain
from .ddem import dDEM
from .dem import DEM
from .demcollection import DEMCollection
8 changes: 4 additions & 4 deletions xdem/coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def ramp(x_coordinates: np.ndarray, y_coordinates: np.ndarray) -> np.ndarray:
if metadata is not None:
metadata["deramp"] = {
"coefficients": coefficients,
"nmad": xdem.spatial_tools.nmad(residuals(coefficients, valid_diffs, valid_x_coords, valid_y_coords, degree))
"nmad": xdem.spatialstats.nmad(residuals(coefficients, valid_diffs, valid_x_coords, valid_y_coords, degree))
}

# Return the function which can be used later.
Expand Down Expand Up @@ -744,7 +744,7 @@ def error(self, reference_dem: Union[np.ndarray, np.ma.masked_array],
inlier_mask=inlier_mask, transform=transform)

error_functions = {
"nmad": xdem.spatial_tools.nmad,
"nmad": xdem.spatialstats.nmad,
"median": np.median,
"mean": np.mean,
"std": np.std,
Expand Down Expand Up @@ -1246,7 +1246,7 @@ def _fit_func(self, ref_dem: np.ndarray, tba_dem: np.ndarray, transform: Optiona
# Calculate initial dDEM statistics
elevation_difference = ref_dem - aligned_dem
bias = np.nanmedian(elevation_difference)
nmad_old = xdem.spatial_tools.nmad(elevation_difference)
nmad_old = xdem.spatialstats.nmad(elevation_difference)
if verbose:
print(" Statistics on initial dh:")
print(" Median = {:.2f} - NMAD = {:.2f}".format(bias, nmad_old))
Expand Down Expand Up @@ -1291,7 +1291,7 @@ def _fit_func(self, ref_dem: np.ndarray, tba_dem: np.ndarray, transform: Optiona
# Update statistics
elevation_difference = ref_dem - aligned_dem
bias = np.nanmedian(elevation_difference)
nmad_new = xdem.spatial_tools.nmad(elevation_difference)
nmad_new = xdem.spatialstats.nmad(elevation_difference)
nmad_gain = (nmad_new - nmad_old) / nmad_old*100

if verbose:
Expand Down
Loading