Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xarray signal attenuation method #105

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 61 additions & 107 deletions echopype/clean/signal_attenuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

import numpy as np
import xarray as xr
from skimage.measure import label

from ..utils.mask_transformation import full as _full, lin as _lin, log as _log, twod as _twod
from ..utils.mask_transformation_xr import lin as _lin, line_to_square, log as _log

DEFAULT_RYAN_PARAMS = {"r0": 180, "r1": 280, "n": 30, "thr": -6, "start": 0}
DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50}
Expand Down Expand Up @@ -55,56 +54,60 @@ def _ryan(source_Sv: xr.DataArray, desired_channel: str, parameters=DEFAULT_RYAN
r1 = parameters["r1"]
n = parameters["n"]
thr = parameters["thr"]
start = parameters["start"]
# start = parameters["start"]

selected_channel_Sv = source_Sv.sel(channel=desired_channel)
Sv = selected_channel_Sv["Sv"].values
r = source_Sv["echo_range"].values[0, 0]
channel_Sv = source_Sv.sel(channel=desired_channel)
Sv = channel_Sv["Sv"]
r = channel_Sv["echo_range"][0]

# raise errors if wrong arguments
if r0 > r1:
raise Exception("Minimum range has to be shorter than maximum range")

# return empty mask if searching range is outside the echosounder range
# return empty mask if searching range is outside the echosounder range
if (r0 > r[-1]) or (r1 < r[0]):
warnings.warn("Searching range is outside the echosounder range. Returning empty mask.")
mask = np.zeros_like(Sv, dtype=bool)
mask_ = np.zeros_like(Sv, dtype=bool)

# turn layer boundaries into arrays with length = Sv.shape[1]
r0 = np.ones(Sv.shape[1]) * r0
r1 = np.ones(Sv.shape[1]) * r1

# start masking process
mask_ = np.zeros(Sv.shape, dtype=bool)
mask = np.zeros(Sv.shape, dtype=bool)
# find indexes for upper and lower SL limits
up = np.argmin(abs(r - r0))
lw = np.argmin(abs(r - r1))
for j in range(start, len(Sv)):
# TODO: now indexes are the same at every loop, but future
# versions will have layer boundaries with variable range
# (need to implement mask_layer.py beforehand!)

# mask where AS evaluation is unfeasible (e.g. edge issues, all-NANs)
if (j - n < 0) | (j + n > len(Sv) - 1) | np.all(np.isnan(Sv[j, up:lw])):
mask_[j, :] = True

# compare ping and block medians otherwise & mask ping if too different
else:
pingmedian = _log(np.nanmedian(_lin(Sv[j, up:lw])))
blockmedian = _log(np.nanmedian(_lin(Sv[(j - n) : (j + n), up:lw])))

if (pingmedian - blockmedian) < thr:
mask[j, :] = True

final_mask = np.logical_not(mask[start:, :] | mask_[start:, :])
return_mask = xr.DataArray(
final_mask,
dims=("ping_time", "range_sample"),
coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample},
# Raise a warning to inform the user
warnings.warn(
"The searching range is outside the echosounder range. "
"A default mask with all True values is returned, "
"which won't mask any data points in the dataset."
)
return xr.DataArray(
np.ones_like(Sv, dtype=bool),
dims=("ping_time", "range_sample"),
coords={"ping_time": source_Sv.ping_time, "range_sample": source_Sv.range_sample},
)

# get upper and lower range indexes
up = abs(r - r0).argmin(dim="range_sample").values
lw = abs(r - r1).argmin(dim="range_sample").values

ping_median = _log(_lin(Sv).median(dim="range_sample", skipna=True))
# ping_75q = _log(_lin(Sv).reduce(np.nanpercentile, q=75, dim="range_sample"))

block = Sv[:, up:lw]
block_list = [block.shift({"ping_time": i}) for i in range(-n, n)]
concat_block = xr.concat(block_list, dim="range_sample")
block_median = _log(_lin(concat_block).median(dim="range_sample", skipna=True))

noise_column = (ping_median - block_median) > thr

noise_column_mask = xr.DataArray(
data=line_to_square(noise_column, Sv, "range_sample").transpose(),
dims=Sv.dims,
coords=Sv.coords,
)
return return_mask
noise_column_mask = ~noise_column_mask

nan_mask = Sv.isnull()
nan_mask = nan_mask.reduce(np.any, dim="range_sample")

# uncomment these if we want to mask the areas where we couldn't calculate
# nan_mask[0:n] = False
# nan_mask[-n:] = False

mask = nan_mask & noise_column_mask
return mask


def _ariza(source_Sv, desired_channel, parameters=DEFAULT_ARIZA_PARAMS):
Expand All @@ -117,78 +120,29 @@ def _ariza(source_Sv, desired_channel, parameters=DEFAULT_ARIZA_PARAMS):
source_Sv (xr.DataArray): Sv array
selected_channel (str): name of the channel to process
parameters(dict): dict of parameters, containing:
offset (int):
m (int):
n (int):
thr (int):
seabed_mask: (xr.DataArray) - externally created seabed mask

Returns:
xr.DataArray: boolean array with AS mask, with ping_time and range_sample dims
"""
parameter_names = (
"thr",
"m",
"n",
"offset",
)
parameter_names = ("thr", "seabed")
if not all(name in parameters.keys() for name in parameter_names):
raise ValueError(
"Missing parameters - should be thr, m, n, offset, are" + str(parameters.keys())
"Missing parameters - should be thr, m, n, offset, seabed, are" + str(parameters.keys())
)
m = parameters["m"]
n = parameters["n"]
# m = parameters["m"]
# n = parameters["n"]
thr = parameters["thr"]
offset = parameters["offset"]

selected_channel_Sv = source_Sv.sel(channel=desired_channel)
Sv = selected_channel_Sv["Sv"].values
r = source_Sv["echo_range"].values[0, 0]

# get ping array
p = np.arange(len(Sv))
# set to NaN shallow waters and data below the Sv threshold
Sv_ = Sv.copy()
Sv_[0 : np.nanargmin(abs(r - offset)), :] = np.nan
Sv_[Sv_ < thr[0]] = np.nan
# bin Sv
# TODO: update to 'twod' and 'full' functions
# DID
irvals = np.round(np.linspace(p[0], p[-1], num=int((p[-1] - p[0]) / n) + 1))
jrvals = np.linspace(r[0], r[-1], num=int((r[-1] - r[0]) / m) + 1)
Sv_bnd, p_bnd, r_bnd = _twod(Sv_, p, r, irvals, jrvals, operation="mean")[0:3]
Sv_bnd = _full(Sv_bnd, p_bnd, r_bnd, p, r)[0]
# label binned Sv data features
Sv_lbl = label(~np.isnan(Sv_bnd))
labels = np.unique(Sv_lbl)
labels = np.delete(labels, np.where(labels == 0))
# list the median values for each Sv feature
val = []
for lbl in labels:
val.append(_log(np.nanmedian(_lin(Sv_bnd[Sv_lbl == lbl]))))

# keep the feature with a median above the Sv threshold (~seabed)
# and set the rest of the array to NaN
if val:
if np.nanmax(val) > thr[1]:
labels = labels[val != np.nanmax(val)]
for lbl in labels:
Sv_bnd[Sv_lbl == lbl] = np.nan
else:
Sv_bnd[:] = np.nan
else:
Sv_bnd[:] = np.nan

# remove everything in the original Sv array that is not seabed
Sv_sb = Sv.copy()
Sv_sb[np.isnan(Sv_bnd)] = np.nan

# compute the percentile 90th for each ping, at the range at which
# the seabed is supposed to be.
seabed_percentile = _log(np.nanpercentile(_lin(Sv_sb), 95, axis=0))

# get mask where this value falls below a Sv threshold (seabed breaches)
mask = seabed_percentile < thr[0]
mask = np.tile(mask, [len(Sv), 1])
# offset = parameters["offset"]
seabed = parameters["seabed"]

channel_Sv = source_Sv.sel(channel=desired_channel)
Sv = channel_Sv["Sv"]

Sv_sb = Sv.copy(deep=True).where(seabed, np.isnan)
seabed_percentile = _log(_lin(Sv_sb).reduce(dim="range_sample", func=np.nanpercentile, q=95))
mask = line_to_square(seabed_percentile < thr[0], Sv, dim="range_sample").transpose()
return_mask = xr.DataArray(
mask,
dims=("ping_time", "range_sample"),
Expand Down
9 changes: 6 additions & 3 deletions echopype/tests/clean/test_signal_attenuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@


DEFAULT_RYAN_PARAMS = {"r0": 180, "r1": 280, "n": 30, "thr": -6, "start": 0}
DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50}

# commented ariza out since the current interpretation relies on a
# preexisting seabed mask, which is not available in this PR
# DEFAULT_ARIZA_PARAMS = {"offset": 20, "thr": (-40, -35), "m": 20, "n": 50}


@pytest.mark.parametrize(
"method,parameters,expected_true_false_counts",
[
("ryan", DEFAULT_RYAN_PARAMS, (1613100, 553831)),
("ariza", DEFAULT_ARIZA_PARAMS, (39897, 2127034)),
("ryan", DEFAULT_RYAN_PARAMS, (1838934, 327997)),
# ("ariza", DEFAULT_ARIZA_PARAMS, (39897, 2127034)),
],
)
def test_get_signal_attenuation_mask(
Expand Down
Loading