### Develop Python code that replicates the calculations in Appendix B of Messié et al. 2019

The paper is at https://doi.org/10.1016/j.pocean.2018.12.010 and the Matlab code is at https://bitbucket.org/messiem/toolbox_blprocess/src/master/bl_window_smoothing.m

In [None]:
import os
import sys

module_path = os.path.abspath(os.path.join("../src/data"))
if module_path not in sys.path:
    sys.path.append(module_path)
import numpy as np
import xarray as xr
import holoviews as hv
import hvplot.pandas
import ipywidgets as widgets
from logs2netcdfs import BASE_PATH, MISSIONNETCDFS

# Assumes that data have been processed locally using src/data/process_dorado.py
# Pick a typical Diamond mission with lots of peak flashes
# Share this view URL for this mission: http://stoqs.mbari.org/p/w2UREyQ
# `auv_name` and `mission` are combined below to build the local directory
# and the NetCDF file names; the first dot-separated field of `mission`
# ("2021") is also used as the survey directory in the OPeNDAP URL.
auv_name = "dorado"
mission = "2021.102.02"

In [None]:
# Local directory expected to hold this mission's NetCDF files
netcdfs_dir = os.path.join(BASE_PATH, auv_name, MISSIONNETCDFS, mission)
# URLs always use "/" as separator, so join the parts explicitly instead of
# with os.path.join (which would insert backslashes on Windows).
opendap_base = "/".join(
    [
        "http://dods.mbari.org:8181/opendap/data/auvctd/surveys",
        mission.split(".")[0],
        "netcdf",
    ]
)
# Use `opendap_base` to test new hyrax-docker opendap server
ds_align = xr.open_dataset(os.path.join(netcdfs_dir, f"{auv_name}_{mission}_align.nc"))
#ds_1S = xr.open_dataset(f"{opendap_base}/{auv_name}_{mission}_1S.nc")
print("New hyrax-docker link: " + f"{opendap_base}/{auv_name}_{mission}_align.nc")
# Last expression of the cell: display the dataset summary in the notebook
ds_align

In [None]:
# From https://stackoverflow.com/a/65901580
def apply_rolling_data(data, col, function, window, step=1, labels=None):
    """Apply `function` over rolling windows of one column of `data`.

    The values of column `col` are cut into overlapping windows of
    length `window`, one window every `step` rows. `function` is
    called on each window and its result is stored on the row where
    that window ends; all other rows of the output column(s) are NaN.

    Parameters
    ----------
    data : DataFrame
        Data to be analyzed, the dataframe must store time series
        columnwise, i.e., each column represents a time series and each
        row a time index. It is modified in place.
    col : str
        Name of the column from `data` to be analyzed
    function : callable
        Function called on each window. It receives a 1-D numpy array
        of length `window` and must return either a number or a 1-D
        sequence whose length is the same for every window.
    window : int
        Length of the window, in rows. Must be between 1 and
        ``len(data)``.
    step : int
        Number of rows between the starts of two consecutive windows.
        Must be at least 1.
    labels : str or sequence of str, optional
        Name(s) of the output column(s), one per value returned by
        `function`. A single string is accepted for a single output.
        If None, the columns are named ``metric_0``, ``metric_1``, ...

    Returns
    -------
    data : DataFrame
        The same object that was passed in, with the output column(s)
        added (or overwritten if they already existed).

    Raises
    ------
    ValueError
        If `window` or `step` is smaller than 1, if `window` is longer
        than `data`, or if the number of labels does not match the
        number of values returned by `function`.

    """
    if window < 1 or step < 1:
        raise ValueError("window and step must be positive integers")
    if window > len(data):
        raise ValueError(
            f"window ({window}) is longer than the data ({len(data)} rows)")

    x = _strided_app(data[col].to_numpy(), window, step)
    rolled = np.apply_along_axis(function, 1, x)
    # A scalar-returning function yields a 1-D result; promote it to one
    # column so both cases are handled the same way below.
    if rolled.ndim == 1:
        rolled = rolled.reshape(-1, 1)

    if labels is None:
        labels = [f"metric_{i}" for i in range(rolled.shape[1])]
    elif isinstance(labels, str):
        # Iterating a bare string would create one column per character
        labels = [labels]
    else:
        labels = list(labels)

    if len(labels) != rolled.shape[1]:
        raise ValueError(
            f"got {len(labels)} label(s) for {rolled.shape[1]} output value(s)")

    for label in labels:
        data[label] = np.nan

    # Mark the last row of every window: window-1, window-1+step, ...
    # A positional boolean mask is used (rather than index labels) so that
    # duplicated index values cannot select extra rows.
    window_ends = np.zeros(len(data), dtype=bool)
    window_ends[window - 1::step] = True
    data.loc[window_ends, labels] = rolled

    return data


def _strided_app(a, L, S):  # Window len = L, Stride len/stepsize = S
    """Return the overlapping windows of the 1-D array `a` as a 2-D view.

    Row ``i`` of the result is ``a[i*S : i*S + L]``. No data are copied:
    the rows share memory with `a` (and with each other), so the view is
    returned read-only to prevent accidental writes to overlapping memory.

    Parameters
    ----------
    a : numpy.ndarray
        One-dimensional input array.
    L : int
        Window length, must be at least 1.
    S : int
        Step between the starts of consecutive windows, must be at least 1.

    Returns
    -------
    numpy.ndarray
        Read-only array of shape ``(nrows, L)``; ``nrows`` is 0 when `a`
        is shorter than `L`.

    Raises
    ------
    ValueError
        If `L` or `S` is smaller than 1.
    """
    if L < 1 or S < 1:
        raise ValueError("window length L and step S must be >= 1")
    # Clamp at zero so a window longer than the array gives an empty result
    # instead of a negative dimension.
    nrows = max((a.size - L) // S + 1, 0)
    n = a.strides[0]
    return np.lib.stride_tricks.as_strided(
        a, shape=(nrows, L), strides=(S * n, n), writeable=False)

In [None]:
# Preview the `biolume_raw` variable restricted to the 10-minute window
# 10:00-10:10 on 2021-04-13 (selected along the `biolume_time60hz` coordinate)
ds_align["biolume_raw"].sel(biolume_time60hz=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00"))

In [None]:
# Focus on a 10 minute period that is here in STOQS: http://stoqs.mbari.org/p/V3F_1d0
# Do not commit to the repo the output of this cell, it is too large
# Select the bokeh plotting backend,
# see https://github.com/holoviz/holoviews/issues/4861#issuecomment-1239739743
hv.extension("bokeh")
# Keep `biolume_raw` as a one-variable Dataset so it can become a DataFrame later
subset_ds = ds_align[["biolume_raw"]].sel(
    biolume_time60hz=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00")
)
# Convert to a pandas Series named "raw" and plot it
raw_series = subset_ds["biolume_raw"].to_pandas().rename("raw")
raw_plot_2m = raw_series.hvplot()
raw_plot_2m

In [None]:
# Quick sanity check: show the first few entries of the 10-minute subset
subset_ds.head()

In [None]:
# Rolling minimum of the raw signal over 300-sample windows, one window per
# sample (presumably 5 s at the 60 Hz implied by the coordinate name - confirm).
# apply_rolling_data adds the "bg_min" column to the frame and returns it.
df = apply_rolling_data(
    subset_ds.to_pandas(),
    "biolume_raw",
    np.nanmin,
    window=300,
    step=1,
    labels=["bg_min"],
)
bg_min_plot = df.hvplot(y="bg_min")
bg_min_plot


In [None]:
# Overlay (HoloViews `*` operator) the raw signal and the rolling-minimum
# curve so they can be compared on the same axes
raw_plot_2m * bg_min_plot