### Develop Python code that replicates the calculations in Appendix B of Messié et al. 2019

The paper is at https://doi.org/10.1016/j.pocean.2018.12.010 and the Matlab code is at https://bitbucket.org/messiem/toolbox_blprocess/src/master/bl_window_smoothing.m

In [1]:
import os
import sys

# Put the project's data-processing sources (../src/data relative to the
# notebook's working directory) on the module search path, adding the entry
# only when it is missing so that re-running the cell does not duplicate it.
module_path = os.path.abspath(os.path.join(os.pardir, "src", "data"))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
import numpy as np
import xarray as xr
import holoviews as hv
import hvplot.pandas
import ipywidgets as widgets
from logs2netcdfs import BASE_PATH, MISSIONNETCDFS

# Prerequisite: the mission has already been processed locally with
#   src/data/process_dorado.py --mission 2021.102.02 -v
# It is a typical Diamond mission with lots of peak flashes.
# Shareable STOQS view of this mission: http://stoqs.mbari.org/p/w2UREyQ
mission = "2021.102.02"
auv_name = "dorado"

In [2]:
# Load full resolution data from the local _align.nc file into an xarray Dataset
netcdfs_dir = os.path.join(BASE_PATH, auv_name, MISSIONNETCDFS, mission)
# OPeNDAP location for the survey year (the first dotted field of `mission`).
# This is a URL, not a filesystem path, so its parts are joined with "/"
# explicitly: os.path.join() would insert "\" separators on Windows.
opendap_base = "/".join(
    ["http://dods.mbari.org:8181/opendap/data/auvctd/surveys", mission.split(".")[0], "netcdf"]
)
# Use `opendap_base` to test new hyrax-docker opendap server
ds_align = xr.open_dataset(os.path.join(netcdfs_dir, f"{auv_name}_{mission}_align.nc"))
#ds_1S = xr.open_dataset(f"{opendap_base}/{auv_name}_{mission}_1S.nc")
print(f"New hyrax-docker link: {opendap_base}/{auv_name}_{mission}_align.nc")
ds_align

In [3]:
# Narrow the mission down to a single 10 minute window; the same period can be
# viewed in STOQS at http://stoqs.mbari.org/p/V3F_1d0
# NOTE: clear this cell's output before committing - it is too large for the repo
hv.extension("bokeh")   # See https://github.com/holoviz/holoviews/issues/4861#issuecomment-1239739743
subset_ds = ds_align[["biolume_raw"]].sel(
    biolume_time60hz=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00"),
)
# Tabular copy of the subset, used by the cells that follow
df = subset_ds.to_pandas()
# Grey trace of the raw signal; later cells overlay further plots on it
raw_10m_plot = (
    subset_ds["biolume_raw"]
    .to_pandas()
    .rename("raw")
    .hvplot(color="grey", width=1000, height=300, title="Raw 10 minute sample data")
)
raw_10m_plot

In [25]:
# Borrowed from https://bitbucket.org/mbari/pybl/src/master/biolum/BLFilter.py
# See also https://bitbucket.org/mbari/pybl/src/master/test/biolum/BLFilter_test.py
import pandas as pd
import rolling
from BLFilter import Filter
from scipy import signal

sample_rate = 60                 # samples per second (presumably the 60 Hz of biolume_time60hz)
window_size = 3 * sample_rate    # width of the filter in samples, i.e. 3 seconds of data

# Compute background bioluminescence envelope
filt = Filter(window_size=window_size, target_record_size=len(df))

# apply_filter() is unpacked as an (unsmoothed, smoothed) pair - going by the
# names used here; see BLFilter.py for the exact contract.
min_bg_unsmoothed, min_bg = filt.apply_filter((df["biolume_raw"], []), rolling.Min)
min_bg_unsmoothed_plot = pd.Series(min_bg_unsmoothed, index=df.index).rename("min_bg_unsmoothed").hvplot(
    width=1000, height=300, title="Background envelope and peaks for 10 minute sample data"
)
min_bg_plot = pd.Series(min_bg, index=df.index).rename("min_bg").hvplot()

med_bg_unsmoothed, med_bg = filt.apply_filter((df["biolume_raw"], []), rolling.Median)
med_bg_unsmoothed_plot = pd.Series(med_bg_unsmoothed, index=df.index).rename("med_bg_unsmoothed").hvplot()
med_bg_plot = pd.Series(med_bg, index=df.index).rename("med_bg").hvplot()

# Per-sample detection level: median + (median - minimum) of the background
above_bg = med_bg * 2.0 - min_bg
above_bg_plot = pd.Series(above_bg, index=df.index).rename("above_bg").hvplot()

# Find the high and low peaks
flash_threshold = 1.e11
peaks, _ = signal.find_peaks(df["biolume_raw"], height=above_bg)
# find_peaks() returns integer positions, so select them with .iloc: plain
# [] on a time-indexed Series relies on a positional fallback that pandas
# has deprecated.  The result keeps the timestamps of the selected samples.
s_peaks = df["biolume_raw"].iloc[peaks]
# Split the detected peaks at flash_threshold into "high" and "low" flashes
nbflash_high = s_peaks[s_peaks > flash_threshold]
nbflash_low = s_peaks[s_peaks <= flash_threshold]
nbflash_high_plot = nbflash_high.rename("nbflash_high").hvplot(kind="scatter", color="red", marker="star", s=50)
nbflash_low_plot = nbflash_low.rename("nbflash_low").hvplot(kind="scatter", color="yellow", marker="star", s=50)

# Plot everything together
min_bg_unsmoothed_plot * min_bg_plot * med_bg_plot * above_bg_plot * raw_10m_plot * nbflash_high_plot * nbflash_low_plot

In [97]:
# Construct full time series of flashes with NaNs for non-flash values
s_nbflash_high = pd.Series(np.nan, index=df.index)
s_nbflash_high.loc[nbflash_high.index] = nbflash_high

s_nbflash_low = pd.Series(np.nan, index=df.index)
s_nbflash_low.loc[nbflash_low.index] = nbflash_low

# Count the number of flashes per second.  count() ignores NaN, so binning the
# sparse series by wall-clock second counts exactly the flashes whose
# timestamps fall inside that second (0 when there are none).  Unlike a
# rolling(60, step=60) window this neither labels a count with the end of its
# window (which shifts it into the following second) nor assumes that every
# second holds exactly 60 samples.
nbflash_high_counts = s_nbflash_high.resample("1s").count()
nbflash_low_counts = s_nbflash_low.resample("1s").count()

nbflash_high_counts_plot = nbflash_high_counts.rename("nbflash_high_counts").hvplot(width=1000, height=300, title="Counts of high flashes per second")
nbflash_low_counts_plot = nbflash_low_counts.rename("nbflash_low_counts").hvplot(width=1000, height=300, title="Counts of low flashes per second")
nbflash_high_counts_plot * nbflash_low_counts_plot
