### Develop Python code that replicates the calculations in Appendix B of Messié et al. 2019

The paper is at https://doi.org/10.1016/j.pocean.2018.12.010 and the Matlab code is at https://bitbucket.org/messiem/toolbox_blprocess/src/master/bl_window_smoothing.m

In [None]:
import os
import sys

# Put the repository's src/data directory on the import path (once) so that
# project modules imported by this notebook can be found
module_path = os.path.abspath("../src/data")
if module_path not in sys.path:
    sys.path.append(module_path)
import numpy as np
import xarray as xr
import holoviews as hv
import hvplot.pandas
import ipywidgets as widgets
from logs2netcdfs import BASE_PATH, MISSIONNETCDFS

# Prerequisite: the mission below has been processed locally with
#   src/data/process_dorado.py --mission 2021.102.02 -v
# (a Diamond mission with lots of peak flashes at head of Soquel Canyon)
# STOQS view of this mission: http://stoqs.mbari.org/p/w2UREyQ
auv_name, mission = "dorado", "2021.102.02"

In [None]:
# Load full resolution data from the local _align.nc file into an xarray Dataset
netcdfs_dir = os.path.join(BASE_PATH, auv_name, MISSIONNETCDFS, mission)
ds_align = xr.open_dataset(os.path.join(netcdfs_dir, f"{auv_name}_{mission}_align.nc"))

# The 1S file is read over OPeNDAP. URLs always use "/" separators, so they are
# built with string formatting rather than os.path.join (which would insert
# backslashes on Windows).
# Use `opendap_base` with port 8181 to test new hyrax-docker opendap server
survey_year = mission.split('.')[0]
opendap_base = f"http://dods.mbari.org:8181/opendap/data/auvctd/surveys/{survey_year}/netcdf"
ds_1S = xr.open_dataset(f"{opendap_base}/{auv_name}_{mission}_1S.nc")
print("New hyrax-docker link: " + f"{opendap_base}/{auv_name}_{mission}_align.nc")
ds_align

In [None]:
# Work on a 10 minute window of the mission; the same period in STOQS: http://stoqs.mbari.org/p/V3F_1d0
# Do not commit to the repo the output of this cell and the ones below, they are too large
hv.extension("bokeh")   # See https://github.com/holoviz/holoviews/issues/4861#issuecomment-1239739743

# Select the window on the 60 Hz time axis, keeping only the raw bioluminescence variable
subset_ds = ds_align[["biolume_raw"]].sel(
    biolume_time60hz=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00")
)
df = subset_ds.to_pandas()

# Grey trace of the raw signal, reused as a layer in later overlay plots
raw_series = subset_ds["biolume_raw"].to_pandas().rename("raw")
raw_10m_plot = raw_series.hvplot(
    color="grey",
    width=1000,
    height=300,
    title="Raw 10 minute sample data",
)
raw_10m_plot

In [None]:
import pandas as pd
import rolling

def compute_bg_envelope(use_pandas_rolling, *, window_seconds=15, sample_rate=60, flash_threshold=1.e11):
    """Compute the background envelope and flash peaks of ``df["biolume_raw"]``.

    Step (1), dinoflagellate and zooplankton proxies. Reads the notebook
    globals ``df`` (the raw samples) and ``raw_10m_plot`` (the raw data plot
    layer); both must exist before this is called.

    A centered rolling minimum and a centered rolling median of the raw signal
    are each smoothed with a centered rolling mean over the same window. Peaks
    of the raw signal higher than ``2 * median - min`` (the median plus the
    median-to-minimum distance) are kept as flashes and split into high and
    low flashes at ``flash_threshold``.

    Args:
        use_pandas_rolling: If true, use Pandas rolling windows; otherwise use
            ``BLFilter.Filter.apply_filter()`` with the ``rolling`` package.
        window_seconds: Width of the filter window in seconds.
        sample_rate: Number of samples per second in ``df``.
        flash_threshold: Peak height separating high from low flashes; peaks
            strictly above it are high, all others are low.

    Returns:
        Tuple ``(med_bg, nbflash_high, nbflash_low, plots)``: the smoothed
        median background, the high and low flash peaks as Series indexed by
        peak time, and an overlay of the envelope, raw data and peak plots.
        The unsmoothed median is computed but is not part of the overlay.
    """
    from scipy import signal

    raw = df["biolume_raw"]
    window_size = window_seconds * sample_rate

    # Compute background bioluminescence envelope
    if use_pandas_rolling:
        min_bg_unsmoothed = raw.rolling(window_size, min_periods=0, center=True).min()
        min_bg = min_bg_unsmoothed.rolling(window_size, min_periods=0, center=True).mean().values
        med_bg_unsmoothed = raw.rolling(window_size, min_periods=0, center=True).median()
        med_bg = med_bg_unsmoothed.rolling(window_size, min_periods=0, center=True).mean().values
        title = "Background envelope and peaks for 10 minute sample data - Pandas rolling"
    else:
        # Imported and constructed only on this path so that the Pandas path
        # does not require BLFilter to be importable.
        from BLFilter import Filter # Borrowed from https://bitbucket.org/mbari/pybl/src/master/biolum/BLFilter.py
                                    # See also https://bitbucket.org/mbari/pybl/src/master/test/biolum/BLFilter_test.py

        filt = Filter(window_size=window_size, target_record_size=len(df))
        # BLFilter.Filter.apply_filter() takes 30 seconds for a mission - Pandas is likely faster
        min_bg_unsmoothed, min_bg = filt.apply_filter((raw, []), rolling.Min)
        _, med_bg = filt.apply_filter((raw, []), rolling.Median)
        title = "Background envelope and peaks for 10 minute sample data - BLFilter"

    min_bg_unsmoothed_plot = pd.Series(min_bg_unsmoothed, index=df.index).rename("min_bg_unsmoothed").hvplot(
        width=1000, height=300, title=title
    )
    min_bg_plot = pd.Series(min_bg, index=df.index).rename("min_bg").hvplot()
    med_bg_plot = pd.Series(med_bg, index=df.index).rename("med_bg").hvplot()

    # Detection level: the median plus the distance between median and minimum
    above_bg = med_bg * 2.0 - min_bg
    above_bg_plot = pd.Series(above_bg, index=df.index).rename("above_bg").hvplot()

    # Find the high and low peaks
    peaks, _ = signal.find_peaks(raw, height=above_bg)
    # find_peaks returns integer positions, so select with .iloc; plain []
    # on a datetime-indexed Series would treat them as labels.
    s_peaks = raw.iloc[peaks]
    nbflash_high = s_peaks[s_peaks > flash_threshold]
    nbflash_low = s_peaks[s_peaks <= flash_threshold]
    nbflash_high_plot = nbflash_high.rename("nbflash_high").hvplot(kind="scatter", color="red", marker="star", s=50)
    nbflash_low_plot = nbflash_low.rename("nbflash_low").hvplot(kind="scatter", color="yellow", marker="star", s=50)

    # Plot everything together
    plots = min_bg_unsmoothed_plot * min_bg_plot * med_bg_plot * above_bg_plot * raw_10m_plot * nbflash_high_plot * nbflash_low_plot
    return med_bg, nbflash_high, nbflash_low, plots

In [None]:
# Run the BLFilter.Filter.apply_filter() variant and display its overlay, for visual
# comparison with the overlay from the Pandas rolling variant
med_bg, nbflash_high, nbflash_low, plots = compute_bg_envelope(use_pandas_rolling=False)
plots

In [None]:
# Run the Pandas rolling variant and display its overlay, for visual comparison with
# the overlay from the BLFilter.Filter.apply_filter() variant.
# This call rebinds med_bg, nbflash_high and nbflash_low, so when the cells are run
# in order these Pandas results are the ones bound afterwards.
med_bg, nbflash_high, nbflash_low, plots = compute_bg_envelope(use_pandas_rolling=True)
plots

In [None]:
# Construct full time series of background BL and flashes with NaNs for non-flash values
s_med_bg = pd.Series(med_bg, index=df.index).resample("1S").median()

s_nbflash_high = pd.Series(np.nan, index=df.index)
s_nbflash_high.loc[nbflash_high.index] = nbflash_high

s_nbflash_low = pd.Series(np.nan, index=df.index)
s_nbflash_low.loc[nbflash_low.index] = nbflash_low

# Count the number of flashes per second.
# Resampler.count() counts the non-NaN samples (i.e. the flashes) that fall in each
# wall-clock second. A fixed 60-sample rolling window would instead attribute each
# window to the second of its last sample and would rely on exactly 60 samples per
# second, leaving the counts out of step with the 1 s flow series used below.
nbflash_high_counts = s_nbflash_high.resample("1S").count()
nbflash_low_counts = s_nbflash_low.resample("1S").count()

# Compute flashes per liter
flow = ds_align[["biolume_flow"]].sel(biolume_time=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00"))["biolume_flow"].to_pandas().resample("1S").mean()
nbflash_high_per_liter = nbflash_high_counts.divide(flow) * 1000
nbflash_low_per_liter = nbflash_low_counts.divide(flow) * 1000

nbflash_high_per_liter_plot = nbflash_high_per_liter.rename("nbflash_high").hvplot(width=1000, height=300, title="Counts of flashes per liter", color="red")
nbflash_low_per_liter_plot = nbflash_low_per_liter.rename("nbflash_low").hvplot(color="yellow")
nbflash_high_per_liter_plot * nbflash_low_per_liter_plot


In [None]:
# Create Fig. 5 in the paper - Histogram of bg_BL/fluo ratio for the whole mission
# Compute the ratio of nighttime background BL to fluorescence from the 1S.nc file
fluo = ds_1S["hs2_fl700"].to_pandas()
fluo[fluo < 0] = 0.0    # Remove negative values
bg_BL = ds_1S["biolume_bg_biolume"].to_pandas()

# Division by a zero fluorescence gives +/-inf; turn those into NaN and drop all NaNs
bg_bl_fluo_ratio = bg_BL.divide(fluo).replace([np.inf, -np.inf], np.nan).dropna()
bg_bl_fluo_ratio_plot = bg_bl_fluo_ratio.rename("bg_bl_fluo_ratio").hvplot(width=1000, height=300, title="bg_BL/fluo ratio")
# Individual time series plots
#fluo.rename("fluo").hvplot()
#bg_BL.rename("bg_BL").hvplot()
#bg_bl_fluo_ratio_plot

# Histogram of bg_BL/fluo ratio.
# The ratio spans many orders of magnitude, so equal-width bins of the ratio itself
# drawn on a log axis put almost all of the data in the first bin. Bin log10(ratio)
# instead, which gives bins of equal width in log space; the x axis then shows the
# exponent (8 to 16 corresponds to 1.e8 to 1.e16). Only positive ratios have a log10.
log10_ratio = np.log10(bg_bl_fluo_ratio[bg_bl_fluo_ratio > 0])
bg_bl_fluo_ratio_hist = log10_ratio.rename("log10_bg_bl_fluo_ratio").hvplot.hist(width=1000, height=300, title="Histogram of bg_BL/fluo ratio", bins=1000, xlim=(8, 16), xlabel="log10(bg_BL/fluo)")
print(bg_bl_fluo_ratio.describe())
bg_bl_fluo_ratio_hist

In [None]:
# (2) H-dino, a-dino and a-other proxies
# See https://bitbucket.org/mbari/pybl/src/master/biolum/BLProxies.py
# Proxy configs

# From: https://bitbucket.org/messiem/toolbox_blprocess/src/master/bl_proxies_fluobiolum.m
#	fluo: fluorescence (proxy for phytoplankton = adinos + aother)
#	bgrd_BL: background bioluminescence (proxy for dinoflagellates)
# 	ratioAdinos: typical bgrd_BL/fluo ratio for dinoflagellates populations, typically identified from a histogram over an entire dataset
#	calfactor: possible calibration to normalize the proxies (typically fluorescence 99th percentile).
#		If not given, no calibration is applied (calfactor=1) and the proxies are given in fluorescence units.

# From: https://bitbucket.org/mbari/pybl/src/master/biolum/BLProxies.py
# (that file uses a fixed ratio of 2.65E+10; here the ratio is computed from this mission's data)
# The mission-wide fluorescence is read from ds_1S, with negative values set to 0,
# rather than from the global `fluo`: `fluo` is rebound to the 10 minute window
# further down, so re-running this cell would otherwise change the ratio.
mission_fluo = ds_1S["hs2_fl700"].to_pandas().clip(lower=0.0)
proxy_ratio_adinos = bg_BL.quantile(0.99) / mission_fluo.quantile(0.99)
print(f"Using proxy_ratio_adinos = {proxy_ratio_adinos:.2e}, computed from 99th percentile of bg_BL and fluo")

proxy_cal_factor = 11.6739

# Compare with proxies loaded for this 10 minute period into stoqs_all_dorado: http://stoqs.mbari.org/p/HfbS8zQ
pseudo_fluorescence = s_med_bg / proxy_ratio_adinos
#fluo = ds_align["ecopuck_chl"].sel(ecopuck_time=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00")).to_pandas().resample('1S').mean().values
fluo = ds_align["hs2_fl700"].sel(hs2_time=slice("2021-04-13 10:00:00", "2021-04-13 10:10:00")).to_pandas().resample('1S').mean()

# Align fluorescence to the background BL time stamps before comparing: the two
# instruments need not yield the same number of 1 s bins, and comparing bare
# arrays would pair samples by position instead of by time.
fluo_on_bg_times = fluo.reindex(pseudo_fluorescence.index)
shared_fluorescence = np.minimum(fluo_on_bg_times, pseudo_fluorescence)
adinos = shared_fluorescence / proxy_cal_factor
hdinos = (pseudo_fluorescence - shared_fluorescence) / proxy_cal_factor

# Plot the proxies
pseudo_fluorescence_plot = pseudo_fluorescence.rename("pseudo_fluorescence").hvplot()
fluo_plot = fluo.rename("fluo").hvplot()
adinos_plot = pd.Series(adinos, index=s_med_bg.index).rename("adinos").hvplot()
hdinos_plot = pd.Series(hdinos, index=s_med_bg.index).rename("hdinos").hvplot()

pseudo_fluorescence_plot * fluo_plot * adinos_plot * hdinos_plot