This notebook shows ways of using `dbdreader` to explore glider data directly.

Also see https://dbdreader.readthedocs.io/en/latest/multiple_files.html

In [None]:
import os
import numpy as np
import pandas as pd
import dbdreader
from esdglider import gcp, glider, utils

# Deployment to explore. Swap in another deployment name as needed,
# e.g. "stenella-20250414".
deployment_name = "amlr03-20231128"
mode = "delayed"


# Standard locations: the bucket is mounted under a directory named after it.
# NOTE(review): these absolute paths are tied to one user's home directory;
# adjust them when running elsewhere.
bucket_name = "amlr-gliders-deployments-dev"
deployments_path = f"/home/sam_woodman_noaa_gov/{bucket_name}"
config_path = "/home/sam_woodman_noaa_gov/glider-lab/deployment-configs"

# Mount via bucket_name (not a repeated literal) so the mounted bucket and
# the mount point derived from it cannot drift apart.
gcp.gcs_mount_bucket(bucket_name, deployments_path, ro=False)
deployment_info = {
    "deploymentyaml": os.path.join(config_path, f"{deployment_name}.yml"),
    "mode": mode,
}
paths = glider.get_path_deployment(deployment_info, deployments_path)

# Directory holding the binary files read in the next section
binarydir = paths["binarydir"]
binarydir

## Load all of the files

In [None]:
# Glob for files whose extension is D or E followed by "bd", in any letter
# case. Glob character classes have no alternation, so "|" must not appear
# inside the brackets (it would be matched literally).
search = "*.[DEde][Bb][Dd]"
dbd = dbdreader.MultiDBD(
    pattern=f"{binarydir}/{search}",
    cacheDir=paths["cacdir"],
    # skip_initial_line = False,
)

## Examples

### dbd.get

A basic example, reading in variables using dbd.get

In [None]:
# Example 1
# Sensors to read. Other candidates to try: m_pitch, sci_water_temp,
# x_dbd_mission_number, x_dbd_segment_number
sensors = ["m_depth", "m_roll", "sci_water_pressure", "sci_flbbcd_chlor_units"]

# Collect one (time, values) pair per requested sensor ...
data_list = [(tm, val) for tm, val in dbd.get(*sensors, return_nans=True)]
# ... then regroup into a tuple of time arrays and a tuple of value arrays
data_time, data = zip(*data_list)

display(data_list, data_time, data)

The above block produces two tuples of arrays: `data_time`, which holds the time array for each sensor, and `data`, which holds the corresponding value array for each sensor. The array lengths may differ between sensors, depending on whether you extract engineering or science parameters.

This next example shows how to go from the output of `dbd.get` to a pandas DataFrame.

In [None]:
# Example 2
# m_depth plus the flbbcd cwo/sf coefficient sensors for chlor, bb and cdom
sensors = [
    "m_depth",
    "u_flbbcd_chlor_cwo", "u_flbbcd_bb_cwo", "u_flbbcd_cdom_cwo",
    "u_flbbcd_chlor_sf", "u_flbbcd_bb_sf", "u_flbbcd_cdom_sf",
]
data_list = [(tm, val) for tm, val in dbd.get(*sensors, return_nans=True)]
data_time, data = zip(*data_list)

# column_stack below needs every array to have the same length
lengths = [len(times) for times in data_time]
print(f"array lengths: {lengths}")

# Use the first sensor's times (scaled by 1e9 and cast to ns) as the index
index_dt64 = (data_time[0] * 1e9).astype("datetime64[ns]")
values_2d = np.column_stack(data)
df = pd.DataFrame(values_2d, index=index_dt64, columns=sensors)
df

And a third example, showing how to use include_source=True to get the filename for each datapoint

In [None]:
# Example 3a
# Sensors to read (m_pitch is another candidate to try)
sensors = ["m_depth", "sci_water_temp"]

# With include_source=True each element is unpacked below as
# ((time, values), sources), so two rounds of zip are needed
source_data = dbd.get(*sensors, include_source=True, return_nans=True)
data_list, s = zip(*source_data)
data_time, data = zip(*data_list)

# Source object behind sample 11000 of the second sensor (sci_water_temp)
source_of_sample = s[1][11000]
fn = os.path.basename(source_of_sample.filename)
fn

In [None]:
# Example 3b
# Sanity checks: the two ways of unpacking dbd.get output agree, and
# include_source=True yields the same time array as include_source=False.
sensors = [
    "m_depth",
    "m_roll",
    "sci_water_pressure",
    "sci_flbbcd_chlor_units",
]
source_data_f = dbd.get(*sensors, return_nans=True, include_source=False)
source_data_t = dbd.get(*sensors, return_nans=True, include_source=True)

# Route 1: unzip the result directly; route 2: via an explicit list of pairs
data_time1, data1 = zip(*source_data_f)
data_list = [(t, v) for (t, v) in source_data_f]
data_time2, data2 = zip(*data_list)


def _arrays_match(left, right):
    """Return True when two sequences hold pairwise-equal arrays.

    NaNs in the same position compare equal. Comparing tuples of arrays with
    ``==`` is avoided because it only succeeds when the elements are the very
    same objects; otherwise the element-wise array result has no single
    truth value.
    """
    return len(left) == len(right) and all(
        np.array_equal(a, b, equal_nan=True) for a, b in zip(left, right)
    )


print(_arrays_match(data_time1, data_time2))
print(_arrays_match(data1, data2))
# Compare the first sensor's time array from both calls as a single verdict
# rather than printing an element-wise boolean array.
# NOTE(review): assumes include_source=True elements are ((time, values), sources)
print(np.array_equal(source_data_f[0][0], source_data_t[0][0][0], equal_nan=True))

### dbd.get_sync

Read in variables using dbd.get_sync

In [None]:
# Interpolate m_roll and m_speed onto the m_depth time base
sync_sensors = ("m_depth", "m_roll", "m_speed")
tm, depth, roll, speed = dbd.get_sync(*sync_sensors)

### Other

Explore other dbd functions. 

In [None]:
# Display whatever dbd.get_CTD_sync() returns when called with no arguments.
# NOTE(review): presumably time-synchronised CTD arrays; confirm the return
# order against the dbdreader documentation.
dbd.get_CTD_sync()

In [None]:
# Display the global time range reported by dbd.
# NOTE(review): presumably the earliest and latest times across all loaded
# files; confirm the format against the dbdreader documentation.
dbd.get_global_time_range()

In [None]:
# Numbered listing of the names stored under the 'eng' key of dbd.parameterNames
for idx, name in enumerate(dbd.parameterNames["eng"]):
    print(f"{idx:2d}: {name}")

## flbbcd correction factors

A new example, trying out the flbbcd correction factors. 

For background, from an email to Teledyne: "I realized that I forgot to put in the updated calibration coefficients for the FLBBCD (ecopuck) in our autoexec.mi before a glider deployment. How would I go about applying the correct calibration coefficients after we had completed a deployment?"

This section explores applying these correction factors 'by hand', i.e., after the deployment, for the case where incorrect values were in the autoexec.mi.

In [None]:
# Chlorophyll sensors in the order the later cells index them:
# data[0] = chlor_units, data[1] = chlor_sig, data[2] = chlor_cwo, data[3] = chlor_sf
sensors = [
    "sci_flbbcd_chlor_units",
    "sci_flbbcd_chlor_sig",
    "u_flbbcd_chlor_cwo",
    "u_flbbcd_chlor_sf",
]
source_data = dbd.get(*sensors, return_nans=True)
# Without include_source, each element is a (time, values) pair, so unzipping
# yields the time arrays first and the value arrays second. The first tuple
# is therefore named data_time (it is not a list of pairs).
data_time, data = zip(*source_data)
display(data)

In [None]:
# Distinct values of data[2] and data[3], i.e. the u_flbbcd_chlor_cwo and
# u_flbbcd_chlor_sf sensors requested in the previous cell
display(*(np.unique(data[col]) for col in (2, 3)))

In [None]:
# Coefficients to apply by hand
# NOTE(review): presumably the clean-water offset (cwo) and scale factor (sf)
# from the calibration sheet; confirm the source of these values
chlor_cwo = 55
chlor_sf = 0.0071

# Recompute chlorophyll from data[1] (sci_flbbcd_chlor_sig)
chlor_units = (data[1] - chlor_cwo) * chlor_sf

# Tabulate how the recomputed values differ from data[0]
# (sci_flbbcd_chlor_units), rounded to 3 decimals
chlor_diff = pd.DataFrame(chlor_units - data[0]).round(3)
chlor_diff.value_counts()