This notebook explores the stenella and risso glider deployment data, both via the processed NetCDF (nc) files and via dbdreader.

In [None]:
import os

import dbdreader
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import esdglider.plots as plots
from esdglider import gcp, glider, utils

# Deployment to explore; swap which line is commented out to switch deployments
deployment_name = "stenella-20250414"
# deployment_name = "risso-20250414"
mode = "delayed"


# Standard locations. The bucket name is defined once and reused for both the
# mount call and the mount point, so the two can never drift apart.
bucket_name = 'amlr-gliders-deployments-dev'
home_path = "/home/sam_woodman_noaa_gov"
deployments_path = f"{home_path}/{bucket_name}"
config_path = f"{home_path}/glider-lab/deployment-configs"

# Mount the bucket at deployments_path (ro=False; presumably a read-write mount)
gcp.gcs_mount_bucket(bucket_name, deployments_path, ro=False)

# Deployment description handed to esdglider to resolve all file/directory paths
deployment_info = {
    "deploymentyaml": os.path.join(config_path, f"{deployment_name}.yml"),
    "mode": mode,
}
paths = glider.get_path_glider(deployment_info, deployments_path)

# Show which path entries are available for the cells below
paths.keys()

## Timeseries exp

In [None]:
# Read the three datasets fully into memory: the raw timeseries, the science
# timeseries, and the gridded (gr5) product.
ds_raw = xr.load_dataset(paths["tsrawpath"])
ds_sci = xr.load_dataset(paths["tsscipath"])
ds_gr5 = xr.load_dataset(paths["gr5path"])

# Rich display of each dataset, in the same order they were loaded.
# (Use e.g. ds_sci.to_pandas() if a DataFrame view is wanted instead.)
display(ds_raw, ds_sci, ds_gr5)

### Gridded 

In [None]:
# Time coordinate of the per-time count of non-NaN gridded temperature values
ds_gr5.temperature.count("depth")["time"]

In [None]:
# For each gridded variable, plot how many non-NaN values exist along depth at
# every time step. Time steps with no data at all (count == 0) are drawn in red.

# display(ds_gr5["conductivity"].count(dim="depth"))
# ds_gr5["chlorophyll"].count(dim="depth")

# Variables to plot
var_list = ["temperature", "chlorophyll", "oxygen_concentration", "par"]
# var_list = ["chlorophyll"]

for var in var_list:
    # Number of non-NaN values for this variable along the 'depth' dimension
    da = ds_gr5[var].count(dim="depth")

    fig, ax = plt.subplots(figsize=(10, 6))

    # Non-zero counts; xarray's plot method uses the time coordinate for x
    da.where(da > 0).plot(
        ax=ax, label=f"{var} count", marker='o', linestyle='none'
    )
    # Zero counts, labelled so that they also show up in the legend
    da.where(da == 0).plot(
        ax=ax, color='red', label=f"{var} count == 0", marker='o', linestyle='none'
    )

    # Name the variable in the title so each figure can be told apart
    ax.set_title(f"Non-NaN Data Points Over Time: {var}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Number of Non-NaN Values (along depth)")
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.6)

    # One major tick per day, with rotated date labels
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
    fig.autofmt_xdate()
    fig.tight_layout()  # Adjusts plot to prevent labels from being cut off

    plt.show()
    plt.close(fig)  # Release the figure; one is created per variable

### Timeseries

In [None]:
# Science dataset: for each variable, scatter time vs. depth with the points
# colored by the variable's value.

depth_var = "depth_measured"

var_list = ["temperature", "chlorophyll", "oxygen_concentration", "par"]
# var_list = ["chlorophyll"]

# Hook for filtering the data before plotting (currently no filtering),
# e.g. ds_sci[var].dropna(dim='time') inside the loop
ds = ds_sci

for var in var_list:
    fig, ax = plt.subplots(figsize=(14, 7))

    # x = time, y = depth, color = value of the variable
    points = ax.scatter(ds.time, ds[depth_var], c=ds[var], s=3)
    # Without a colorbar the point colors cannot be interpreted
    fig.colorbar(points, ax=ax, label=var)

    ax.set_title(var)
    ax.set_xlabel('time')
    # The y-axis shows depth, not the variable (which is the color)
    ax.set_ylabel(depth_var)
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    # One major tick per day, with rotated date labels
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
    fig.autofmt_xdate()
    fig.tight_layout()  # Adjusts plot to prevent labels from being cut off

    plt.show()
    plt.close(fig)  # Release the figure; one is created per variable


In [None]:
# Number of NaN entries in the science dataset's depth_measured variable
np.count_nonzero(
    np.isnan(ds_sci["depth_measured"])
)

### plots module functions

In [None]:
# Try out the plotting helpers from esdglider.plots (imported in the top import
# cell). Uncomment a line to try the other helpers on the science timeseries.
# plots.sci_timeseries_plot("chlorophyll", ds_sci, depth_var="depth_measured", show=True)
# plots.sci_timesection_gt_plot("chlorophyll", ds_sci, depth_var="depth_measured")
plots.sci_timesection_plot("chlorophyll", ds_gr5)

## dbdreader exp

In [None]:
# Glob for the binary files: any name ending in .dbd or .ebd, in any letter case.
# (Inside a [...] character class '|' is a literal character, not "or".)
search = "*.[DEde][Bb][Dd]"
dbd = dbdreader.MultiDBD(
    # os.path.join avoids reusing double quotes inside an f-string replacement
    # field, which is a SyntaxError before Python 3.12
    pattern=os.path.join(paths["binarydir"], search),
    cacheDir=paths["cacdir"],
    # skip_initial_line = False,
)

# Sensors to extract; uncomment entries to include them
sensors = [
    # "m_depth",
    # "m_roll",
    # "m_pitch",
    # "sci_water_temp",
    "sci_water_pressure",
    "sci_flbbcd_chlor_units",
    "sci_oxy4_oxygen",
    "sci_bsipar_par",
]

# One (time, values) pair per requested sensor, in the order of `sensors`
data_list = list(dbd.get(*sensors, return_nans=True))
# Transpose into a tuple of time arrays and a tuple of value arrays
data_time, data = zip(*data_list)

display(data_list)
display(data_time)
display(data)

The block above produces two tuples of arrays: `data_time`, which holds one array of times per sensor, and `data`, which holds one array of values per sensor. The array lengths may differ between sensors, depending on whether you extract engineering or science parameters.

The next example shows how to go from the output of dbdreader's `get` to a pandas DataFrame.

In [None]:
# One time array per requested sensor; print the lengths as a quick sanity check
print(f"array lengths: {[len(i) for i in data_time]}")

# Every column is indexed by the first sensor's timestamps, so all sensors must
# share that exact time base. Fail early with a clear message instead of an
# obscure column_stack error (different lengths) or silently misaligned rows
# (same length, different timestamps).
ref_time = data_time[0]
if not all(np.array_equal(t, ref_time, equal_nan=True) for t in data_time):
    raise ValueError(
        "Sensors do not share a common time base; "
        "cannot combine them into one DataFrame indexed by the first sensor's times"
    )

# Scale by 1e9 and cast to datetime64[ns]
# (assumes the timestamps are epoch seconds — TODO confirm)
index_dt64 = (ref_time * 1e9).astype('datetime64[ns]')
df = pd.DataFrame(np.column_stack(data), columns=sensors, index=index_dt64)
df