# sample-misc

A sample notebook for miscellaneous code and experiments with glider data processing.

In [6]:
# conda deactivate && conda env update --file glider-utils/environment.yml --prune
# from subprocess import run
# run(["/home/sam_woodman_noaa_gov/glider-utils/resources/sync-cache.sh"])

In [1]:
import os
from pyglider import slocum
from pyglider import utils
import numpy as np
import pandas as pd
import xarray as xr
from esdglider.pathutils import get_engyml_path
from esdglider.utils import postproc_eng_timeseries

In [2]:
# --- Inputs: example binary files, their cache directory, and deployment config ---
binarydir = "../resources/example-data/binary-delayed"
cacdir = "../resources/example-data/cache"
deploymentyaml = "../resources/example-data/amlr08-20220513-delayed.yml"
# Engineering yaml path is supplied by the esdglider package helper.
engyaml = get_engyml_path()

# --- Output: directory handed to the timeseries conversions below ---
outdir = "../resources/example-data/out"

In [None]:
# Science timeseries conversion. The return value is passed to
# xr.open_dataset in a later cell.
sci_options = {
    "search": '*.[D|E|d|e]bd',
    "fnamesuffix": '-sci',
    "time_base": 'sci_water_temp',
    "profile_filt_time": 100,
    "profile_min_time": 300,
    "maxgap": 300,
}
l1ts_outname_sci = slocum.binary_to_timeseries(
    binarydir, cacdir, outdir, deploymentyaml, **sci_options
)

In [None]:
# Engineering timeseries conversion. Both the deployment yaml and the
# engineering yaml are passed, as a list. The return value is passed to
# xr.open_dataset in a later cell.
eng_options = {
    "search": '*.[D|E|d|e]bd',
    "fnamesuffix": '-eng',
    # alternative tried: search='*.[D|E]BD', fnamesuffix=''
    "time_base": 'm_depth',
    "profile_filt_time": 100,
    "profile_min_time": 300,
    "maxgap": 300,
}
l1ts_outname_eng = slocum.binary_to_timeseries(
    binarydir, cacdir, outdir, [deploymentyaml, engyaml], **eng_options
)

In [None]:
# Open the science timeseries using the value returned by the '-sci'
# binary_to_timeseries run, then show the dataset as the cell output.
ts_sci = xr.open_dataset(filename_or_obj=l1ts_outname_sci)
# ts_oxy = xr.open_dataset(os.path.join(l1tsdir, f"{deployment}-oxy.nc"))
ts_sci
# list(ts_sci.data_vars)

In [None]:
# Open the engineering timeseries using the value returned by the '-eng'
# binary_to_timeseries run, then show the dataset as the cell output.
ts_eng = xr.open_dataset(filename_or_obj=l1ts_outname_eng)
ts_eng

In [None]:
# Run the esdglider post-processing helper on the engineering timeseries; whatever
# it returns is shown as the cell output.
# NOTE(review): the result is not assigned to a name — confirm whether this helper
# modifies ts_eng in place or returns a new dataset that should be kept.
postproc_eng_timeseries(ts_eng)

In [None]:
# Look up the "comment" global attribute once, then report whether it is
# empty or whitespace-only (prints True in that case).
comment_text = ts_eng.attrs["comment"]
print(not comment_text.strip())

In [22]:
# ts_sci.close()
# ts_oxy.close()
# ts_eng.close()

## dbdreader exploration

In [3]:
import dbdreader

# Glob for the binary files, identical to the `search` value passed to
# slocum.binary_to_timeseries in the earlier cells.
dbd_search = '*.[D|E|d|e]bd'

# Build the pattern from named parts. The previous form nested single quotes
# inside a single-quoted f-string, which is a SyntaxError on Python < 3.12.
# The resulting pattern string is unchanged.
dbd = dbdreader.MultiDBD(pattern=f"{binarydir}/{dbd_search}",
                         cacheDir=cacdir)
# x=[v for _,v in dbd.get(*param_names, return_nans=True)]
# x

In [4]:
# Parameters pulled from the dbdreader object. Each dbd.get call yields a
# (time, values) pair, unpacked in the same order as param_names.
param_names = ["m_depth", "m_heading", "m_pitch", "m_roll", "m_tot_num_inflections"]
(t1, d), (t2, h), (t3, p), (t4, r), (t5, i) = [
    dbd.get(name, return_nans=True) for name in param_names
]

In [None]:
# Find where array1 is NaN
nan_in_array1 = np.isnan(d)

# Check if values in arrays 2-4 are NOT NaN at positions where array1 is NaN
non_nan_in_arrays_2_to_4 = (~np.isnan(h) | ~np.isnan(p) | ~np.isnan(r)) & nan_in_array1

# Indices where array1 is NaN and any of arrays 2-4 is not NaN
indices = np.where(non_nan_in_arrays_2_to_4)[0]

# Output the indices
print("Indices where array1 is NaN and arrays 2-4 are not NaN:", indices)

## Testing times

The main purpose of this section is to confirm that `xarray.merge` does what we expect: the merged time coordinate should be the union of the input time coordinates.

In [None]:
# Time coordinates of the science and engineering datasets as numpy arrays
# (t2 is reserved for the oxygen dataset: ts_oxy.time.to_numpy()).
t1, t3 = (ds.time.to_numpy() for ds in (ts_sci, ts_eng))

# Sorted union of the two sets of timestamps. Despite the name this is a
# numpy array, not a DataFrame.
df_union = np.union1d(t1, t3)

# One length per line: science, engineering, union
print(len(t1), len(t3), len(df_union), sep="\n")
df_union

## Combining datasets

In [9]:
# Variables kept in every dataset
vars_all = ['latitude', 'longitude', 'waypoint_latitude', 'waypoint_longitude']

# Per-dataset selections: the shared variables followed by the dataset-specific ones
sci_vars = [
    *vars_all,
    'conductivity', 'temperature', 'pressure',
    'depth', 'salinity', 'potential_density', 'density', 'potential_temperature',
    'profile_index', 'profile_direction',
]
oxy_vars = [*vars_all, "oxygen_concentration"]
eng_vars = [*vars_all, "m_depth", "heading", "pitch", "roll"]

In [10]:
# Reduce each dataset to its list of variables of interest; the three results
# are the inputs to xr.merge in the next cell.
ts_sci = ts_sci[sci_vars]
# NOTE(review): ts_oxy is never assigned in the visible cells (its open_dataset call
# is commented out), so this line raises NameError on a fresh kernel — confirm
# where the oxygen dataset should come from, or drop it from this section.
ts_oxy = ts_oxy[oxy_vars]
ts_eng = ts_eng[eng_vars]

In [None]:
# Merge the science, oxygen and engineering datasets into one.
merge_inputs = [ts_sci, ts_oxy, ts_eng]
ts = xr.merge(
    merge_inputs,
    compat="no_conflicts",
    join="outer",
    combine_attrs="override",
)
ts

In [None]:
# any(np.isnan(ts.latitude.values))
# good = ~np.isnan(ts.latitude.values + ts.longitude.values)

# print(np.nanmax(ts.latitude.values))
# print(np.max(ts.latitude.values[good]))

Recalculate attributes as necessary

In [None]:
# Positions where both latitude and longitude are present (the sum is NaN
# if either one is NaN).
lat_vals = ts.latitude.values
lon_vals = ts.longitude.values
good = ~np.isnan(lat_vals + lon_vals)

# Geospatial bounds from the valid positions only
ts.attrs.update({
    'geospatial_lat_max': np.nanmax(lat_vals[good]),
    'geospatial_lat_min': np.nanmin(lat_vals[good]),
    'geospatial_lon_max': np.nanmax(lon_vals[good]),
    'geospatial_lon_min': np.nanmin(lon_vals[good]),
    'geospatial_lat_units': 'degrees_north',
    'geospatial_lon_units': 'degrees_east',
})

# Time coverage from the first and last timestamps
dt = ts.time.values
t_start, t_end = dt[0], dt[-1]
ts.attrs['time_coverage_start'] = '%s' % t_start
ts.attrs['time_coverage_end'] = '%s' % t_end

# Deployment start/end use the same timestamps, truncated to whole seconds
ts.attrs['deployment_start'] = str(t_start.astype('datetime64[s]'))
ts.attrs['deployment_end'] = str(t_end.astype('datetime64[s]'))

Recalculate profile values as necessary

In [None]:
# Integer positions of the samples that have a (non-NaN) m_depth value.
depth_good = np.where(~np.isnan(ts.m_depth))[0]

# Fill the NaN gaps in m_depth by linear interpolation over sample position.
# The known points must be given by depth_good for both xp and fp. The previous
# code passed `good`, the boolean lat/lon mask from the attributes cell, which
# has a different length than the selected depths and indexes the wrong samples.
ts.m_depth.values = np.interp(
    np.arange(len(ts.m_depth)), depth_good, ts.m_depth.values[depth_good]
)

# NOTE(review): get_profiles_esd is not imported in the visible cells — confirm
# which module provides it and add it to the import cell.
ts = get_profiles_esd(ts, "m_depth")

# tg_ind = utils.find_gaps(ts.time.values[depth_good], ts.time.values, 300)
# np.where(tg_ind)
# dep = ts.m_depth.to_pandas()

ts = utils.get_distance_over_ground(ts)
ts

In [None]:
# NOTE(review): l1tsdir and deployment are not assigned in the visible cells —
# confirm where they are defined before running this cell on a fresh kernel.
outname = os.path.join(l1tsdir, f"{deployment}-union.nc")

# Encoding for the time variable: float64 seconds since the Unix epoch, NaN fill value
time_encoding = {
    'units': 'seconds since 1970-01-01T00:00:00Z',
    '_FillValue': np.nan,
    'dtype': 'float64',
}
ts.to_netcdf(outname, 'w', encoding={'time': time_encoding})

## xarray selecting - ffill and bfill

Exploring the options for the `method` argument of xarray's `sel` (for example `ffill` and `bfill`).

In [10]:
import xarray as xr
import pandas as pd

# Small example: four integer values on four consecutive daily timestamps.
# Alternatives tried here:
#   da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])])
#   np.random.rand(4) as the data
#   a second coordinate ("space", ["IA", "IL", "IN"])
sample_times = pd.date_range("2000-01-01", periods=4)
da = xr.DataArray(
    [1, 2, 3, 4],
    coords={"time": sample_times},
    dims=["time"],
)
# display(da)

# Wrap the array in a Dataset under the variable name "foo" and show it
ds = da.to_dataset(name="foo")
display(ds)

In [18]:
# Select a timestamp that falls between two samples, using method 'bfill'
ds.sel({"time": ["2000-01-01 12:00:00"]}, method='bfill')

In [7]:
# The `da` built earlier in this section is indexed by "time", so selecting on "x"
# fails with it. Build a dedicated x-indexed array for this lookup rather than
# relying on a previously executed (now commented-out) definition.
da_x = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])])

# Backward-fill lookup on "x" for labels that fall between coordinate values
da_x.sel(x=[0.5, 1, 1.5, 2], method="bfill")