In [1]:
import os
from subprocess import run
from pyglider import slocum
from pyglider import utils
import numpy as np
import pandas as pd
import xarray as xr
from esdgliderutils.profiles import get_profiles_esd

In [4]:
# Deployment selection: which glider deployment and processing mode to use.
deployment = 'john-20240312'
mode = 'delayed'
bucket_name = 'amlr-gliders-deployments-dev'

# Local mount point of the GCS bucket, and the deployment folder inside it.
deployments_path = f'/home/sam_woodman_noaa_gov/{bucket_name}'
glider_path = os.path.join(deployments_path, 'REFOCUS', '2024', deployment)

# Inputs handed to pyglider: cache directory, raw binary directory, and the
# deployment YAML named after the deployment and mode.
cacdir = os.path.join(deployments_path, 'cache')
binarydir = os.path.join(glider_path, 'data', 'binary', mode)
deploymentyaml = os.path.join(glider_path, 'config',
                              f"{deployment}-{mode}.yml")

# Directory holding the L1 timeseries NetCDF files read and written below.
l1tsdir = os.path.join(glider_path, 'data', 'nc', 'L1-timeseries')

# Remount the bucket. The unmount is best-effort (it returns non-zero when
# nothing is mounted yet), so its return code is deliberately not checked.
run(["fusermount", "-u", deployments_path])
# The mount itself is required: fail loudly here rather than letting later
# cells run against an empty, unmounted local directory.
run(["gcsfuse", "--implicit-dirs", bucket_name, deployments_path], check=True)


{"timestamp":{"seconds":1726072839,"nanos":97543585},"severity":"INFO","message":"Start gcsfuse/2.4.0 (Go version go1.22.4) for app \"\" using mount point: /home/sam_woodman_noaa_gov/amlr-gliders-deployments-dev\n"}
{"timestamp":{"seconds":1726072839,"nanos":97816713},"severity":"INFO","message":"GCSFuse mount command flags: {\"AppName\":\"\",\"Foreground\":false,\"ConfigFile\":\"\",\"MountOptions\":{},\"DirMode\":493,\"FileMode\":420,\"Uid\":-1,\"Gid\":-1,\"ImplicitDirs\":true,\"OnlyDir\":\"\",\"RenameDirLimit\":0,\"IgnoreInterrupts\":true,\"CustomEndpoint\":null,\"BillingProject\":\"\",\"KeyFile\":\"\",\"TokenUrl\":\"\",\"ReuseTokenFromUrl\":true,\"EgressBandwidthLimitBytesPerSecond\":-1,\"OpRateLimitHz\":-1,\"SequentialReadSizeMb\":200,\"AnonymousAccess\":false,\"MaxRetrySleep\":30000000000,\"MaxRetryAttempts\":0,\"StatCacheCapacity\":20460,\"StatCacheTTL\":60000000000,\"TypeCacheTTL\":60000000000,\"KernelListCacheTtlSeconds\":0,\"HttpClientTimeout\":0,\"RetryMultiplier\":2,\"TempDi

CompletedProcess(args=['gcsfuse', '--implicit-dirs', 'amlr-gliders-deployments-dev', '/home/sam_woodman_noaa_gov/amlr-gliders-deployments-dev'], returncode=0)

In [5]:
# Build the "-sci" L1 timeseries from the binary files, using the
# sci_water_temp sensor as the time base.
# For upper-case file extensions the pattern would be '*.[D|E]BD'.
l1ts_outname_sci = slocum.binary_to_timeseries(
    binarydir,
    cacdir,
    l1tsdir,
    deploymentyaml,
    search='*.[d|e]bd',
    fnamesuffix='-sci',
    time_base='sci_water_temp',
    profile_filt_time=100,
    profile_min_time=300,
    maxgap=300,
)

In [None]:
# Build the "-oxy" L1 timeseries from the binary files, using the
# sci_oxy4_oxygen sensor as the time base.
# For upper-case file extensions the pattern would be '*.[D|E]BD'.
l1ts_outname_oxy = slocum.binary_to_timeseries(
    binarydir,
    cacdir,
    l1tsdir,
    deploymentyaml,
    search='*.[d|e]bd',
    fnamesuffix='-oxy',
    time_base='sci_oxy4_oxygen',
    profile_filt_time=100,
    profile_min_time=300,
    maxgap=300,
)

In [8]:
# Build the "-eng" L1 timeseries from the binary files, using the
# m_depth sensor as the time base.
# For upper-case file extensions the pattern would be '*.[D|E]BD'.
l1ts_outname_eng = slocum.binary_to_timeseries(
    binarydir,
    cacdir,
    l1tsdir,
    deploymentyaml,
    search='*.[d|e]bd',
    fnamesuffix='-eng',
    time_base='m_depth',
    profile_filt_time=100,
    profile_min_time=300,
    maxgap=300,
)

In [7]:
# Open only the "-sci" timeseries file and display it for a quick look.
sci_nc_path = os.path.join(l1tsdir, f"{deployment}-sci.nc")
ts_sci = xr.open_dataset(sci_nc_path)
ts_sci

In [3]:
# Paths of the three L1 timeseries files (science, oxygen, engineering).
sci_nc = os.path.join(l1tsdir, f"{deployment}-sci.nc")
oxy_nc = os.path.join(l1tsdir, f"{deployment}-oxy.nc")
eng_nc = os.path.join(l1tsdir, f"{deployment}-eng.nc")

# Open them in the same order as the paths above.
ts_sci, ts_oxy, ts_eng = (
    xr.open_dataset(nc_path) for nc_path in (sci_nc, oxy_nc, eng_nc)
)

# Display the science dataset; list(ts_sci.data_vars) lists its variables.
ts_sci

In [4]:
# Display the dataset opened from the "-oxy" timeseries file.
ts_oxy

In [5]:
# Display the dataset opened from the "-eng" timeseries file.
ts_eng

In [None]:
# ts_sci.close()
# ts_oxy.close()
# ts_eng.close()

## Testing times

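The main purpose of this section is to confirm that `xarray.merge` is doing what we expect: with an outer join, the merged `time` coordinate should contain exactly the union of the timestamps from the science, oxygen, and engineering timeseries.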

In [8]:
# Pull the time coordinate of each timeseries out as a plain numpy array.
t1 = ts_sci.time.values
t2 = ts_oxy.time.values
t3 = ts_eng.time.values

# Sorted, de-duplicated union of all three time axes; its length is the
# number of distinct timestamps across the three datasets.
df_union = np.unique(np.concatenate([t1, t2, t3], axis=None))
print(df_union.size)
df_union

1529328


array(['2024-03-12T19:39:02.704000000', '2024-03-12T19:39:06.904999936',
       '2024-03-12T19:39:22.776000000', ...,
       '2024-04-02T21:20:27.739000064', '2024-04-02T21:20:31.740999936',
       '2024-04-02T21:20:35.744000000'], dtype='datetime64[ns]')

## Combining datasets

In [9]:
# Position variables kept from every timeseries.
vars_all = [
    'latitude',
    'longitude',
    'waypoint_latitude',
    'waypoint_longitude',
]

# Variables kept from the science timeseries.
sci_vars = [
    *vars_all,
    'conductivity',
    'temperature',
    'pressure',
    'depth',
    'salinity',
    'potential_density',
    'density',
    'potential_temperature',
    'profile_index',
    'profile_direction',
]

# Variables kept from the oxygen timeseries.
oxy_vars = [*vars_all, "oxygen_concentration"]

# Variables kept from the engineering timeseries.
eng_vars = [*vars_all, "m_depth", "heading", "pitch", "roll"]

In [10]:
# Reduce each dataset to the variables selected for it.
ts_sci, ts_oxy, ts_eng = (
    ts_sci[sci_vars],
    ts_oxy[oxy_vars],
    ts_eng[eng_vars],
)

In [11]:
# Merge the three subsetted datasets into one. The outer join keeps every
# index value found in any input; "no_conflicts" requires variables shared
# between inputs to agree wherever both are non-null; "override" takes the
# attributes from the first dataset.
datasets_to_merge = [ts_sci, ts_oxy, ts_eng]
ts = xr.merge(
    datasets_to_merge,
    compat="no_conflicts",
    join="outer",
    combine_attrs="override",
)
ts

In [None]:
# any(np.isnan(ts.latitude.values))
# good = ~np.isnan(ts.latitude.values + ts.longitude.values)

# print(np.nanmax(ts.latitude.values))
# print(np.max(ts.latitude.values[good]))

Recalculate attributes as necessary

In [None]:
# Recompute the geospatial bounds from the merged dataset.
lat_all = ts.latitude.values
lon_all = ts.longitude.values

# The sum is NaN wherever either coordinate is NaN, so `good` flags the
# samples with both a latitude and a longitude.
good = ~np.isnan(lat_all + lon_all)
lat_ok = lat_all[good]
lon_ok = lon_all[good]

ts.attrs.update({
    'geospatial_lat_max': np.nanmax(lat_ok),
    'geospatial_lat_min': np.nanmin(lat_ok),
    'geospatial_lon_max': np.nanmax(lon_ok),
    'geospatial_lon_min': np.nanmin(lon_ok),
    'geospatial_lat_units': 'degrees_north',
    'geospatial_lon_units': 'degrees_east',
})

# Recompute the time coverage from the first and last merged timestamps.
dt = ts.time.values
first_time, last_time = dt[0], dt[-1]
ts.attrs['time_coverage_start'] = str(first_time)
ts.attrs['time_coverage_end'] = str(last_time)

# Deployment start/end use the same timestamps, truncated to whole seconds.
ts.attrs['deployment_start'] = str(first_time.astype('datetime64[s]'))
ts.attrs['deployment_end'] = str(last_time.astype('datetime64[s]'))

Recalculate profile values as necessary

In [None]:
# Fill the NaN gaps in m_depth by linear interpolation over sample index.
depth_values = ts.m_depth.values
# Integer positions of the samples that actually have a depth reading.
depth_good = np.where(~np.isnan(depth_values))[0]
# np.interp needs increasing sample positions (xp) and the matching values
# (fp); both must come from depth_good. The lat/lon boolean mask `good` from
# the attributes cell is the wrong array here.
ts.m_depth.values = np.interp(
    np.arange(len(depth_values)), depth_good, depth_values[depth_good]
)
# Recalculate the profile values from the gap-filled m_depth.
# NOTE(review): presumably this rewrites profile_index / profile_direction;
# confirm against esdgliderutils.
ts = get_profiles_esd(ts, "m_depth")

# tg_ind = utils.find_gaps(ts.time.values[depth_good], ts.time.values, 300)
# np.where(tg_ind)
# dep = ts.m_depth.to_pandas()

ts = utils.get_distance_over_ground(ts)
ts

In [None]:
# Write the merged timeseries next to the per-sensor files, storing time as
# float64 seconds since the Unix epoch.
outname = os.path.join(l1tsdir, f"{deployment}-union.nc")
time_encoding = {
    'units': 'seconds since 1970-01-01T00:00:00Z',
    '_FillValue': np.nan,
    'dtype': 'float64',
}
ts.to_netcdf(outname, mode='w', encoding={'time': time_encoding})