In [None]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import math

from esdglider import gcp, glider, utils

# --- Deployment selection ---------------------------------------------------
# Exactly one `deployment_info` dict should be active at a time; the
# commented-out dicts are ready-to-swap alternatives for other deployments.
# deployment_info = {
#     "deployment": 'amlr08-20220513',
#     "project": "SANDIEGO",
#     "mode": 'delayed',
#     "min_dt": '2022-05-13 18:56:55',
# }
deployment_info = {
    "deployment": "calanus-20241019",
    "project": "ECOSWIM",
    "mode": "delayed",
    "min_dt": "2024-10-19 17:37:00",
}
# deployment_info = {
#     "deployment": 'amlr03-20231128',
#     "project": "FREEBYRD",
#     "mode": 'delayed',
#     "min_dt": '2023-11-28 20:46',
# }

# Unpack the fields that are reused when building file names below.
deployment = deployment_info["deployment"]
project = deployment_info["project"]
mode = deployment_info["mode"]

# NOTE(review): `year` is not referenced again in this part of the notebook;
# kept in case later cells rely on it.
year = utils.year_path(project, deployment)

# --- Paths and bucket mount -------------------------------------------------
# The bucket name is defined once and reused for both the mount point and the
# mount call, so the two cannot drift apart.
bucket_name = "amlr-gliders-deployments-dev"
deployments_path = f"/home/sam_woodman_noaa_gov/{bucket_name}"
config_path = "/home/sam_woodman_noaa_gov/glider-lab/deployment-config"

# NOTE(review): ro=False presumably requests a read-write mount; the cells
# visible here only read from the bucket - confirm write access is needed.
gcp.gcs_mount_bucket(bucket_name, deployments_path, ro=False)

# `paths` is indexed by name below ("tsdir" holds the timeseries directory).
paths = glider.get_path_deployment(
    deployment_info, deployments_path, config_path)

# Timeseries directory and the science / engineering NetCDF files inside it.
dir_ts = paths["tsdir"]
path_sci = os.path.join(dir_ts, f"{deployment}-{mode}-sci.nc")
path_eng = os.path.join(dir_ts, f"{deployment}-{mode}-eng.nc")

# Scratch (disabled): inspect the profile directory / hard-coded file names.
# dir_ngdac = paths["profdir"]
# if os.path.isdir(dir_ngdac):
#     files_ngdac = os.listdir(dir_ngdac)
# files_ngdac
# os.path.join(paths["profdir"], f"{deployment} -{mode}.nc")
# path_sci = os.path.join(dir_ts, 'calanus-20241019-sci.nc')
# path_eng = os.path.join(dir_ts, 'calanus-20241019-eng.nc')


In [None]:
# Load the raw, engineering and science timeseries files, keeping both the
# xarray Dataset and a pandas conversion of each for inspection.
path_raw = os.path.join(dir_ts, f"{deployment}-{mode}-raw.nc")

# Raw timeseries
ds_raw = xr.load_dataset(path_raw)
df_raw = ds_raw.to_pandas()
display(ds_raw)

# Engineering timeseries
ds_eng = xr.load_dataset(path_eng)
df_eng = ds_eng.to_pandas()
display(ds_eng)

# Science timeseries
ds_sci = xr.load_dataset(path_sci)
df_sci = ds_sci.to_pandas()
display(ds_sci)

In [None]:
from pyglider import utils as pgutils

# Keyword arguments forwarded to utils.findProfiles. Swap in the commented
# amlr03 values when working on that deployment.
profile_params = {
    # calanus
    "stall": 5,
    "shake": 0,
    "inversion": 10,
    "interrupt": 120,
    "period": 60,
    "length": 8,
    # # amlr03
    # "stall": 0.5,
    # "shake": 0,
    # "inversion": 10,
    # "interrupt": 600,
    # "period": 60,
    # "length": 8,
}

# r1 / r2 are stored below as profile_index / profile_direction.
r1, r2 = utils.findProfiles(
    ds_raw.time.values,
    ds_raw.depth.values,
    **profile_params,
)

# Variables passed to data_var_reorder below.
vars_keep = ["longitude", "latitude", "depth", "profile_index", "profile_direction"]

# Attach the per-sample profile labels along the time dimension (no attrs).
ds_raw["profile_index"] = ("time", r1, {})
ds_raw["profile_direction"] = ("time", r2, {})
ds_raw = pgutils.get_distance_over_ground(ds_raw)
ds_raw = utils.data_var_reorder(ds_raw, vars_keep)

# nanmax rather than max: a single NaN profile index would otherwise turn the
# reported maximum into nan.
print(np.nanmax(ds_raw.profile_index.values))
ds_raw

In [None]:
# Report how many samples carry a NaN profile_index.
n_nan_profiles = np.count_nonzero(np.isnan(ds_raw.profile_index))
print(f"There are {n_nan_profiles} nan profile indices")

# Profile summary from check_profiles; reused later to label the science data.
prof_summ = utils.check_profiles(ds_raw)
prof_summ

In [None]:
# import glidertools as gt
# import pyglider.utils as pgutils
# import math

# Re-run profile detection with different parameters and keep the previous
# labels next to the new ones (as *_orig) for comparison.
vars_keep = ["longitude", "latitude", "depth", "profile_index", "profile_direction"]
x_orig = utils.data_var_reorder(ds_raw, vars_keep)
regions_df_orig = utils.calc_regions(x_orig)

# Keyword arguments forwarded to utils.findProfiles.
# NOTE(review): stall is 0.5 here but 5 in the earlier findProfiles call on
# ds_raw - confirm the difference is intentional.
profile_params_alt = {
    # calanus
    "stall": 0.5,
    "shake": 0,
    "inversion": 10,
    "interrupt": 120,
    "period": 60,
    "length": 8,
    # # amlr03
    # "stall": 0.5,
    # "shake": 0,
    # "inversion": 10,
    # "interrupt": 600,
    # "period": 60,
    # "length": 8,
}

# pgutils.get_profiles_new(x, filt_time=20)
p1, p2 = utils.findProfiles(
    x_orig.time.values,
    x_orig.depth.values,
    **profile_params_alt,
)

# Move the existing labels aside, then store the freshly computed ones under
# the standard names.
orig = ["profile_index_orig", "profile_direction_orig"]
x = x_orig.rename_vars({"profile_index": orig[0], "profile_direction": orig[1]})
x["profile_index"] = ("time", p1, {})
x["profile_direction"] = ("time", p2, {})
x = utils.data_var_reorder(x, (vars_keep + orig))

# A bare expression in the middle of a cell is not rendered, so the result is
# captured and displayed explicitly.
prof_summ_alt = utils.check_profiles(x)
display(prof_summ_alt)

# Explore: samples from 21:00 on the chosen day through the end of that day.
dt = "2024-10-26"
x2 = x.sel(time=slice(f"{dt} 21:00", dt))
x2_df = x2.to_pandas()
x2

In [None]:
# Label each science timestamp with the profile_index of the prof_summ row
# whose [start_time, end_time] window (inclusive at both ends) contains it.
# Timestamps outside every window stay NaN; if windows overlap, the later row
# in prof_summ wins.
time_values = ds_sci["time"].values
assigned_values = np.full(time_values.shape, np.nan, dtype=np.float64)

profile_windows = zip(
    prof_summ["start_time"],
    prof_summ["end_time"],
    prof_summ["profile_index"],
)
for win_start, win_end, win_index in profile_windows:
    mask = (time_values >= win_start) & (time_values <= win_end)
    assigned_values[mask] = win_index

# Store the labels on the science dataset and report how many are unassigned.
ds_sci["prof"] = (("time",), assigned_values)
n_unassigned = np.count_nonzero(np.isnan(ds_sci["prof"]))
print(f"There are {n_unassigned} nan profile indices")

ds_sci

In [None]:
# Tally the per-sample difference between the stored profile_index and the
# time-window-derived `prof`; a difference of 0 means the two labels agree.
index_diff = ds_sci.profile_index - ds_sci.prof
index_diff.to_pandas().value_counts()