## Glider UP/Down Merge (to eliminate thermocline spikes)

__pyversion__==3.7   
__author__==S.Bell

Updated routine for evaluating glider data to compensate for salinity spikes at sharp interfaces.
Original code was developed with the 2017 deployment in mind.  This code will work for 2017 and 2019+ (which evolved to have varying sample frequencies recorded by each instrument instead of all data on matching time intervals).

Data is ingested and read from an ERDDAP server run internally.  It is then saved for future erddap distribution.

Data to keep.  Merged profiles have T/S (see further description)/oxy/sigmat/chlor maintained.  Bin to 1m. If up and down are both ok then only keep upcast.

 Purpose:
 --------
 Subset Oculus Glider Data from downcast/upcast dives to single location cast profiles.

 The cast profiles may be created with one of the following three assumptions:
    - downcast only, gridded to 1m bins, geolocation as last good surface point or linear
        interpolation if no surface point
    - upcast only, gridded to 1m bins, geolocation of first good surface point or linear 
        interpolation if no surface point ***keep***
    - hybrid, gridded to 1m bins, geolocation of last good surface point or linear interpolation 
        if no surface point.  **This is used to address salinity spikes in sharp interfaces**


 Assumptions:
 ------------
 dt/dz threshold on both up and downcast set to *** 1 degC/m ***

 on merged profiles - keep: Temp, Salinity, Fluorometry, PAR (u,d), (oxy?), lat, lon, depth/press


In [3]:
#System Stack
import os
import argparse

#Science Stack
import numpy as np
import pandas as pd
import xarray as xa
import seawater as sw

from erddapy import ERDDAP


In [4]:
### these routines assume xarray ingestion of netcdf files... either from orig files or erddap

def find_sharp_grad(xdf,thresh=-1.0):
    """Locate the sharp temperature interface on the down- and upcast of a dive.

    The cast is treated as a two layer system with a sharp interface: the
    function finds the bottom of the upper layer (first sharp sample of the
    whole cast), the deepest sample, and the top of the bottom layer (first
    sharp sample at or after the deepest sample).

    Parameters
    ----------
    xdf : object exposing ``temperature`` and ``depth`` as 1-D arrays
        Samples in acquisition order (downcast followed by upcast).  An
        xarray Dataset is what the callers in this file pass.
    thresh : float, optional
        dT/dz threshold; a sample is "sharp" when its gradient is strictly
        below this value.  The same threshold is applied to both casts.

    Returns
    -------
    tuple
        ``(upper_depth, upper_depth_index, bottom_depth, bottom_depth_index,
        lower_depth, lower_depth_index)``.  The ``*_index`` entries are integer
        numpy arrays; ``upper_depth_index`` and ``lower_depth_index`` always
        hold exactly one element.

    Raises
    ------
    IndexError
        If no sample meets the sharpness criterion (on the whole cast, or on
        the upcast), or if no deepest sample can be identified.
    """
    dtdz = np.gradient(xdf.temperature,xdf.depth)

    # bottom of profile: deepest sample splits downcast from upcast
    bottom_depth = xdf.depth.max()
    bottom_depth_index = np.where(xdf.depth == bottom_depth)[0]
    bottom = int(bottom_depth_index[0])

    # positions of every sample meeting the sharpness criterion
    # (NaN gradients compare False and are therefore ignored)
    sharp = np.flatnonzero(dtdz < thresh)
    if sharp.size == 0:
        raise IndexError("no temperature gradient sharper than threshold in cast")

    # lower bound of the upper layer: first sharp sample, made shallower by one.
    # The position comes straight from the gradient mask rather than from a
    # depth-equality lookup, which is ambiguous when a depth value repeats.
    # Clamp at 0 so a sharp first sample cannot wrap around to index -1.
    first_sharp = int(sharp[0])
    upper_depth = xdf.depth[first_sharp]
    upper_depth_index = np.array([max(first_sharp - 1, 0)])

    # upper bound of the bottom layer: first sharp sample on the upcast only
    sharp_up = sharp[sharp >= bottom]
    if sharp_up.size == 0:
        raise IndexError("no temperature gradient sharper than threshold on upcast")

    # made deeper by one (one sample earlier on the upcast), never stepping
    # back past the bottom of the profile onto the downcast
    first_sharp_up = int(sharp_up[0])
    lower_depth = xdf.depth[first_sharp_up]
    lower_depth_index = np.array([max(first_sharp_up - 1, bottom)])

    return (upper_depth,upper_depth_index,bottom_depth,bottom_depth_index,lower_depth,lower_depth_index)


def bin_ave(thinned_xarray_set,depth_bin,depth_bin_labels):
    """Average a profile into depth bins.

    Parameters
    ----------
    thinned_xarray_set : object with a ``to_dataframe()`` method
        Profile data (e.g. an xarray Dataset).  The resulting dataframe's
        index is what gets binned -- presumably depth; confirm with caller.
    depth_bin : sequence of numbers
        Bin edges handed to ``pandas.cut``.
    depth_bin_labels : sequence
        One label per bin (``len(depth_bin) - 1`` entries); becomes the index
        of the result.

    Returns
    -------
    pandas.DataFrame
        Mean of every column per bin.  Empty bins are kept (as NaN rows) so
        the output always covers the full grid.
    """
    # Use the dataset passed in; the original read an undefined name ``xdfa``
    df = thinned_xarray_set.to_dataframe()
    bins=pd.cut(df.index, depth_bin, labels=depth_bin_labels)
    # observed=False pins the keep-empty-bins behaviour across pandas versions
    dfg = df.groupby(bins, observed=False).mean()

    return dfg

def find_max_inversion(temperature=None,salinity=None,pressure=None):
    """Return the most negative density gradient of a profile and its position.

    Density is computed with ``sw.dens`` from salinity, temperature and
    pressure, reduced by 1000, and differentiated with respect to pressure.

    Returns
    -------
    tuple
        ``(minimum gradient, index of that minimum)``, both ignoring NaNs.
    """
    density_anomaly = sw.dens(s=salinity,t=temperature,p=pressure) - 1000.
    density_gradient = np.gradient(density_anomaly,pressure)

    strongest = np.nanmin(density_gradient)
    position = np.nanargmin(density_gradient)
    return strongest,position

### Fill Profile
# Scale both temperature and salinity to 0->1
# this maps the shape of the temperature profile to the salinity profile
def scale(x):
    """Map the values of *x* linearly onto the range 0 -> 1.

    The smallest value becomes 0 and the largest becomes 1.  *x* must support
    element-wise arithmetic (e.g. a numpy array or pandas Series).
    """
    lowest = min(x)
    highest = max(x)
    span = highest - lowest
    return (x - lowest) / span

def rescale(x,y):
    """Stretch the inverted unit-scaled values ``1 - x`` onto the range given by *y*.

    ``x == 0`` maps to ``y[1]`` and ``x == 1`` maps to ``y[0]``.
    """
    start, end = y[0], y[1]
    inverted = 1 - x
    return inverted * (end - start) + start

In [5]:
# Query the ERDDAP server for the profile ids of the deployment; these are
# the keys looped over later on.
server_url = 'http://downdraft.pmel.noaa.gov:8080/erddap'

d = ERDDAP(server=server_url, protocol='tabledap', response='csv')
d.dataset_id = 'sg401_2017'
d.variables = ['profileid']

# The row of units information is skipped on read; rows with missing values
# are dropped before sorting by index.
profile_table = d.to_pandas(skiprows=(1,))
df = profile_table.dropna()
df.sort_index(inplace=True)

In [6]:
# Pull the science variables for the whole deployment as netCDF.
d = ERDDAP(server=server_url, protocol='tabledap', response='nc')
d.dataset_id = 'sg401_2017'
d.variables = [
    "profileid",
    "time",
    "salinity",
    "temperature",
    "pressure",
    "wlbb2fl_sig695nm_adjusted",
    "depth",
]

# Open question: download every profile individually or all at once?
# Downloading everything and then analyzing is the more efficient transfer as
# these files are actually small.  It may be harder when multiple data sets
# need to be combined -- going profile by profile could be easier then, e.g.:
#d.constraints = { 'profileid=':'p4010005'}

# time values are left undecoded
ds = d.to_xarray(decode_times=False)

In [29]:
# Run the interface search on a subset of profiles (rows 1-9 of the id table).
for i,k in df.iloc[1:10].iterrows():

    try:
        # mask the full dataset down to the samples of this profile
        (upper_depth,upper_depth_index,bottom_depth,bottom_depth_index,lower_depth,lower_depth_index) = find_sharp_grad( ds.where(ds.profileid == k.profileid) )
    except Exception as err:
        # Best effort: a profile that cannot be analysed is reported and
        # skipped.  Catching Exception (not a bare except) lets Ctrl-C and
        # interpreter shutdown through, and the message names the profile
        # and the reason so failures can be diagnosed.
        print(f"not gonna work: {k.profileid} ({err!r})")

  a = -(dx2)/(dx1 * (dx1 + dx2))
  c = dx1 / (dx2 * (dx1 + dx2))
  out[tuple(slice1)] = a * f[tuple(slice2)] + b * f[tuple(slice3)] + c * f[tuple(slice4)]
