# Processing of Slocum Glider-AD2CP Data: RU29 2021 Mission 1

jgradone@marine.rutgers.edu     03/10/2022    Initial <br>
jgradone@marine.rutgers.edu     06/16/2022    Update for pre-processing <br>

**This Jupyter Notebook is intended to:**<br>
1) Read glider data from ERDDAP <br>
2) Read in AD2CP data that has been pre-processed <br>
3) Least squares linear inversion on ADCP velocities referenced to true ocean velocity through a depth averaged current constraint <br>
4) Save output from each segment<br>

*Details/comments on what the functions are actually doing are in the source code*

In [1]:
# Imports
import scipy.interpolate as interp
from scipy.sparse.linalg import lsqr
import scipy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import netCDF4 as nc
import math
import datetime
import xarray as xr
import matplotlib.dates as mdates
import dask.array as da
from erddapy import ERDDAP
from netCDF4 import Dataset
import gsw
import cmocean.cm as cmo
import sys
from datetime import datetime


## To import functions from Slocum-AD2CP GitHub repository, make this path the path to where the repo exists locally
sys.path.insert(0,'/home/jg1200/GitHub/Slocum-AD2CP/src/analysis/')
sys.path.insert(0,'/home/jg1200/GitHub/Slocum-AD2CP/src/data/')
from make_dataset import inversion, shear_method
from analysis import get_erddap_dataset

## Step 1: Load glider data

In [2]:
## ERDDAP dataset ID of the deployment being processed
ds_id = 'ru29-20210630T1343-trajectory-raw-delayed'

## Flight and science variables to request
variables = [
    'depth', 'latitude', 'longitude', 'time',
    'sci_water_temp', 'sci_water_cond', 'source_file',
    'm_water_vx', 'm_water_vy', 'm_heading',
]
gdf = get_erddap_dataset(ds_id, server='http://slocum-data.marine.rutgers.edu/erddap', variables = variables, filetype='dataframe')
## Rename the returned columns to the requested names
## (assumes the columns come back in the requested order -- TODO confirm)
gdf.columns = variables

## Each source file is treated as one segment: its first and last
## time stamps give the segment start and end times
segment_times = gdf.groupby('source_file')['time']
start_times = segment_times.first().values
end_times   = segment_times.last().values

## Time-zone-free copies, used for slicing the AD2CP times
start_times2, end_times2 = (
    pd.to_datetime(stamps).tz_localize(None) for stamps in (start_times, end_times)
)

gdf

Unnamed: 0,depth,latitude,longitude,time,sci_water_temp,sci_water_cond,source_file,m_water_vx,m_water_vy,m_heading
0,0.000000,18.170740,-64.831853,2021-06-30T13:43:52Z,0.0000,0.00000,ru29-2021-180-3-0-dbd(04570000),,,
1,,18.170740,-64.831853,2021-06-30T13:43:57Z,,,ru29-2021-180-3-0-dbd(04570000),-0.100307,0.03465,2.53247
2,0.000000,18.170740,-64.831853,2021-06-30T13:43:57Z,0.0000,0.00000,ru29-2021-180-3-0-dbd(04570000),,,
3,,18.170740,-64.831853,2021-06-30T13:44:54Z,,,ru29-2021-180-3-0-dbd(04570000),-0.100307,0.03465,2.53247
4,,18.170465,-64.834150,2021-06-30T13:45:20Z,,,ru29-2021-180-3-0-dbd(04570000),,,
...,...,...,...,...,...,...,...,...,...,...
732984,0.238559,18.164417,-63.468548,2021-07-12T12:53:52Z,28.1624,5.63906,ru29-2021-192-0-9-dbd(04620009),,,
732985,0.198799,18.164417,-63.468548,2021-07-12T12:53:54Z,28.1629,5.63894,ru29-2021-192-0-9-dbd(04620009),,,
732986,0.178920,18.164417,-63.468548,2021-07-12T12:53:56Z,28.1619,5.63898,ru29-2021-192-0-9-dbd(04620009),,,
732987,0.198799,18.164417,-63.468548,2021-07-12T12:53:58Z,28.1565,5.63899,ru29-2021-192-0-9-dbd(04620009),,,


## Step 2: Load in AD2CP data frame

In [3]:
## Directory holding the pre-processed AD2CP NetCDF files
path = '/scratch/jg1200/Data/GliderData/RU29/RU29_2021_Mission_1/Pre_Processed/'
files = np.sort(glob.glob(path+'*.nc'))

## Concatenate all files along time. The files are not necessarily read in
## time order, so sort on time right after opening.
tot_ad2cp = xr.open_mfdataset(files, combine="nested", concat_dim="time").sortby('time')
tot_ad2cp

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.12 GiB 46.09 MiB Shape (40, 3752193) (40, 151036) Count 364 Tasks 91 Chunks Type float64 numpy.ndarray",3752193  40,

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.12 GiB 46.09 MiB Shape (40, 3752193) (40, 151036) Count 364 Tasks 91 Chunks Type float64 numpy.ndarray",3752193  40,

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.12 GiB 46.09 MiB Shape (40, 3752193) (40, 151036) Count 364 Tasks 91 Chunks Type float64 numpy.ndarray",3752193  40,

Unnamed: 0,Array,Chunk
Bytes,1.12 GiB,46.09 MiB
Shape,"(40, 3752193)","(40, 151036)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.31 MiB,589.98 kiB
Shape,"(3752193,)","(151036,)"
Count,364 Tasks,91 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 14.31 MiB 589.98 kiB Shape (3752193,) (151036,) Count 364 Tasks 91 Chunks Type float32 numpy.ndarray",3752193  1,

Unnamed: 0,Array,Chunk
Bytes,14.31 MiB,589.98 kiB
Shape,"(3752193,)","(151036,)"
Count,364 Tasks,91 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,28.63 MiB,1.15 MiB
Shape,"(3752193,)","(151036,)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 28.63 MiB 1.15 MiB Shape (3752193,) (151036,) Count 364 Tasks 91 Chunks Type float64 numpy.ndarray",3752193  1,

Unnamed: 0,Array,Chunk
Bytes,28.63 MiB,1.15 MiB
Shape,"(3752193,)","(151036,)"
Count,364 Tasks,91 Chunks
Type,float64,numpy.ndarray


## Steps 3-10: Big loop to process velocity data and save output

In [5]:
## Vertical resolution passed to the inversion; also used in the output directory name
dz = 5
## Read the AD2CP time axis once instead of on every segment
ad2cp_times = tot_ad2cp.time.values

for x in range(len(start_times)):

    ## Subset glider df to one segment
    subsetgdf = gdf[(gdf.time >= start_times[x]) & (gdf.time <= end_times[x])]

    ## Check if glider has a max depth greater than 10 meters. If not, continue to next segment.
    ## This is needed because the glider opens and closes a log file at the surface while sending data and
    ## so indexing by unique file names gives a segment with just surface data (which we don't want).
    ## Done before any positional indexing so surface-only segments can never reach it.
    if subsetgdf.depth.max() < 10:
        continue

    ## Pull out last non-NaN DAC. A segment without any DAC is skipped
    ## (the shear-method loop applies the same rule) instead of raising an IndexError.
    dac_rows = subsetgdf[subsetgdf.m_water_vx.notna()]
    if dac_rows.empty:
        continue
    vx = dac_rows.m_water_vx.iloc[-1]
    vy = dac_rows.m_water_vy.iloc[-1]

    ## Pull out first and last non-NaN lat/lon; skip the segment if it has no position at all
    fix_rows = subsetgdf[subsetgdf.longitude.notna()]
    if fix_rows.empty:
        continue
    vx_start_lon = fix_rows.longitude.iloc[0]
    vx_start_lat = fix_rows.latitude.iloc[0]
    vx_end_lon   = fix_rows.longitude.iloc[-1]
    vx_end_lat   = fix_rows.latitude.iloc[-1]
    ## Not using the non-NaN rows here because I want the segment start and end times
    vx_start_tm  = subsetgdf.time.iloc[0]
    vx_end_tm    = subsetgdf.time.iloc[-1]

    ## Subset AD2CP dataset based on segment start and end times
    in_segment = (ad2cp_times >= start_times2[x]) & (ad2cp_times <= end_times2[x])
    if not in_segment.any():
        continue
    segment_ad2cp_times = ad2cp_times[in_segment]
    subset_ad2cp = tot_ad2cp.sel(time=slice(segment_ad2cp_times[0], segment_ad2cp_times[-1]))

    ## Just check if there is still data after the subsetting
    if len(subset_ad2cp.time) == 0:
        continue

    ## Now ready for inversion!
    O_ls, G_ls, bin_new, obs_per_bin = inversion(U=subset_ad2cp.UVelocity.values,V=subset_ad2cp.VVelocity.values,dz=dz,u_daverage=vx,v_daverage=vy,bins = subset_ad2cp['VelocityRange'].values, depth = subset_ad2cp['Pressure'].values,wDAC=5, wSmoothness=1)
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Inversion", x ,"out of",len(start_times),"at" ,now)

    ###############################################
    #             Save master dataset             #
    ###############################################
    fname = "/home/jg1200/Data/GliderData/RU29/RU29_2021_Mission_1_AD2CP_NC/Processed_{}_meter_vertical_resolution/RU29_2021_Mission_1_AD2CP_Processed_Segment_{}.csv".format(dz,x)

    ## Make into a dataframe to save as a CSV: one row per inversion depth bin,
    ## with the segment start/end position and time repeated on every row.
    ## O_ls is complex; its real part is saved as u and its imaginary part as v.
    n_bins = len(bin_new)
    d = {'inversion_u': np.real(O_ls), 'inversion_v': np.imag(O_ls), "inversion_depth": bin_new,
         "start_lon": np.tile(vx_start_lon, n_bins), "start_lat": np.tile(vx_start_lat, n_bins),
         "end_lon": np.tile(vx_end_lon, n_bins), "end_lat": np.tile(vx_end_lat, n_bins),
         "start_tm": np.tile(vx_start_tm, n_bins), "end_tm": np.tile(vx_end_tm, n_bins),
         "obs_per_bin": obs_per_bin}

    df = pd.DataFrame(data=d)
    df.to_csv(fname)
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Writing Data", x ,"out of",len(start_times),"at" ,now)



Finished Inversion 0 out of 233 at 06/27/22 21:56:58
Finished Writing Data 0 out of 233 at 06/27/22 21:56:58
Finished Inversion 2 out of 233 at 06/27/22 21:57:44
Finished Writing Data 2 out of 233 at 06/27/22 21:57:44
Finished Inversion 4 out of 233 at 06/27/22 21:58:37
Finished Writing Data 4 out of 233 at 06/27/22 21:58:37
Finished Inversion 6 out of 233 at 06/27/22 21:59:30
Finished Writing Data 6 out of 233 at 06/27/22 21:59:30
Finished Inversion 8 out of 233 at 06/27/22 22:00:50
Finished Writing Data 8 out of 233 at 06/27/22 22:00:50
Finished Inversion 10 out of 233 at 06/27/22 22:01:35
Finished Writing Data 10 out of 233 at 06/27/22 22:01:35
Finished Inversion 12 out of 233 at 06/27/22 22:01:53
Finished Writing Data 12 out of 233 at 06/27/22 22:01:53
Finished Inversion 13 out of 233 at 06/27/22 22:02:53
Finished Writing Data 13 out of 233 at 06/27/22 22:02:53
Finished Inversion 15 out of 233 at 06/27/22 22:03:29
Finished Writing Data 15 out of 233 at 06/27/22 22:03:29
Finished In

Finished Inversion 148 out of 233 at 06/27/22 22:57:52
Finished Writing Data 148 out of 233 at 06/27/22 22:57:52
Finished Inversion 150 out of 233 at 06/27/22 22:58:29
Finished Writing Data 150 out of 233 at 06/27/22 22:58:29
Finished Inversion 152 out of 233 at 06/27/22 22:58:56
Finished Writing Data 152 out of 233 at 06/27/22 22:58:56
Finished Inversion 154 out of 233 at 06/27/22 22:59:44
Finished Writing Data 154 out of 233 at 06/27/22 22:59:44
Finished Inversion 156 out of 233 at 06/27/22 23:02:01
Finished Writing Data 156 out of 233 at 06/27/22 23:02:01
Finished Inversion 158 out of 233 at 06/27/22 23:02:50
Finished Writing Data 158 out of 233 at 06/27/22 23:02:50
Finished Inversion 159 out of 233 at 06/27/22 23:03:46
Finished Writing Data 159 out of 233 at 06/27/22 23:03:46
Finished Inversion 161 out of 233 at 06/27/22 23:04:55
Finished Writing Data 161 out of 233 at 06/27/22 23:04:55
Finished Inversion 163 out of 233 at 06/27/22 23:06:02
Finished Writing Data 163 out of 233 at 0

## Do the same thing but for the shear method now

In [None]:
for x in range(len(start_times)):

    ## Glider rows that fall inside this segment's time window
    subsetgdf = gdf[(gdf.time >= start_times[x]) & (gdf.time <= end_times[x])]

    ## Positions of the rows that carry a (non-NaN) DAC
    dac_idx = np.flatnonzero(~np.isnan(subsetgdf.m_water_vx.values))

    ## Segments without any DAC are skipped
    if len(dac_idx) == 0:
        continue

    ## Use the last DAC reported in the segment
    last_dac = dac_idx[-1]
    vx = subsetgdf.m_water_vx.iloc[last_dac]
    vy = subsetgdf.m_water_vy.iloc[last_dac]

    ## First and last non-NaN lat/lon of the segment
    fix_idx = np.flatnonzero(~np.isnan(subsetgdf.longitude.values))
    first_fix, last_fix = fix_idx[0], fix_idx[-1]
    vx_start_lon = subsetgdf.longitude.iloc[first_fix]
    vx_start_lat = subsetgdf.latitude.iloc[first_fix]
    vx_end_lon   = subsetgdf.longitude.iloc[last_fix]
    vx_end_lat   = subsetgdf.latitude.iloc[last_fix]
    ## Segment start/end times come from the first/last row, not from the non-NaN positions
    vx_start_tm  = subsetgdf.time.iloc[0]
    vx_end_tm    = subsetgdf.time.iloc[-1]

    ## Skip segments that never get deeper than 10 meters. The glider opens and closes a
    ## log file at the surface while sending data, so grouping by file name also yields
    ## segments with just surface data (which we don't want).
    if subsetgdf.depth.max() < 10:
        continue

    ## AD2CP samples that fall inside the segment's start and end times
    ad2cp_time = tot_ad2cp.time.values
    ad2cp_time_ind = np.where((ad2cp_time >= start_times2[x]) & (ad2cp_time <= end_times2[x]))[0]
    if len(ad2cp_time_ind) == 0:
        continue
    first_ping = tot_ad2cp.time.values[ad2cp_time_ind[0]]
    last_ping  = tot_ad2cp.time.values[ad2cp_time_ind[-1]]
    subset_ad2cp = tot_ad2cp.sel(time=slice(first_ping, last_ping))

    ## Only process if there is still data after the subsetting
    if len(subset_ad2cp.time) > 0:
        ## Now ready for shear method!!
        dz=5
        vel_referenced, bin_centers, vel_referenced_std = shear_method(U=subset_ad2cp.UVelocity.values, V=subset_ad2cp.VVelocity.values, W=subset_ad2cp.WVelocity.values, vx=vx, vy=vy, bins = subset_ad2cp['VelocityRange'].values, depth = subset_ad2cp['Pressure'].values, dz=dz)
        now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
        print("Finished Shear Method", x ,"out of",len(start_times),"at" ,now)

        ###############################################
        #             Save master dataset             #
        ###############################################
        fname = "/home/jg1200/Data/GliderData/RU29/RU29_2021_Mission_1_AD2CP_NC/Processed_shear_{}_meter_vertical_resolution/RU29_2021_Mission_1_AD2CP_Processed_Segment_{}.csv".format(dz,x)

        ## One row per bin center; column 0 / column 1 of the returned arrays are saved
        ## as U / V, and the segment start/end position and time are repeated on every row
        n_bins = len(bin_centers)
        d = {'U_shear_method': vel_referenced[:,0], 'V_shear_method': vel_referenced[:,1], "bin_new_shear": bin_centers,
             "U_prof_std": vel_referenced_std[:,0], "V_prof_std": vel_referenced_std[:,1],
             "start_lon": np.tile(vx_start_lon, n_bins), "start_lat": np.tile(vx_start_lat, n_bins),
             "end_lon": np.tile(vx_end_lon, n_bins), "end_lat": np.tile(vx_end_lat, n_bins),
             "start_tm": np.tile(vx_start_tm, n_bins), "end_tm": np.tile(vx_end_tm, n_bins)}

        df = pd.DataFrame(data=d)
        df.to_csv(fname)
        now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
        print("Finished Writing Data", x ,"out of",len(start_times),"at" ,now)

    ## Drop the reference to this segment's AD2CP subset before the next iteration
    del subset_ad2cp



  result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Finished Shear Method 0 out of 233 at 06/27/22 23:15:46
Finished Writing Data 0 out of 233 at 06/27/22 23:15:46
Finished Shear Method 2 out of 233 at 06/27/22 23:15:50
Finished Writing Data 2 out of 233 at 06/27/22 23:15:50
Finished Shear Method 4 out of 233 at 06/27/22 23:15:53
Finished Writing Data 4 out of 233 at 06/27/22 23:15:53
Finished Shear Method 6 out of 233 at 06/27/22 23:15:57
Finished Writing Data 6 out of 233 at 06/27/22 23:15:57
Finished Shear Method 8 out of 233 at 06/27/22 23:16:01
Finished Writing Data 8 out of 233 at 06/27/22 23:16:01
Finished Shear Method 10 out of 233 at 06/27/22 23:16:05
Finished Writing Data 10 out of 233 at 06/27/22 23:16:05
Finished Shear Method 12 out of 233 at 06/27/22 23:16:06
Finished Writing Data 12 out of 233 at 06/27/22 23:16:06
Finished Shear Method 13 out of 233 at 06/27/22 23:16:11
Finished Writing Data 13 out of 233 at 06/27/22 23:16:11
Finished Shear Method 15 out of 233 at 06/27/22 23:16:14
Finished Writing Data 15 out of 233 at 06

## Take a quick peek at results. Actual analysis in different notebook

In [None]:
## Directory of per-segment CSV files to look at
## NOTE(review): the processing loops in this notebook write to
## "Processed_{dz}_meter_vertical_resolution/" and "Processed_shear_{dz}_meter_vertical_resolution/",
## not to "Processed/" -- confirm this is the intended directory
path = "/home/jg1200/Data/GliderData/RU29/RU29_2021_Mission_1_AD2CP_NC/Processed/"
files = np.sort(glob.glob(path+'*.csv'))

## Read every file and stack them into one long dataframe
df = pd.concat([pd.read_csv(csv_file) for csv_file in files])

df

# ds = xr.Dataset(
#      {"inversion_u": (("depth","time"), df.inversion_u.values),
#      "inversion_v": (("depth", "time"), df.inversion_v.values)},
#      coords = {"depth" : np.unique(df.inversion_depth.values), "time" : np.unique(df.start_tm.values)})

In [None]:
## Depth axis of the grid
## NOTE(review): 10 m spacing here, while the inversion loop in this notebook uses dz = 5 --
## confirm this matches the vertical resolution of the files being read
inversion_depth = np.arange(5,1030,10)
n_depth = len(inversion_depth)
n_files = len(files)

## One entry per file, NaN until filled (np.nan: the np.NaN alias was removed in NumPy 2.0)
inversion_lat  = np.full(n_files, np.nan)
inversion_lon  = np.full(n_files, np.nan)
inversion_time = np.full(n_files, np.nan)

## Velocity grids: one row per depth bin, one column per file
u_grid = np.full((n_depth, n_files), np.nan)
v_grid = np.full((n_depth, n_files), np.nan)


## Loop through by file, load in each file
for x, csv_file in enumerate(files):

    df = pd.read_csv(csv_file)
    n_bins = len(df)

    ## An empty file contributes nothing; its column stays NaN
    if n_bins == 0:
        continue

    ## Fail with a clear message instead of an indexing error when a profile
    ## has more bins than the grid has rows
    if n_bins > n_depth:
        raise ValueError("{} has {} depth bins but the grid only has {} rows".format(csv_file, n_bins, n_depth))

    ## Profiles are written top-down into the first n_bins rows of column x
    u_grid[:n_bins, x] = df.inversion_u.values
    v_grid[:n_bins, x] = df.inversion_v.values

    ## The CSVs written by the inversion loop store the segment position as start_lat/start_lon;
    ## fall back to latitude/longitude for files that use those column names instead
    lat_col = 'start_lat' if 'start_lat' in df.columns else 'latitude'
    lon_col = 'start_lon' if 'start_lon' in df.columns else 'longitude'
    inversion_lat[x] = df[lat_col].values[0]
    inversion_lon[x] = df[lon_col].values[0]
    #inversion_time[x] = pd.to_datetime(df.start_tm.values[0])
      

In [None]:
## Gridded u velocity: rows are depth bins, columns are files
fig, ax = plt.subplots(figsize=(18,10))
u_mesh = ax.pcolormesh(u_grid, cmap='RdBu', vmin=-0.4, vmax=0.4)
## Flip the y axis so the first depth bin is at the top
ax.invert_yaxis()
fig.colorbar(u_mesh, ax=ax, label='E-W Velocity [m/s]')
ax.set_title('RU29 2021 Anegada Mission 1')

In [None]:
## Gridded v velocity: rows are depth bins, columns are files
fig, ax = plt.subplots(figsize=(18,10))
v_mesh = ax.pcolormesh(v_grid, cmap='RdBu', vmin=-0.4, vmax=0.4)
## Flip the y axis so the first depth bin is at the top
ax.invert_yaxis()
fig.colorbar(v_mesh, ax=ax, label='N-S Velocity [m/s]')
ax.set_title('RU29 2021 Anegada Mission 1')