# Processing of Slocum Glider-AD2CP Data: RU29 2020

jgradone@marine.rutgers.edu     03/10/2022    Initial <br>
jgradone@marine.rutgers.edu     06/16/2022    Update for pre-processing <br>

**This Jupyter Notebook is intended to:**<br>
1) Read glider data from ERDDAP <br>
2) Read in AD2CP data that has been pre-processed <br>
3) Least squares linear inversion on ADCP velocities referenced to true ocean velocity through a depth-averaged current constraint <br>
4) Save output from each segment<br>

*Details/comments on what the functions are actually doing can be found in the source code*

In [1]:
# Imports
import scipy.interpolate as interp
from scipy.sparse.linalg import lsqr
import scipy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import netCDF4 as nc
import math
import datetime
import xarray as xr
import matplotlib.dates as mdates
import dask.array as da
from erddapy import ERDDAP
from netCDF4 import Dataset
import gsw
import cmocean.cm as cmo
import sys
from datetime import datetime


## To import functions from Slocum-AD2CP GitHub repository, make this path the path to where the repo exists locally
sys.path.insert(0,'/home/jg1200/GitHub/Slocum-AD2CP/src/analysis/')
sys.path.insert(0,'/home/jg1200/GitHub/Slocum-AD2CP/src/data/')
from make_dataset import inversion, shear_method
from analysis import get_erddap_dataset

## Step 1: Load glider data

In [2]:
## ERDDAP dataset ID of the glider deployment to pull
ds_id = 'ru29-20200908T1623-trajectory-raw-delayed'

## Flight/science variables requested from ERDDAP. The same list is reused
## below as the dataframe column names, so the order here matters.
variables = [
    'depth', 'latitude', 'longitude', 'time',
    'sci_water_temp', 'sci_water_cond', 'source_file',
    'm_water_vx', 'm_water_vy', 'm_heading',
]
gdf = get_erddap_dataset(
    ds_id,
    server='http://slocum-data.marine.rutgers.edu/erddap',
    variables=variables,
    filetype='dataframe',
)
## Overwrite the returned column labels with the plain variable names
## (assumes the columns come back in the requested order -- TODO confirm)
gdf.columns = variables

## One group per glider source file (i.e. per segment): the first and last
## time stamp in each group are the segment start and end times
segment_times = gdf.groupby('source_file').time
start_times = segment_times.first().values
end_times = segment_times.last().values

## Time-zone-naive copies, used for slicing the AD2CP times
start_times2 = pd.to_datetime(start_times).tz_localize(None)
end_times2 = pd.to_datetime(end_times).tz_localize(None)

gdf

Unnamed: 0,depth,latitude,longitude,time,sci_water_temp,sci_water_cond,source_file,m_water_vx,m_water_vy,m_heading
0,0.00000,18.175290,-64.802942,2020-09-08T16:23:40.15Z,0.0000,0.00000,ru29-2020-251-4-0-mbd(04270000),,,
1,0.00000,18.175290,-64.802942,2020-09-08T16:23:47.43Z,0.0000,0.00000,ru29-2020-251-4-0-mbd(04270000),,,
2,,18.175290,-64.802942,2020-09-08T16:23:47.56Z,,,ru29-2020-251-4-0-mbd(04270000),,,1.80293
3,,18.175290,-64.802942,2020-09-08T16:24:48.43Z,,,ru29-2020-251-4-0-mbd(04270000),,,1.80293
4,,18.176045,-64.804093,2020-09-08T16:25:14.45Z,,,ru29-2020-251-4-0-mbd(04270000),,,
...,...,...,...,...,...,...,...,...,...,...
5087216,0.18886,18.139090,-64.784373,2020-11-12T07:31:32.76Z,27.6073,5.43973,ru29-2020-315-0-9-mbd(04470009),,,
5087217,0.18886,18.139090,-64.784373,2020-11-12T07:31:34.77Z,27.6072,5.43980,ru29-2020-315-0-9-mbd(04470009),,,
5087218,0.14910,18.139090,-64.784373,2020-11-12T07:31:36.79Z,27.6093,5.43990,ru29-2020-315-0-9-mbd(04470009),,,
5087219,0.15904,18.139090,-64.784373,2020-11-12T07:31:38.80Z,27.6093,5.43993,ru29-2020-315-0-9-mbd(04470009),,,


## Step 2: Load in AD2CP data frame

In [3]:
## Directory holding the pre-processed AD2CP netCDF files
path = '/scratch/jg1200/Data/GliderData/RU29/RU29_2020/Pre_Processed/'
files = np.sort(glob.glob(path+'*.nc'))

## Open every file as one dataset concatenated along time, then sort by time
## because the files are not necessarily read in time order
tot_ad2cp = xr.open_mfdataset(
    files,
    concat_dim="time",
    combine="nested",
).sortby('time')
tot_ad2cp

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.45 GiB 45.81 MiB Shape (40, 11580371) (40, 150116) Count 368 Tasks 92 Chunks Type float64 numpy.ndarray",11580371  40,

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.45 GiB 45.81 MiB Shape (40, 11580371) (40, 150116) Count 368 Tasks 92 Chunks Type float64 numpy.ndarray",11580371  40,

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.45 GiB 45.81 MiB Shape (40, 11580371) (40, 150116) Count 368 Tasks 92 Chunks Type float64 numpy.ndarray",11580371  40,

Unnamed: 0,Array,Chunk
Bytes,3.45 GiB,45.81 MiB
Shape,"(40, 11580371)","(40, 150116)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,44.18 MiB,586.39 kiB
Shape,"(11580371,)","(150116,)"
Count,368 Tasks,92 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 44.18 MiB 586.39 kiB Shape (11580371,) (150116,) Count 368 Tasks 92 Chunks Type float32 numpy.ndarray",11580371  1,

Unnamed: 0,Array,Chunk
Bytes,44.18 MiB,586.39 kiB
Shape,"(11580371,)","(150116,)"
Count,368 Tasks,92 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,88.35 MiB,1.15 MiB
Shape,"(11580371,)","(150116,)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 88.35 MiB 1.15 MiB Shape (11580371,) (150116,) Count 368 Tasks 92 Chunks Type float64 numpy.ndarray",11580371  1,

Unnamed: 0,Array,Chunk
Bytes,88.35 MiB,1.15 MiB
Shape,"(11580371,)","(150116,)"
Count,368 Tasks,92 Chunks
Type,float64,numpy.ndarray


## Steps 3-10: Big loop to process velocity data and save output

In [None]:
# AD2CP was only on starting at segment 408 and off after segment 976
## For every glider segment in that range: pull the segment's depth-averaged
## current (DAC) and start/end position and time from the glider dataframe,
## subset the AD2CP dataset to the segment's time window, run the inversion,
## and write one CSV per segment.

## Vertical resolution passed to inversion() as dz; also used in the output directory name
dz = 10
## The AD2CP time coordinate does not change between segments, so extract it once
ad2cp_times = tot_ad2cp.time.values

for x in np.arange(408,978):
    ## Subset glider df to one segment
    subsetgdf = gdf[(gdf.time >= start_times[x]) & (gdf.time <= end_times[x])]

    ## Positions of the non-NaN DAC samples. If there is no DAC, continue onto next segment.
    ## (notna().to_numpy() is used instead of np.isnan(...).ravel() because
    ## Series.ravel is deprecated in recent pandas.)
    ind = np.flatnonzero(subsetgdf.m_water_vx.notna().to_numpy())
    if ind.size == 0:
        continue

    ## Check if glider has a max depth greater than 10 meters. If not, continue to next segment.
    ## This is needed because the glider opens and closes a log file at the surface while sending data and
    ## so indexing by unique file names gives a segment with just surface data (which we don't want).
    if subsetgdf.depth.max() < 10:
        continue

    ## Last non-NaN DAC. vy is read at the position of the last valid vx.
    vx = subsetgdf.m_water_vx.iloc[ind[-1]]
    vy = subsetgdf.m_water_vy.iloc[ind[-1]]

    ## First and last non-NaN position. Latitude is read at the positions where longitude is valid.
    ind1 = np.flatnonzero(subsetgdf.longitude.notna().to_numpy())
    vx_start_lon = subsetgdf.longitude.iloc[ind1[0]]
    vx_start_lat = subsetgdf.latitude.iloc[ind1[0]]
    vx_end_lon   = subsetgdf.longitude.iloc[ind1[-1]]
    vx_end_lat   = subsetgdf.latitude.iloc[ind1[-1]]
    ## Not using the index here because I want the segment start and end times
    vx_start_tm  = subsetgdf.time.iloc[0]
    vx_end_tm    = subsetgdf.time.iloc[-1]

    ## Subset AD2CP dataset based on segment start and end times
    ad2cp_time_ind = np.where((ad2cp_times >= start_times2[x]) & (ad2cp_times <= end_times2[x]))[0]
    if ad2cp_time_ind.size == 0:
        continue

    subset_ad2cp = tot_ad2cp.sel(time=slice(ad2cp_times[ad2cp_time_ind[0]], ad2cp_times[ad2cp_time_ind[-1]]))

    ## Just check if there is still data after the subsetting
    if len(subset_ad2cp.time) == 0:
        del subset_ad2cp
        continue

    ## Now ready for inversion!
    O_ls, G_ls, bin_new, obs_per_bin = inversion(
        U=subset_ad2cp.UVelocity.values,
        V=subset_ad2cp.VVelocity.values,
        dz=dz,
        u_daverage=vx,
        v_daverage=vy,
        bins=subset_ad2cp['VelocityRange'].values,
        depth=subset_ad2cp['Pressure'].values,
        wDAC=5,
        wSmoothness=1,
    )
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Inversion", x ,"out of",len(start_times),"at" ,now)

    ###############################################
    #             Save master dataset             #
    ###############################################
    fname = "/home/jg1200/Data/GliderData/RU29/RU29_2020_AD2CP_NC/Processed_{}_meter_vertical_resolution/RU29_2020_AD2CP_Processed_Segment_{}.csv".format(dz,x)

    ## Make into a dataframe to save as a CSV. O_ls is complex: real part is saved as u,
    ## imaginary part as v. Per-segment scalars are repeated once per depth bin.
    n_bins = len(bin_new)
    d = {'inversion_u': np.real(O_ls), 'inversion_v': np.imag(O_ls), "inversion_depth": bin_new,
         "start_lon": np.tile(vx_start_lon, n_bins), "start_lat": np.tile(vx_start_lat, n_bins),
         "end_lon": np.tile(vx_end_lon, n_bins), "end_lat": np.tile(vx_end_lat, n_bins),
         "start_tm": np.tile(vx_start_tm, n_bins), "end_tm": np.tile(vx_end_tm, n_bins),
         "obs_per_bin": obs_per_bin}

    df = pd.DataFrame(data=d)
    df.to_csv(fname)
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Writing Data", x ,"out of",len(start_times),"at" ,now)
    del subset_ad2cp



Finished Inversion 408 out of 1081 at 06/27/22 20:33:52
Finished Writing Data 408 out of 1081 at 06/27/22 20:33:52
Finished Inversion 410 out of 1081 at 06/27/22 20:35:01
Finished Writing Data 410 out of 1081 at 06/27/22 20:35:01
Finished Inversion 412 out of 1081 at 06/27/22 20:36:09
Finished Writing Data 412 out of 1081 at 06/27/22 20:36:09
Finished Inversion 414 out of 1081 at 06/27/22 20:37:13
Finished Writing Data 414 out of 1081 at 06/27/22 20:37:13
Finished Inversion 416 out of 1081 at 06/27/22 20:38:22
Finished Writing Data 416 out of 1081 at 06/27/22 20:38:22
Finished Inversion 418 out of 1081 at 06/27/22 20:39:26
Finished Writing Data 418 out of 1081 at 06/27/22 20:39:26
Finished Inversion 421 out of 1081 at 06/27/22 20:40:28
Finished Writing Data 421 out of 1081 at 06/27/22 20:40:28
Finished Inversion 423 out of 1081 at 06/27/22 20:41:34
Finished Writing Data 423 out of 1081 at 06/27/22 20:41:34
Finished Inversion 425 out of 1081 at 06/27/22 20:42:40
Finished Writing Data 42

Finished Inversion 572 out of 1081 at 06/27/22 21:45:23
Finished Writing Data 572 out of 1081 at 06/27/22 21:45:56
Finished Inversion 575 out of 1081 at 06/27/22 21:47:02
Finished Writing Data 575 out of 1081 at 06/27/22 21:47:02
Finished Inversion 577 out of 1081 at 06/27/22 21:48:10
Finished Writing Data 577 out of 1081 at 06/27/22 21:48:10
Finished Inversion 579 out of 1081 at 06/27/22 21:49:10
Finished Writing Data 579 out of 1081 at 06/27/22 21:49:10
Finished Inversion 581 out of 1081 at 06/27/22 21:50:05
Finished Writing Data 581 out of 1081 at 06/27/22 21:50:05
Finished Inversion 583 out of 1081 at 06/27/22 21:51:07
Finished Writing Data 583 out of 1081 at 06/27/22 21:51:07
Finished Inversion 585 out of 1081 at 06/27/22 21:52:04
Finished Writing Data 585 out of 1081 at 06/27/22 21:52:04
Finished Inversion 586 out of 1081 at 06/27/22 21:53:01
Finished Writing Data 586 out of 1081 at 06/27/22 21:53:01
Finished Inversion 588 out of 1081 at 06/27/22 21:53:55
Finished Writing Data 58

Finished Inversion 727 out of 1081 at 06/27/22 23:02:59
Finished Writing Data 727 out of 1081 at 06/27/22 23:02:59
Finished Inversion 730 out of 1081 at 06/27/22 23:03:58
Finished Writing Data 730 out of 1081 at 06/27/22 23:03:58
Finished Inversion 732 out of 1081 at 06/27/22 23:04:57
Finished Writing Data 732 out of 1081 at 06/27/22 23:04:57
Finished Inversion 734 out of 1081 at 06/27/22 23:05:56
Finished Writing Data 734 out of 1081 at 06/27/22 23:05:56
Finished Inversion 736 out of 1081 at 06/27/22 23:06:58
Finished Writing Data 736 out of 1081 at 06/27/22 23:06:58
Finished Inversion 738 out of 1081 at 06/27/22 23:08:03
Finished Writing Data 738 out of 1081 at 06/27/22 23:08:03
Finished Inversion 742 out of 1081 at 06/27/22 23:09:13
Finished Writing Data 742 out of 1081 at 06/27/22 23:09:13
Finished Inversion 744 out of 1081 at 06/27/22 23:10:20
Finished Writing Data 744 out of 1081 at 06/27/22 23:10:20
Finished Inversion 746 out of 1081 at 06/27/22 23:11:09
Finished Writing Data 74

## Do the same thing but for the shear method now

In [None]:
# AD2CP was only on starting at segment 408 and off after segment 976
## Same per-segment workflow as the inversion loop, but the velocity profile
## is computed with shear_method() and written to a separate output directory.

## Vertical resolution passed to shear_method() as dz; also used in the output directory name
dz = 10
## The AD2CP time coordinate does not change between segments, so extract it once
ad2cp_times = tot_ad2cp.time.values

for x in np.arange(408,978):
    ## Subset glider df to one segment
    subsetgdf = gdf[(gdf.time >= start_times[x]) & (gdf.time <= end_times[x])]

    ## Positions of the non-NaN DAC samples. If there is no DAC, continue onto next segment.
    ## (notna().to_numpy() is used instead of np.isnan(...).ravel() because
    ## Series.ravel is deprecated in recent pandas.)
    ind = np.flatnonzero(subsetgdf.m_water_vx.notna().to_numpy())
    if ind.size == 0:
        continue

    ## Check if glider has a max depth greater than 10 meters. If not, continue to next segment.
    ## This is needed because the glider opens and closes a log file at the surface while sending data and
    ## so indexing by unique file names gives a segment with just surface data (which we don't want).
    if subsetgdf.depth.max() < 10:
        continue

    ## Last non-NaN DAC. vy is read at the position of the last valid vx.
    vx = subsetgdf.m_water_vx.iloc[ind[-1]]
    vy = subsetgdf.m_water_vy.iloc[ind[-1]]

    ## First and last non-NaN position. Latitude is read at the positions where longitude is valid.
    ind1 = np.flatnonzero(subsetgdf.longitude.notna().to_numpy())
    vx_start_lon = subsetgdf.longitude.iloc[ind1[0]]
    vx_start_lat = subsetgdf.latitude.iloc[ind1[0]]
    vx_end_lon   = subsetgdf.longitude.iloc[ind1[-1]]
    vx_end_lat   = subsetgdf.latitude.iloc[ind1[-1]]
    ## Not using the index here because I want the segment start and end times
    vx_start_tm  = subsetgdf.time.iloc[0]
    vx_end_tm    = subsetgdf.time.iloc[-1]

    ## Subset AD2CP dataset based on segment start and end times
    ad2cp_time_ind = np.where((ad2cp_times >= start_times2[x]) & (ad2cp_times <= end_times2[x]))[0]
    if ad2cp_time_ind.size == 0:
        continue

    subset_ad2cp = tot_ad2cp.sel(time=slice(ad2cp_times[ad2cp_time_ind[0]], ad2cp_times[ad2cp_time_ind[-1]]))

    ## Just check if there is still data after the subsetting
    if len(subset_ad2cp.time) == 0:
        del subset_ad2cp
        continue

    ## Now ready for shear method!!
    vel_referenced, bin_centers, vel_referenced_std = shear_method(
        U=subset_ad2cp.UVelocity.values,
        V=subset_ad2cp.VVelocity.values,
        W=subset_ad2cp.WVelocity.values,
        vx=vx,
        vy=vy,
        bins=subset_ad2cp['VelocityRange'].values,
        depth=subset_ad2cp['Pressure'].values,
        dz=dz,
    )
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Shear Method", x ,"out of",len(start_times),"at" ,now)

    ###############################################
    #             Save master dataset             #
    ###############################################
    fname = "/home/jg1200/Data/GliderData/RU29/RU29_2020_AD2CP_NC/Processed_shear_{}_meter_vertical_resolution/RU29_2020_AD2CP_Processed_Segment_{}.csv".format(dz,x)

    ## Make into a dataframe to save as a CSV. Column 0 of the shear_method outputs is saved
    ## as U and column 1 as V. Per-segment scalars are repeated once per depth bin.
    n_bins = len(bin_centers)
    d = {'U_shear_method': vel_referenced[:,0], 'V_shear_method': vel_referenced[:,1], "bin_new_shear": bin_centers,
         "U_prof_std": vel_referenced_std[:,0], "V_prof_std": vel_referenced_std[:,1],
         "start_lon": np.tile(vx_start_lon, n_bins), "start_lat": np.tile(vx_start_lat, n_bins),
         "end_lon": np.tile(vx_end_lon, n_bins), "end_lat": np.tile(vx_end_lat, n_bins),
         "start_tm": np.tile(vx_start_tm, n_bins), "end_tm": np.tile(vx_end_tm, n_bins)}

    df = pd.DataFrame(data=d)
    df.to_csv(fname)
    now = datetime.now().strftime("%m/%d/%y %H:%M:%S")
    print("Finished Writing Data", x ,"out of",len(start_times),"at" ,now)
    del subset_ad2cp

