1. Query icepyx; see what tracks are available in area of interest

2. Save track numbers, beams, and repeat numbers into a dictionary

3. For each track/beam combination, loop over all possible repeat pairs

    A. Load all beams and all repeats for that track using icepyx (?). For all beams / repeats:
    
        - Do whatever we are doing with ATL03
    
        - Fill in nan gaps with noise
        
    B. For each repeat pair:
        
        - Loop across the along track coordinates: 
        
            Choices: window size, search width, running average window size, step, where to save data geographically
            
            Output: Best lag, corresponding correlation coefficient, equivalent along-track velocity
            
        - Save results in a text file with date collected, dx from ATL03 processing, lat, lon, veloc, correlation coefficient, best lag, # contributing nans

In [112]:
from icepyx import icesat2data as ipd
import os, glob, re, h5py, sys, pyproj
import matplotlib as plt
import shutil
import numpy as np
from pprint import pprint
from astropy.time import Time

%matplotlib inline

In [89]:
datapath = '/home/jovyan/shared/surface_velocity/FIS_ATL06'
ATL06_files=glob.glob(os.path.join(datapath, '*.h5'))




In [90]:
rgts = {}
for filepath in ATL06_files:
    filename = filepath.split('/')[-1]
    rgt = filename.split('_')[3][0:4]
    track = filename.split('_')[3][4:6]
#     print(rgt,track)
    if not rgt in rgts.keys():
        rgts[rgt] = []
        rgts[rgt].append(track)
    else:
        rgts[rgt].append(track)


# all rgt values in our study are are in rgts.keys()
print(rgts.keys())

# available tracks for each rgt are in rgts[rgt]; ex.:
print(rgts['0848'])

# let's work 0848, our first good track friend

dict_keys(['0080', '1131', '0232', '1031', '0634', '0507', '0131', '0192', '0354', '1061', '0492', '0690', '0970', '0187', '0558', '1335', '0741', '0659', '0894', '1183', '0680', '1101', '1168', '0034', '0568', '0705', '0293', '0711', '1040', '0070', '0543', '1244', '1192', '0314', '0126', '1193', '1147', '0253', '0451', '1122', '0994', '0391', '0141', '0979', '0476', '1223', '1137', '0726', '0918', '1314', '1253', '1177', '0750', '0330', '1010', '0193', '0781', '0872', '1299', '0629', '1055', '0695', '0309', '0467', '0802', '0644', '0461', '0415', '0635', '0924', '0482', '1214', '1076', '0573', '0339', '0833', '0171', '0446', '0385', '1336', '0796', '0369', '0756', '1238', '0674', '0903', '0955', '0650', '0772', '0832', '0766', '0513', '0308', '0857', '0720', '1162', '0848', '0202', '0019', '0071', '1138', '1259', '0522', '0390', '1254', '0360', '0933', '1025', '0512', '1000', '1153', '0842', '0400', '1351', '0751', '0628', '0537', '0583', '0878', '1320', '0491', '0552', '0421', '1315

In [96]:
def atl06_to_dict(filename, beam, field_dict=None, index=None, epsg=None):
    """
        Read selected datasets from an ATL06 file

        Input arguments:
            filename: ATl06 file to read
            beam: a string specifying which beam is to be read (ex: gt1l, gt1r, gt2l, etc)
            field_dict: A dictinary describing the fields to be read
                    keys give the group names to be read, 
                    entries are lists of datasets within the groups
            index: which entries in each field to read
            epsg: an EPSG code specifying a projection (see www.epsg.org).  Good choices are:
                for Greenland, 3413 (polar stereographic projection, with Greenland along the Y axis)
                for Antarctica, 3031 (polar stereographic projection, centered on the Pouth Pole)
        Output argument:
            D6: dictionary containing ATL06 data.  Each dataset in 
                dataset_dict has its own entry in D6.  Each dataset 
                in D6 contains a numpy array containing the 
                data
    """
    if field_dict is None:
        field_dict={None:['latitude','longitude','h_li', 'atl06_quality_summary'],\
                    'ground_track':['x_atc','y_atc'],\
                    'fit_statistics':['dh_fit_dx', 'dh_fit_dy']}
    D={}
    # below: file_re = regular expression, it will pull apart the regular expression to get the information from the filename
    file_re=re.compile('ATL06_(?P<date>\d+)_(?P<rgt>\d\d\d\d)(?P<cycle>\d\d)(?P<region>\d\d)_(?P<release>\d\d\d)_(?P<version>\d\d).h5')
    with h5py.File(filename,'r') as h5f:
        for key in field_dict:
            for ds in field_dict[key]:
                if key is not None:
                    ds_name=beam+'/land_ice_segments/'+key+'/'+ds
                else:
                    ds_name=beam+'/land_ice_segments/'+ds
                if index is not None:
                    D[ds]=np.array(h5f[ds_name][index])
                else:
                    D[ds]=np.array(h5f[ds_name])
                if '_FillValue' in h5f[ds_name].attrs:
                    bad_vals=D[ds]==h5f[ds_name].attrs['_FillValue']
                    D[ds]=D[ds].astype(float)
                    D[ds][bad_vals]=np.NaN
        D['data_start_utc'] = h5f['/ancillary_data/data_start_utc'][:]
        D['delta_time'] = h5f['/' + beam + '/land_ice_segments/delta_time'][:]
        D['segment_id'] = h5f['/' + beam + '/land_ice_segments/segment_id'][:]
    if epsg is not None:
        xy=np.array(pyproj.proj.Proj(epsg)(D['longitude'], D['latitude']))
        D['x']=xy[0,:].reshape(D['latitude'].shape)
        D['y']=xy[1,:].reshape(D['latitude'].shape)
    temp=file_re.search(filename)
    D['rgt']=int(temp['rgt'])
    D['cycle']=int(temp['cycle'])
    D['beam']=beam
    return D

# A revised code to plot the elevations of segment midpoints (h_li):
def plot_elevation(D6, ind=None, **kwargs):
    """
    Plot midpoint elevation for each ATL06 segment
    """
    if ind is None:
        ind=np.ones_like(D6['h_li'], dtype=bool)
    # pull out heights of segment midpoints
    h_li = D6['h_li'][ind]
    # pull out along track x coordinates of segment midpoints
    x_atc = D6['x_atc'][ind]

    plt.plot(x_atc, h_li, **kwargs)

  file_re=re.compile('ATL06_(?P<date>\d+)_(?P<rgt>\d\d\d\d)(?P<cycle>\d\d)(?P<region>\d\d)_(?P<release>\d\d\d)_(?P<version>\d\d).h5')


# Loop over rgts and do the correlation processing

TOMORROW: START WITH NEXT CELL IN OLD CODE, IMPLEMENT MAKING THE X1 VEC AND LOOPING

In [117]:
cycles = ['03','04','05','06','07'] # not doing 1 and 2, because don't overlap exactly
# this could be future work

beams = ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']

# try and smooth without filling nans
dx = 20 # x_atc coordinate distance
smoothing_window_size = int(np.round(40 / dx)) # meters / dx;
# ex., 60 m smoothing window is a 3 point running average smoothed dataset, because each point is 20 m apart
filt = np.ones(smoothing_window_size)
smoothed = True

segment_length = 2000 # m
search_width = 800 # m


for rgt in rgts.keys():
    if rgt == '0848': # just want to work on this track for now
        
        ### load all files for this rgt
        rgt_files = glob.glob(os.path.join(datapath, f'*ATL06_*_{rgt}*_003*.h5'))
        
        ### extract data from all available cycles
        x_atc = {}
        h_li_raw = {}
        h_li = {}
        h_li_diff = {}
        times = {}
        min_seg_ids = {}
        min_x_atc = {}

        cycles_this_rgt = []
        for cycle in cycles:
            # load data that matches cycle; put into dictionaries to use shortly
            Di = {}
            x_atc[cycle] = {}
            h_li_raw[cycle] = {}
            h_li[cycle] = {}
            h_li_diff[cycle] = {}
            times[cycle] = {}
            min_seg_ids[cycle] = {}
            min_x_atc[cycle] = {}



            filenames = glob.glob(os.path.join(datapath, f'*ATL06_*_{rgt}{cycle}*_003*.h5'))
            print(filenames)
            error_count=0
            for filename in filenames:
                try:
                    for beam in beams:
                        Di[filename]=atl06_to_dict(filename,'/'+ beam, index=None, epsg=3031)

                        times[cycle][beam] = Di[filename]['data_start_utc']

                        # extract h_li and x_atc for that section                
                        x_atc_tmp = Di[filename]['x_atc']
                        h_li_tmp = Di[filename]['h_li']#[ixs]

                        # segment ids:
                        seg_ids = Di[filename]['segment_id']
                        min_seg_ids[cycle][beam] = seg_ids[0]
                        #print(len(seg_ids), len(x_atc_tmp))

                        # make a monotonically increasing x vector
                        # assumes dx = 20 exactly, so be carefull referencing back
                        ind = seg_ids - np.nanmin(seg_ids) # indices starting at zero, using the segment_id field, so any skipped segment will be kept in correct location
                        x_full = np.arange(np.max(ind)+1) * 20 + x_atc_tmp[0]
                        h_full = np.zeros(np.max(ind)+1) + np.NaN
                        h_full[ind] = h_li_tmp
                        min_x_atc[cycle][beam] = x_atc_tmp[0]


                        x_atc[cycle][beam] = x_full
                        h_li_raw[cycle][beam] = h_full

                        # running average smoother /filter
                        if smoothed == True:
                            h_smoothed = (1/smoothing_window_size) * np.convolve(filt, h_full, mode = 'same')
                            h_li[cycle][beam] = h_smoothed

                            # differentiate that section of data
                            h_diff = (h_smoothed[1:] - h_smoothed[0:-1]) / (x_full[1:] - x_full[0:-1])
                        else: 
                            h_li[cycle][beam] = h_full
                            h_diff = (h_full[1:] - h_full[0:-1]) / (x_full[1:] - x_full[0:-1])

                        h_li_diff[cycle][beam] = h_diff

#                         # plot
#                         axs[0].plot(x_full, h_full)
#                         axs[1].plot(x_full[1:], h_diff)
#         #                 axs[2].plot(x_atc_tmp[1:] - x_atc_tmp[:-1])
#                         axs[2].plot(np.isnan(h_full))
#                         axs[3].plot(seg_ids[1:]- seg_ids[:-1])


                    cycles_this_rgt+=[cycle]


                except KeyError as e:
                    print(f'file {filename} encountered error {e}')
                    error_count += 1
            
            print(f"For cycle {cycle}, read {len(Di)} data files of which {error_count} gave errors")
            
        ### Determine # of possible velocities:
        n_possible_veloc = len(cycles_this_rgt) -1 # naive, for now; can improve later
        for veloc_number in range(n_possible_veloc):
            cycle1 = cycles[veloc_number]
            cycle2 = cycles[veloc_number+1]
            t1_string = times[cycle1]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
            t1 = Time(t1_string)

            t2_string = times[cycle2]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
            t2 = Time(t2_string)

            dt = (t2 - t1).jd # difference in julian days
        
            velocities = {}     
            for beam in beams:
                # fig1, axs = plt.subplots(4,1)

                ### determine x1: larger value for both beams, if different
                x1 = np.nanmax([x_atc[cycle1][beam][0], x_atc[cycle2][beam][0]])                
                
                # cut out small chunk of data at time t1 (first cycle)
                x_full_t1 = x_atc[cycle1][beam]
                ix_x1 = np.arange(len(x_full_t1))[x_full_t1 >= x1][0]
                ix_x2 = ix_x1 + int(np.round(segment_length/dx))      
                x_t1 = x_full_t1[ix_x1:ix_x2]
                h_li1 = h_li_diff[cycle1][beam][ix_x1-1:ix_x2-1] # start 1 index earlier because the data are differentiated

                # cut out a wider chunk of data at time t2 (second cycle)
                x_full_t2 = x_atc[cycle2][beam]
                ix_x3 = ix_x1 - int(np.round(search_width/dx)) # offset on earlier end by # indices in search_width
                ix_x4 = ix_x2 + int(np.round(search_width/dx)) # offset on later end by # indices in search_width
                x_t2 = x_full_t2[ix_x3:ix_x4]
                h_li2 = h_li_diff[cycle2][beam][ix_x3:ix_x4]

                # plot data
                # axs[0].plot(x_t2, h_li2, 'r')
                # axs[0].plot(x_t1, h_li1, 'k')
                # axs[0].set_xlabel('x_atc (m)')

                # correlate old with newer data
                corr = correlate(h_li1, h_li2, mode = 'valid', method = 'direct') 

                # normalize correlation function; simplest way (not quite correct)
                # norm_val = np.sqrt(np.sum(h_li1**2)*np.sum(h_li2**2)) # normalize so values range between 0 and 1
                # corr = corr / norm_val

                # a better way to normalize correlation function: shifting along longer vector
                coeff_a_val = np.sum(h_li1**2)
                coeff_b_val = np.zeros(len(h_li2) - len(h_li1)+1)
                for shift in range(len(h_li2) - len(h_li1)+1):
                    coeff_b_val[shift] = np.sum(h_li2[shift:shift + len(h_li1)]**2)
                norm_vec = np.sqrt(coeff_a_val * coeff_b_val)
                corr = corr / norm_vec


        #         lagvec = np.arange( -(len(h_li1) - 1), len(h_li2), 1)# for mode = 'full'
        #         lagvec = np.arange( -int(search_width/dx) - 1, int(search_width/dx) +1, 1) # for mode = 'valid'
                lagvec = np.arange(- int(np.round(search_width/dx)), int(search_width/dx) +1,1)# for mode = 'valid'

                shift_vec = lagvec * dx

                ix_peak = np.arange(len(corr))[corr == np.nanmax(corr)][0]
                best_lag = lagvec[ix_peak]
                best_shift = shift_vec[ix_peak]
                velocities[beam] = best_shift/(dt/365)

                # axs[1].plot(lagvec,corr)
                # axs[1].plot(lagvec[ix_peak],corr[ix_peak], 'r*')
                # axs[1].set_xlabel('lag (samples)')

                # axs[2].plot(shift_vec,corr)
                # axs[2].plot(shift_vec[ix_peak],corr[ix_peak], 'r*')
                # axs[2].set_xlabel('shift (m)')

                ## plot shifted data
                # axs[3].plot(x_t2, h_li2, 'r')
                # axs[3].plot(x_t1 - best_shift, h_li1, 'k')
                # axs[3].set_xlabel('x_atc (m)')

                # axs[0].text(x_t2[100], 0.6*np.nanmax(h_li2), beam)
                # axs[1].text(lagvec[5], 0.6*np.nanmax(corr), 'best lag: ' + str(best_lag) + '; corr val: ' + str(np.round(corr[ix_peak],3)))
                # axs[2].text(shift_vec[5], 0.6*np.nanmax(corr), 'best shift: ' + str(best_shift) + ' m'+ '; corr val: ' + str(np.round(corr[ix_peak],3)))
                # axs[2].text(shift_vec[5], 0.3*np.nanmax(corr), 'veloc of ' + str(np.round(best_shift/(dt/365),1)) + ' m/yr')


            
        
#         Di={}
#         error_count=0
#         for file in rgt_files:
#             try:
#                 D_dict[file]=atl06_to_dict(file, '/gt2l', index=slice(0, -1, 25), epsg=3031)
#             except KeyError as e:
#                 print(f'file {file} encountered error {e}')
#                 error_count += 1
#         print(f"read {len(D_dict)} data files of which {error_count} gave errors")
            


['/home/jovyan/shared/surface_velocity/FIS_ATL06/processed_ATL06_20190523195046_08480311_003_01.h5']
For cycle 03, read 1 data files of which 0 gave errors
['/home/jovyan/shared/surface_velocity/FIS_ATL06/processed_ATL06_20190822153035_08480411_003_01.h5']
For cycle 04, read 1 data files of which 0 gave errors
[]
For cycle 05, read 0 data files of which 0 gave errors
[]
For cycle 06, read 0 data files of which 0 gave errors
[]
For cycle 07, read 0 data files of which 0 gave errors


NameError: name 'x1' is not defined

In [110]:
cycles_this_rgt

['03', '04']

In [118]:
min_x_atc

{'03': {'gt1l': 29123498.50436068,
  'gt1r': 29123518.40319396,
  'gt2l': 29124294.457597345,
  'gt2r': 29124314.356425773,
  'gt3l': 29125110.309463445,
  'gt3r': 29125130.2082869},
 '04': {'gt1l': 29123498.50436068,
  'gt1r': 29123518.40319396,
  'gt2l': 29124294.457597345,
  'gt2r': 29124314.356425773,
  'gt3l': 29125090.410639867,
  'gt3r': 29125130.2082869},
 '05': {},
 '06': {},
 '07': {}}