1. Query icepyx; see what tracks are available in area of interest <-- doesn't work, using local data instead

2. Save track numbers, beams, and repeat numbers into a dictionary

3. For each track/beam combination, loop over all possible repeat pairs

    A. Load all beams and all repeats for that track using icepyx (?). For all beams / repeats:
    
        - Do whatever we are doing with ATL03
    
        - Fill in nan gaps with noise
        
    B. For each repeat pair:
        
        - Loop across the along track coordinates: 
        
            Choices: window size, search width, running average window size, step, where to save data geographically
            
            Output: Best lag, corresponding correlation coefficient, equivalent along-track velocity
            
        - Save results in a hdf5 file with date collected, dx from ATL03 processing, lat, lon, veloc, correlation coefficient, best lag, # contributing nans

In [230]:
from icepyx import icesat2data as ipd
import os, glob, re, h5py, sys, pyproj
import matplotlib as plt
import shutil
import numpy as np
from pprint import pprint
from astropy.time import Time
from scipy.signal import correlate, detrend
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib widget

import pointCollection as pc

In [2]:
datapath = '/home/jovyan/shared/surface_velocity/FIS_ATL06'
ATL06_files=glob.glob(os.path.join(datapath, '*.h5'))

out_path = 'shared/surface_velocity/ATL06_out/'


In [4]:
rgts = {}
for filepath in ATL06_files:
    filename = filepath.split('/')[-1]
    rgt = filename.split('_')[3][0:4]
    track = filename.split('_')[3][4:6]
#     print(rgt,track)
    if not rgt in rgts.keys():
        rgts[rgt] = []
        rgts[rgt].append(track)
    else:
        rgts[rgt].append(track)


# all rgt values in our study are are in rgts.keys()
print(rgts.keys())

# available tracks for each rgt are in rgts[rgt]; ex.:
print(rgts['0848'])


dict_keys(['0080', '1131', '0232', '1031', '0634', '0507', '0131', '0192', '0354', '1061', '0492', '0690', '0970', '0187', '0558', '1335', '0741', '0659', '0894', '1183', '0680', '1101', '1168', '0034', '0568', '0705', '0293', '0711', '1040', '0070', '0543', '1244', '1192', '0314', '0126', '1193', '1147', '0253', '0451', '1122', '0994', '0391', '0141', '0979', '0476', '1223', '1137', '0726', '0918', '1314', '1253', '1177', '0750', '0330', '1010', '0193', '0781', '0872', '1299', '0629', '1055', '0695', '0309', '0467', '0802', '0644', '0461', '0415', '0635', '0924', '0482', '1214', '1076', '0573', '0339', '0833', '0171', '0446', '0385', '1336', '0796', '0369', '0756', '1238', '0674', '0903', '0955', '0650', '0772', '0832', '0766', '0513', '0308', '0857', '0720', '1162', '0848', '0202', '0019', '0071', '1138', '1259', '0522', '0390', '1254', '0360', '0933', '1025', '0512', '1000', '1153', '0842', '0400', '1351', '0751', '0628', '0537', '0583', '0878', '1320', '0491', '0552', '0421', '1315

In [5]:
def atl06_to_dict(filename, beam, field_dict=None, index=None, epsg=None):
    """
        Read selected datasets from an ATL06 file

        Input arguments:
            filename: ATl06 file to read
            beam: a string specifying which beam is to be read (ex: gt1l, gt1r, gt2l, etc)
            field_dict: A dictinary describing the fields to be read
                    keys give the group names to be read, 
                    entries are lists of datasets within the groups
            index: which entries in each field to read
            epsg: an EPSG code specifying a projection (see www.epsg.org).  Good choices are:
                for Greenland, 3413 (polar stereographic projection, with Greenland along the Y axis)
                for Antarctica, 3031 (polar stereographic projection, centered on the Pouth Pole)
        Output argument:
            D6: dictionary containing ATL06 data.  Each dataset in 
                dataset_dict has its own entry in D6.  Each dataset 
                in D6 contains a numpy array containing the 
                data
    """
    if field_dict is None:
        field_dict={None:['latitude','longitude','h_li', 'atl06_quality_summary'],\
                    'ground_track':['x_atc','y_atc'],\
                    'fit_statistics':['dh_fit_dx', 'dh_fit_dy']}
    D={}
    # below: file_re = regular expression, it will pull apart the regular expression to get the information from the filename
    file_re=re.compile('ATL06_(?P<date>\d+)_(?P<rgt>\d\d\d\d)(?P<cycle>\d\d)(?P<region>\d\d)_(?P<release>\d\d\d)_(?P<version>\d\d).h5')
    with h5py.File(filename,'r') as h5f:
        for key in field_dict:
            for ds in field_dict[key]:
                if key is not None:
                    ds_name=beam+'/land_ice_segments/'+key+'/'+ds
                else:
                    ds_name=beam+'/land_ice_segments/'+ds
                if index is not None:
                    D[ds]=np.array(h5f[ds_name][index])
                else:
                    D[ds]=np.array(h5f[ds_name])
                if '_FillValue' in h5f[ds_name].attrs:
                    bad_vals=D[ds]==h5f[ds_name].attrs['_FillValue']
                    D[ds]=D[ds].astype(float)
                    D[ds][bad_vals]=np.NaN
        D['data_start_utc'] = h5f['/ancillary_data/data_start_utc'][:]
        D['delta_time'] = h5f['/' + beam + '/land_ice_segments/delta_time'][:]
        D['segment_id'] = h5f['/' + beam + '/land_ice_segments/segment_id'][:]
    if epsg is not None:
        xy=np.array(pyproj.proj.Proj(epsg)(D['longitude'], D['latitude']))
        D['x']=xy[0,:].reshape(D['latitude'].shape)
        D['y']=xy[1,:].reshape(D['latitude'].shape)
    temp=file_re.search(filename)
    D['rgt']=int(temp['rgt'])
    D['cycle']=int(temp['cycle'])
    D['beam']=beam
    return D

# A revised code to plot the elevations of segment midpoints (h_li):
def plot_elevation(D6, ind=None, **kwargs):
    """
    Plot midpoint elevation for each ATL06 segment
    """
    if ind is None:
        ind=np.ones_like(D6['h_li'], dtype=bool)
    # pull out heights of segment midpoints
    h_li = D6['h_li'][ind]
    # pull out along track x coordinates of segment midpoints
    x_atc = D6['x_atc'][ind]

    plt.plot(x_atc, h_li, **kwargs)

# Loop over rgts and do the correlation processing

some functions:

In [224]:
def load_data_by_rgt(rgt, smoothing, smoothing_window_size, dx, path_to_data, product):
    """ 
    rgt: repeat ground track number of desired data
    smoothing: if true, a centered running avergae filter of smoothing_window_size will be used
    smoothing_window_size: how large a smoothing window to use (in meters)
    dx: desired spacing 
    path_to_data: 
    product: ex., ATL06
    """ 
    
    # hard code these for now:
    cycles = ['03','04','05','06','07'] # not doing 1 and 2, because don't overlap exactly
    beams = ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r'] 

    ### extract data from all available cycles
    x_atc = {}
    lats = {}
    lons = {}
    h_li_raw = {} # unsmoothed data; equally spaced x_atc, still has nans 
    h_li_raw_NoNans = {} # unsmoothed data; equally spaced x_atc, nans filled with noise
    h_li = {} # smoothed data, equally spaced x_atc, nans filled with noise 
    h_li_diff = {}
    times = {}
    min_seg_ids = {}
    segment_ids = {}

    cycles_this_rgt = []
    for cycle in cycles: # loop over all available cycles
        Di = {}
        x_atc[cycle] = {}
        lats[cycle] = {}
        lons[cycle] = {}
        h_li_raw[cycle] = {}
        h_li_raw_NoNans[cycle] = {}
        h_li[cycle] = {}
        h_li_diff[cycle] = {}
        times[cycle] = {}
        min_seg_ids[cycle] = {}
        segment_ids[cycle] = {}


        filenames = glob.glob(os.path.join(path_to_data, f'*{product}_*_{rgt}{cycle}*_003*.h5'))
        error_count=0


        for filename in filenames: # try and load any available files; hopefully is just one
            try:
                for beam in beams:
                    Di[filename]=atl06_to_dict(filename,'/'+ beam, index=None, epsg=3031)

                    times[cycle][beam] = Di[filename]['data_start_utc']

                    # extract h_li and x_atc, and lat/lons for that section                
                    x_atc_tmp = Di[filename]['x_atc']
                    h_li_tmp = Di[filename]['h_li']#[ixs]
                    lats_tmp = Di[filename]['latitude']
                    lons_tmp = Di[filename]['longitude']


                    # segment ids:
                    seg_ids = Di[filename]['segment_id']
                    min_seg_ids[cycle][beam] = seg_ids[0]
                    #print(len(seg_ids), len(x_atc_tmp))

                    # make a monotonically increasing x vector
                    # assumes dx = 20 exactly, so be carefull referencing back
                    ind = seg_ids - np.nanmin(seg_ids) # indices starting at zero, using the segment_id field, so any skipped segment will be kept in correct location
                    x_full = np.arange(np.max(ind)+1) * 20 + x_atc_tmp[0]
                    h_full = np.zeros(np.max(ind)+1) + np.NaN
                    h_full[ind] = h_li_tmp
                    lats_full = np.zeros(np.shape(x_full)) * np.nan
                    lats_full[ind] = lats_tmp
                    lons_full = np.zeros(np.shape(x_full)) * np.nan
                    lons_full[ind] = lons_tmp

                    ## save the segment id's themselves, with gaps filled in
                    segment_ids[cycle][beam] = np.zeros(np.max(ind)+1) + np.NaN
                    segment_ids[cycle][beam][ind] = seg_ids


                    x_atc[cycle][beam] = x_full
                    h_li_raw[cycle][beam] = h_full # preserves nan values
                    lons[cycle][beam] = lons_full
                    lats[cycle][beam] = lats_full

                    ### fill in nans with noise h_li datasets
            #                         h = ma.array(h_full,mask =np.isnan(h_full)) # created a masked array, mask is where the nans are
            #                         h_full_filled = h.mask * (np.random.randn(*h.shape)) # fill in all the nans with random noise

                    ### interpolate nans in pandas
                    # put in dataframe for just this step; eventually rewrite to use only dataframes?              
                    data = {'x_full': x_full, 'h_full': h_full}
                    df = pd.DataFrame(data, columns = ['x_full','h_full'])
                    #df.plot(x='x_full',y='h_full')
                    # linear interpolation for now
                    df['h_full'].interpolate(method = 'linear', inplace = True)
                    h_full_interp = df['h_full'].values
                    h_li_raw_NoNans[cycle][beam] = h_full_interp # has filled nan values


                    # running average smoother /filter
                    if smoothing == True:
                        h_smoothed = (1/smoothing_window_size) * np.convolve(filt, h_full_interp, mode = 'same')
                        h_li[cycle][beam] = h_smoothed

                        # differentiate that section of data
                        h_diff = (h_smoothed[1:] - h_smoothed[0:-1]) / (x_full[1:] - x_full[0:-1])
                    else: 
                        h_li[cycle][beam] = h_full_interp
                        h_diff = (h_full_interp[1:] - h_full_interp[0:-1]) / (x_full[1:] - x_full[0:-1])
                    h_li_diff[cycle][beam] = h_diff



                    #print(len(x_full), len(h_full), len(lats_full), len(seg_ids), len(h_full_interp), len(h_diff))


                cycles_this_rgt+=[cycle]
            except KeyError as e:
                print(f'file {filename} encountered error {e}')
                error_count += 1

    print('Cycles available: ' + ','.join(cycles_this_rgt))
    return x_atc, lats, lons, h_li_raw, h_li_raw_NoNans, h_li, h_li_diff, \
            times, min_seg_ids, segment_ids, cycles_this_rgt
    
    

In [251]:
cycles = ['03','04','05','06','07'] # not doing 1 and 2, because don't overlap exactly
# this could be future work

beams = ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']

product = 'ATL06'
dx = 20 # x_atc coordinate distance

# control 
segment_length = 2000 # m
search_width = 800 # m
along_track_step = 100 # m; how much to jump between each veloc determination
max_percent_nans = 10 # what % of segment length can be nans

# smoothing
smoothing = True
smoothing_window_size = int(np.round(40 / dx)) # meters / dx;
# ex., 60 m smoothing window is a 3 point running average smoothed dataset, because each point is 20 m apart
filt = np.ones(smoothing_window_size)

velocities = {}   
correlations = {}     
lags = {}
x_atcs_for_velocities = {}
latitudes = {}
longitudes = {}
rgts_with_errors = []
total_number_repeat_tracks_processed = 0
for ir, rgt in enumerate(rgts.keys()):
    if ir >= 0: # in case you want to look at certain ones
        try:
            print('\nProcessing rgt ' + rgt + ', #' +str(ir) + ' of ' + str(len(rgts.keys())))

            ### load all files for this rgt
            rgt_files = glob.glob(os.path.join(datapath, f'*ATL06_*_{rgt}*_003*.h5'))
            n_rgt_files_cycle3_and_after = 0
            for file in rgt_files:
                if float(file.split('/')[-1].split('_')[3][4:6]) >= 3:
                    n_rgt_files_cycle3_and_after += 1

            print('There are ' +str(n_rgt_files_cycle3_and_after) + ' files available for this track from cycle 3 onward')


            ### only process if there is at least one repeat track during the time period when data overlapped
            if n_rgt_files_cycle3_and_after >= 2:


                ### extract data from all available cycles
                x_atc, lats, lons, h_li_raw, h_li_raw_NoNans, h_li, h_li_diff, times, min_seg_ids, segment_ids, cycles_this_rgt = \
                    load_data_by_rgt(rgt, smoothing, smoothing_window_size, dx, datapath, product)
                # 98% sure this code returns the correct values
                
                ### Determine # of possible velocities:
                n_possible_veloc = len(cycles_this_rgt) -1 # naive, for now; can improve later
                for veloc_number in range(n_possible_veloc):
                    h5_file_out = f'{out_path}rgt{rgt}_veloc{veloc_number}.hdf5'
                    with h5py.File(h5_file_out, 'w') as f:
                        f['dx'] = dx 
                        f['product'] = product 
                        f['segment_length'] = segment_length 
                        f['search_width'] = search_width 
                        f['along_track_step'] = along_track_step 
                        f['max_percent_nans'] = max_percent_nans 
                        f['smoothing'] = smoothing 
                        f['smoothing_window_size'] = smoothing_window_size 
                        f['process_date'] = str(Time.now().value) 



                    cycle1 = cycles_this_rgt[veloc_number]
                    cycle2 = cycles_this_rgt[veloc_number+1]
                    t1_string = times[cycle1]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
                    t1 = Time(t1_string)

                    t2_string = times[cycle2]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
                    t2 = Time(t2_string)

                    dt = (t2 - t1).jd # difference in julian days


                    velocities[rgt] = {}   
                    correlations[rgt] = {}     
                    lags[rgt] = {}

                    for beam in beams:
                        # fig1, axs = plt.subplots(4,1)


                        ### determine x1: larger value for both beams, if different
                        min_x_atc_cycle1 = x_atc[cycle1][beam][0]
                        min_x_atc_cycle2 = x_atc[cycle2][beam][0]

                        # pick out the track that starts at greater x_atc, and use that as x1s vector
                        if min_x_atc_cycle1 != min_x_atc_cycle2: 
                            x1 = np.nanmax([min_x_atc_cycle1,min_x_atc_cycle2])
                            cycle_n = np.arange(0,2)[[min_x_atc_cycle1,min_x_atc_cycle2] == x1][0]
                            if cycle_n == 0:
                                cycletmp = cycle2
                            elif cycle_n == 1:
                                cycletmp = cycle1
                            n_segments_this_track = (len(x_atc[cycletmp][beam]) - search_width/dx) / (along_track_step/dx)
                            x1s = x_atc[cycletmp][beam][int(search_width/dx)+1::int(search_width/dx)]
                            # start at search_width/dx in, so the code never tries to get data outside the edges of this rgt
                            # add 1 bc the data are differentiated, and h_li_diff is therefore one point shorter

                        elif min_x_atc_cycle1 == min_x_atc_cycle2: # doesn't matter which cycle
                            x1s = x_atc[cycle1][beam][int(search_width/dx)+1::int(search_width/dx)]

                        ### determine xend: smaller value for both beams, if different
                        max_x_atc_cycle1 = x_atc[cycle1][beam][-1]
                        max_x_atc_cycle2 = x_atc[cycle2][beam][-1]
                        smallest_xatc = np.min([max_x_atc_cycle1,max_x_atc_cycle2])
                        ixmax = np.where(x1s >= smallest_xatc - search_width/dx)
                        if len(ixmax[0]) >= 1:
                            ixtmp = ixmax[0][0]
                            x1s = x1s[:ixtmp]

                        ### dicts to store info in
                        velocities[rgt][beam] = np.empty_like(x1s)
                        correlations[rgt][beam] = np.empty_like(x1s)
                        lags[rgt][beam] = np.empty_like(x1s)

                        midpoints_x_atc = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_lat = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_lon = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_seg_ids = np.empty(np.shape(x1s)) # for writing out 
                        
                        for xi, x1 in enumerate(x1s):
                            # cut out small chunk of data at time t1 (first cycle)
                            x_full_t1 = x_atc[cycle1][beam]
                            ix_x1 = np.arange(len(x_full_t1))[x_full_t1 >= x1][0]
                            ix_x2 = ix_x1 + int(np.round(segment_length/dx))      
                            x_t1 = x_full_t1[ix_x1:ix_x2]
                            lats_t1 = lats[cycle1][beam][ix_x1:ix_x2]
                            lons_t1 = lons[cycle1][beam][ix_x1:ix_x2]
                            seg_ids_t1 = segment_ids[cycle1][beam][ix_x1:ix_x2]
                            h_li1 = h_li_diff[cycle1][beam][ix_x1-1:ix_x2-1] # start 1 index earlier because 
                            # the h_li_diff data are differentiated, and therefore one sample shorter

                            # find midpoints; this is the position where we will assign the velocity measurement from each window
                            n = len(x_t1)
                            midpt_ix = int(np.floor(n/2))
                            midpoints_x_atc[xi] = x_t1[midpt_ix]
                            midpoints_lat[xi] = lats_t1[midpt_ix]
                            midpoints_lon[xi] = lons_t1[midpt_ix]
                            midpoints_seg_ids[xi] = seg_ids_t1[midpt_ix]
                            
                            # cut out a wider chunk of data at time t2 (second cycle)
                            x_full_t2 = x_atc[cycle2][beam]
                            ix_x3 = ix_x1 - int(np.round(search_width/dx)) # offset on earlier end by # indices in search_width
                            ix_x4 = ix_x2 + int(np.round(search_width/dx)) # offset on later end by # indices in search_width
                            x_t2 = x_full_t2[ix_x3:ix_x4]
                            h_li2 = h_li_diff[cycle2][beam][ix_x3-1:ix_x4-1]# start 1 index earlier because 
                            # the h_li_diff data are differentiated, and therefore one sample shorter

                            # plot data
                            # axs[0].plot(x_t2, h_li2, 'r')
                            # axs[0].plot(x_t1, h_li1, 'k')
                            # axs[0].set_xlabel('x_atc (m)')

                            ### if there are fewer than 10% nans in either data chunk:
                            n_nans1 = np.sum(np.isnan(h_li_raw[cycle1][beam][ix_x1:ix_x2]))
                            n_nans2 = np.sum(np.isnan(h_li_raw[cycle2][beam][ix_x3:ix_x4]))

                            if (n_nans1 / len(h_li1) <= max_percent_nans/100) and (n_nans2 / len(h_li2) <= max_percent_nans/100):

                                # correlate old with newer data
                                # detrend both chunks of data
                                h_li1 = detrend(h_li1,type = 'linear')
                                h_li2 = detrend(h_li2,type = 'linear')

                                # normalize both chunks of data
            #                         h_li1 = h_li1 / np.nanmax(np.abs(h_li1))
            #                         h_li2 = h_li2 / np.nanmax(np.abs(h_li2))

                                corr = correlate(h_li1, h_li2, mode = 'valid', method = 'direct') 

                                # a better way to normalize correlation function: shifting along longer vector
                                # normalize by autocorrelations
                                coeff_a_val = np.sum(h_li1**2)
                                coeff_b_val = np.zeros(len(h_li2) - len(h_li1)+1)
                                for shift in range(len(h_li2) - len(h_li1)+1):
                                    h_li2_section = h_li2[shift:shift + len(h_li1)]
                                    coeff_b_val[shift] = np.sum(h_li2_section **2)
                                norm_vec = np.sqrt(coeff_a_val * coeff_b_val)
                                corr_normed = corr / np.flip(norm_vec) # i don't really understand why this has to flip, but it does

                                lagvec = np.arange(- int(np.round(search_width/dx)), int(search_width/dx) +1,1)# for mode = 'valid'

                                shift_vec = lagvec * dx

                                ix_peak = np.arange(len(corr_normed))[corr_normed == np.nanmax(corr_normed)][0]
                                best_lag = lagvec[ix_peak]
                                best_shift = shift_vec[ix_peak]
                                velocities[rgt][beam][xi] = best_shift/(dt/365)
                                correlations[rgt][beam][xi] = corr_normed[ix_peak]
                                lags[rgt][beam][xi] = lagvec[ix_peak]
                            else:
                                velocities[rgt][beam][xi] = np.nan
                                correlations[rgt][beam][xi] = np.nan
                                lags[rgt][beam][xi] = np.nan
                                
                                
                        ### Add velocities to hdf5 file for each beam
                        with h5py.File(h5_file_out, 'a') as f:
                            f[beam +'/x_atc'] = midpoints_x_atc # assign x_atc value of half way along the segment
                            f[beam +'/latitudes'] = midpoints_lat # assign x_atc value of half way along the segment
                            f[beam +'/longitudes'] = midpoints_lon # assign x_atc value of half way along the segment
                            f[beam +'/velocities'] = velocities[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/correlation_coefficients'] = correlations[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/best_lags'] = lags[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/segment_ids'] = midpoints_seg_ids
                            f[beam +'/first_cycle_time'] = str(Time(times[cycle1][beam][0]))
                            f[beam +'/second_cycle_time'] = str(Time(times[cycle2][beam][0]))

                        
                    with h5py.File(h5_file_out, 'a') as f:
                        f['contributing_cycles'] = ','.join([cycle1,cycle2])

            
                total_number_repeat_tracks_processed += 1
                

        except (ValueError, IndexError) as e:
            print(f'rgt {rgt} encountered an error')
            print(e)
            rgts_with_errors.append(rgt)
            
print(f'Total number of repeat tracks successfully processed = {total_number_repeat_tracks_processed}')






Processing rgt 0080, #0 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1131, #1 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0232, #2 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1031, #3 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0634, #4 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0507, #5 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0131, #6 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0192, #7 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0354, #8 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1061, #9 of 218
There are 1 files available for this track from cycle 3 onward

P




Processing rgt 0918, #48 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1314, #49 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1253, #50 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1177, #51 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0750, #52 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0330, #53 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1010, #54 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0193, #55 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0781, #56 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04
rgt 0781 encountered an error
negative dimensions are not allowed

Processing rgt 0872, 




Processing rgt 0552, #121 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04





Processing rgt 0421, #122 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1315, #123 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1015, #124 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0954, #125 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0040, #126 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1275, #127 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0132, #128 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0812, #129 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0598, #130 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04





Processing rgt 0985, #131 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1016, #132 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1330, #133 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0324, #134 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0589, #135 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04




rgt 0589 encountered an error
index 95 is out of bounds for axis 0 with size 81

Processing rgt 1132, #136 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0909, #137 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0049, #138 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0370, #139 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0263, #140 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0735, #141 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04





Processing rgt 0613, #142 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04





Processing rgt 0095, #143 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 0431, #144 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0345, #145 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1071, #146 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0689, #147 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 1274, #148 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1376, #149 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 1345, #150 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0811, #151 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0004, #152 of 218
There are 1 files available for this track fr




Processing rgt 0065, #155 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0964, #156 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0217, #157 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1208, #158 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0893, #159 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0147, #160 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1360, #161 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0939, #162 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1092, #163 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1229, #164 of 218
There are 1 files available for this track from cycle 3 onward

Proce



rgt 0574 encountered an error
index 153 is out of bounds for axis 0 with size 81

Processing rgt 0949, #174 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0873, #175 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1070, #176 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 0497, #177 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0009, #178 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1366, #179 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 0528, #180 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 1198, #181 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0025, #182 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0619, 




Processing rgt 0827, #192 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 0665, #193 of 218
There are 2 files available for this track from cycle 3 onward
Cycles available: 03,04

Processing rgt 0430, #194 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0116, #195 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0771, #196 of 218
There are 0 files available for this track from cycle 3 onward

Processing rgt 0208, #197 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0086, #198 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0375, #199 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0186, #200 of 218
There are 1 files available for this track from cycle 3 onward

Processing rgt 0248, #201 of 218
There are 1 files available for this track from cycle 3 onward

Proce


# Load data, make a map of correlation coefficient

In [252]:
!ls /home/jovyan/shared/surface_velocity/ATL06_out/

rgt0446_veloc0.hdf5  rgt0598_veloc0.hdf5  rgt0741_veloc0.hdf5
rgt0451_veloc0.hdf5  rgt0604_veloc0.hdf5  rgt0750_veloc0.hdf5
rgt0452_veloc0.hdf5  rgt0613_veloc0.hdf5  rgt0751_veloc0.hdf5
rgt0461_veloc0.hdf5  rgt0619_veloc0.hdf5  rgt0756_veloc0.hdf5
rgt0467_veloc0.hdf5  rgt0629_veloc0.hdf5  rgt0766_veloc0.hdf5
rgt0476_veloc0.hdf5  rgt0634_veloc0.hdf5  rgt0772_veloc0.hdf5
rgt0482_veloc0.hdf5  rgt0635_veloc0.hdf5  rgt0781_veloc0.hdf5
rgt0491_veloc0.hdf5  rgt0644_veloc0.hdf5  rgt0787_veloc0.hdf5
rgt0497_veloc0.hdf5  rgt0650_veloc0.hdf5  rgt0796_veloc0.hdf5
rgt0507_veloc0.hdf5  rgt0659_veloc0.hdf5  rgt0802_veloc0.hdf5
rgt0512_veloc0.hdf5  rgt0665_veloc0.hdf5  rgt0811_veloc0.hdf5
rgt0513_veloc0.hdf5  rgt0674_veloc0.hdf5  rgt0817_veloc0.hdf5
rgt0522_veloc0.hdf5  rgt0680_veloc0.hdf5  rgt0833_veloc0.hdf5
rgt0537_veloc0.hdf5  rgt0689_veloc0.hdf5  rgt0848_veloc0.hdf5
rgt0552_veloc0.hdf5  rgt0695_veloc0.hdf5  rgt0872_veloc0.hdf5
rgt0558_veloc0.hdf5  rgt0705_veloc0.hdf5  rgt0888_veloc0.hdf5
rgt0568_

In [259]:
!h5ls -r /home/jovyan/shared/surface_velocity/ATL06_out/rgt0589_veloc0.hdf5

/                        Group
/along_track_step        Dataset {SCALAR}
/dx                      Dataset {SCALAR}
/gt1l                    Group
/gt1l/best_lags          Dataset {397}
/gt1l/correlation_coefficients Dataset {397}
/gt1l/first_cycle_time   Dataset {SCALAR}
/gt1l/latitudes          Dataset {397}
/gt1l/longitudes         Dataset {397}
/gt1l/second_cycle_time  Dataset {SCALAR}
/gt1l/segment_ids        Dataset {397}
/gt1l/velocities         Dataset {397}
/gt1l/x_atc              Dataset {397}
/gt1r                    Group
/gt1r/best_lags          Dataset {397}
/gt1r/correlation_coefficients Dataset {397}
/gt1r/first_cycle_time   Dataset {SCALAR}
/gt1r/latitudes          Dataset {397}
/gt1r/longitudes         Dataset {397}
/gt1r/second_cycle_time  Dataset {SCALAR}
/gt1r/segment_ids        Dataset {397}
/gt1r/velocities         Dataset {397}
/gt1r/x_atc              Dataset {397}
/gt2l                    Group
/gt2l/best_lags          Dataset {304}
/gt2l/correlation_coefficie

In [326]:
coeffs > 0.65

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True,  True, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False, False,  True,  True,  True,  True,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [319]:
f = h5py.File(file, 'r')
lats = f[f'/{beam}/latitudes'][()]
lons = f[f'/{beam}/longitudes'][()]
coeffs = f[f'/{beam}/correlation_coefficients'][()]
xy=np.array(pyproj.proj.Proj(epsg)(lons,lats))

plt.figure()
plt.scatter(xy[0], xy[1], 10, coeffs)
# f.close()
print(f[f'/{beam}/'].keys())

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<KeysViewHDF5 ['best_lags', 'correlation_coefficients', 'first_cycle_time', 'latitudes', 'longitudes', 'second_cycle_time', 'segment_ids', 'velocities', 'x_atc']>


In [336]:
moa_datapath = '/srv/tutorial-data/land_ice_applications/'
spatial_extent = np.array([-102, -76, -98, -74.5])
spatial_extent = np.array([-65, -86, -55, -81])


lat=spatial_extent[[1, 3, 3, 1, 1]]
lon=spatial_extent[[2, 2, 0, 0, 2]]
# project the coordinates to Antarctic polar stereographic
xy=np.array(pyproj.Proj(3031)(lon, lat))
# get the bounds of the projected coordinates 
XR=[np.nanmin(xy[0,:]), np.nanmax(xy[0,:])]
YR=[np.nanmin(xy[1,:]), np.nanmax(xy[1,:])]
MOA=pc.grid.data().from_geotif(os.path.join(moa_datapath, 'MOA','moa_2009_1km.tif'), bounds=[XR, YR])

epsg=3031

# show the mosaic:
plt.close('all')
plt.figure(figsize=[8,8])
hax0=plt.gcf().add_subplot(111, aspect='equal')
MOA.show(ax=hax0,cmap='gray', clim=[14000, 17000])
# hax1=plt.gcf().add_subplot(212, aspect='equal', sharex=hax0, sharey=hax0)
# MOA.show(ax=hax1, cmap='gray', clim=[14000, 17000]);
plt.title('Correlation Coefficient')

results_files = glob.glob(out_path + '/*.hdf5')
# plt.figure()
for file in results_files:
    #print(file)
    with h5py.File(file, 'r') as f:
        for beam in beams:
            try:
                lats = f[f'/{beam}/latitudes'][()]
                lons = f[f'/{beam}/longitudes'][()]
                coeffs = f[f'/{beam}/correlation_coefficients'][()]
                xy=np.array(pyproj.proj.Proj(epsg)(lons,lats))

                h = hax0.scatter(xy[0], xy[1], 0.25, coeffs, vmin = 0, vmax = 1)

            except:
                pass
plt.colorbar(h)

outfile = out_path + 'correlation_coefficient.png'
plt.savefig(outfile)


# show the mosaic:
# plt.close('all')
plt.figure(figsize=[8,8])
hax2=plt.gcf().add_subplot(111, aspect='equal')
MOA.show(ax=hax2,cmap='gray', clim=[14000, 17000])
# hax1=plt.gcf().add_subplot(212, aspect='equal', sharex=hax0, sharey=hax0)
# MOA.show(ax=hax1, cmap='gray', clim=[14000, 17000]);
plt.title('Best lag')

results_files = glob.glob(out_path + '/*.hdf5')
# plt.figure()
for file in results_files:
    #print(file)
    with h5py.File(file, 'r') as f:
        for beam in beams:
            try:
                lats = f[f'/{beam}/latitudes'][()]
                lons = f[f'/{beam}/longitudes'][()]
                lags = f[f'/{beam}/best_lags'][()]
                xy=np.array(pyproj.proj.Proj(epsg)(lons,lats))

                h = hax2.scatter(xy[0], xy[1], 0.25, lags, vmin = -10, vmax = 10)

            except:
                pass
plt.colorbar(h)

outfile = out_path + 'best_lag.png'
plt.savefig(outfile)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'cmap': 'gray', 'clim': [14000, 17000], 'extent': array([-887950., -356950.,  183825.,  561825.]), 'origin': 'lower'}


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'cmap': 'gray', 'clim': [14000, 17000], 'extent': array([-887950., -356950.,  183825.,  561825.]), 'origin': 'lower'}




# and masked by correlation coefficient

In [335]:
# correlation_threshold = 0.65

moa_datapath = '/srv/tutorial-data/land_ice_applications/'
spatial_extent = np.array([-102, -76, -98, -74.5])
spatial_extent = np.array([-65, -86, -55, -81])


lat=spatial_extent[[1, 3, 3, 1, 1]]
lon=spatial_extent[[2, 2, 0, 0, 2]]
# project the coordinates to Antarctic polar stereographic
xy=np.array(pyproj.Proj(3031)(lon, lat))
# get the bounds of the projected coordinates 
XR=[np.nanmin(xy[0,:]), np.nanmax(xy[0,:])]
YR=[np.nanmin(xy[1,:]), np.nanmax(xy[1,:])]
MOA=pc.grid.data().from_geotif(os.path.join(moa_datapath, 'MOA','moa_2009_1km.tif'), bounds=[XR, YR])

epsg=3031

# show the mosaic:
plt.close('all')
fig = plt.figure(figsize=[8,8])
hax0=fig.add_subplot(211, aspect='equal')
MOA.show(ax=hax0,cmap='gray', clim=[14000, 17000])
hax1=fig.add_subplot(212, aspect='equal')
MOA.show(ax=hax0,cmap='gray', clim=[14000, 17000])
# hax2=fig.add_subplot(311, aspect='equal')
# MOA.show(ax=hax0,cmap='gray', clim=[14000, 17000])

hax0.set_title('Correlation Coefficient, above correlation threshold ' + str(correlation_threshold))
hax1.set_title('Best lag, above correlation threshold ' + str(correlation_threshold))
# hax2.set_title('Best velocity, above correlation threshold ' + str(correlation_threshold))


results_files = glob.glob(out_path + '/*.hdf5')
# plt.figure()
for file in results_files:
    #print(file)
    with h5py.File(file, 'r') as f:
        for beam in beams:
            try:
                lats = f[f'/{beam}/latitudes'][()]
                lons = f[f'/{beam}/longitudes'][()]
                coeffs = f[f'/{beam}/correlation_coefficients'][()]
                lags = f[f'/{beam}/best_lags'][()]
                velocs = f[f'/{beam}/velocities'][()]

                xy=np.array(pyproj.proj.Proj(epsg)(lons,lats))
                ixs = coeffs > correlation_threshold

                h0 = hax0.scatter(xy[0][ixs], xy[1][ixs], 0.25, coeffs[ixs], vmin = correlation_threshold, vmax = 1)
                h1 = hax1.scatter(xy[0][ixs], xy[1][ixs], 0.25, lags[ixs], vmin = -7, vmax = 7)
#                 h2 = hax2.scatter(xy[0][ixs], xy[1][ixs], 0.25, velocs[ixs], vmin = -1000, vmax = 1000)
            except:
                pass
fig.colorbar(h0, ax = hax0)
fig.colorbar(h1, ax = hax1)


outfile = out_path + 'results_masked.png'
plt.savefig(outfile)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'cmap': 'gray', 'clim': [14000, 17000], 'extent': array([-887950., -356950.,  183825.,  561825.]), 'origin': 'lower'}
{'cmap': 'gray', 'clim': [14000, 17000], 'extent': array([-887950., -356950.,  183825.,  561825.]), 'origin': 'lower'}


# Older version:

In [81]:
cycles = ['03','04','05','06','07'] # not doing 1 and 2, because don't overlap exactly
# this could be future work

beams = ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']

# try and smooth without filling nans
dx = 20 # x_atc coordinate distance
smoothing_window_size = int(np.round(40 / dx)) # meters / dx;
# ex., 60 m smoothing window is a 3 point running average smoothed dataset, because each point is 20 m apart
filt = np.ones(smoothing_window_size)
smoothed = True

segment_length = 2000 # m
search_width = 800 # m

along_track_step = 100 # m; how much to jump between each veloc determination

max_percent_nans = 10 # what % of segment length can be nans

velocities = {}   
correlations = {}     
lags = {}
x_atcs_for_velocities = {}
latitudes = {}
longitudes = {}
rgts_with_errors = []
total_number_repeat_tracks_processed = 0
for ir, rgt in enumerate(rgts.keys()):
    if ir == 18: # just process a few for the moment
        try:
            print('\nProcessing rgt ' + rgt + ', #' +str(ir) + ' of ' + str(len(rgts.keys())))

            ### load all files for this rgt
            rgt_files = glob.glob(os.path.join(datapath, f'*ATL06_*_{rgt}*_003*.h5'))
            n_rgt_files_cycle3_and_after = 0
            for file in rgt_files:
                if float(file.split('/')[-1].split('_')[3][4:6]) >= 3:
                    n_rgt_files_cycle3_and_after += 1

            print('There are ' +str(n_rgt_files_cycle3_and_after) + ' files available for this track from cycle 3 onward')


            ### only process if there is at least one repeat track during the time period when data overlapped
            if n_rgt_files_cycle3_and_after >= 2:
                ### extract data from all available cycles
                x_atc = {}
                lats = {}
                lons = {}
                h_li_raw = {} # unsmoothed data; equally spaced x_atc, still has nans 
                h_li_raw_NoNans = {} # unsmoothed data; equally spaced x_atc, nans filled with noise
                h_li = {} # smoothed data, equally spaced x_atc, nans filled with noise 
                h_li_diff = {}
                times = {}
                min_seg_ids = {}
                segment_ids = {}


                cycles_this_rgt = []
                for cycle in cycles:
                    # load data that matches cycle; put into dictionaries to use shortly
                    Di = {}
                    x_atc[cycle] = {}
                    lats[cycle] = {}
                    lons[cycle] = {}
                    h_li_raw[cycle] = {}
                    h_li_raw_NoNans[cycle] = {}
                    h_li[cycle] = {}
                    h_li_diff[cycle] = {}
                    times[cycle] = {}
                    min_seg_ids[cycle] = {}
                    segment_ids[cycle] = {}

                    filenames = glob.glob(os.path.join(datapath, f'*ATL06_*_{rgt}{cycle}*_003*.h5'))
                    #print(filenames)
                    error_count=0
                    for filename in filenames:
                        try:
                            for beam in beams:
                                Di[filename]=atl06_to_dict(filename,'/'+ beam, index=None, epsg=3031)

                                times[cycle][beam] = Di[filename]['data_start_utc']

                                # extract h_li and x_atc, and lat/lons for that section                
                                x_atc_tmp = Di[filename]['x_atc']
                                h_li_tmp = Di[filename]['h_li']#[ixs]
                                lats_tmp = Di[filename]['latitude']
                                lons_tmp = Di[filename]['longitude']


                                # segment ids:
                                seg_ids = Di[filename]['segment_id']
                                min_seg_ids[cycle][beam] = seg_ids[0]
                                #print(len(seg_ids), len(x_atc_tmp))

                                # make a monotonically increasing x vector
                                # assumes dx = 20 exactly, so be carefull referencing back
                                ind = seg_ids - np.nanmin(seg_ids) # indices starting at zero, using the segment_id field, so any skipped segment will be kept in correct location
                                x_full = np.arange(np.max(ind)+1) * 20 + x_atc_tmp[0]
                                h_full = np.zeros(np.max(ind)+1) + np.NaN
                                h_full[ind] = h_li_tmp
                                lats_full = np.zeros(np.shape(x_full)) * np.nan
                                lats_full[ind] = lats_tmp
                                lons_full = np.zeros(np.shape(x_full)) * np.nan
                                lons_full[ind] = lons_tmp
                                
                                ## save the segment id's themselves, with gaps filled in
                                segment_ids[cycle][beam] = np.zeros(np.max(ind)+1) + np.NaN
                                segment_ids[cycle][beam][ind] = seg_ids

                                
                                x_atc[cycle][beam] = x_full
                                h_li_raw[cycle][beam] = h_full # preserves nan values
                                lons[cycle][beam] = lons_full
                                lats[cycle][beam] = lats_full

                                ### fill in nans with noise h_li datasets
            #                         h = ma.array(h_full,mask =np.isnan(h_full)) # created a masked array, mask is where the nans are
            #                         h_full_filled = h.mask * (np.random.randn(*h.shape)) # fill in all the nans with random noise

                                ### interpolate nans in pandas
                                # put in dataframe for just this step; eventually rewrite to use only dataframes?
                                data = {'x_full': x_full, 'h_full': h_full}
                                df = pd.DataFrame(data, columns = ['x_full','h_full'])
                                #df.plot(x='x_full',y='h_full')
                                # linear interpolation for now
                                df['h_full'].interpolate(method = 'linear', inplace = True)
                                h_full_interp = df['h_full'].values
                                h_li_raw_NoNans[cycle][beam] = h_full_interp # has filled nan values


                                # running average smoother /filter
                                if smoothed == True:
                                    h_smoothed = (1/smoothing_window_size) * np.convolve(filt, h_full_interp, mode = 'same')
                                    h_li[cycle][beam] = h_smoothed

                                    # differentiate that section of data
                                    h_diff = (h_smoothed[1:] - h_smoothed[0:-1]) / (x_full[1:] - x_full[0:-1])
                                else: 
                                    h_li[cycle][beam] = h_full_interp
                                    h_diff = (h_full_interp[1:] - h_full_interp[0:-1]) / (x_full[1:] - x_full[0:-1])
                                h_li_diff[cycle][beam] = h_diff

                            cycles_this_rgt+=[cycle]


                        except KeyError as e:
                            print(f'file {filename} encountered error {e}')
                            error_count += 1

                    #print(f"For rgt {rgt} cycle {cycle}, read {len(Di)} data files of which {error_count} gave errors")


                ### Determine # of possible velocities:
                n_possible_veloc = len(cycles_this_rgt) -1 # naive, for now; can improve later
                for veloc_number in range(n_possible_veloc):
                    cycle1 = cycles_this_rgt[veloc_number]
                    cycle2 = cycles_this_rgt[veloc_number+1]
                    t1_string = times[cycle1]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
                    t1 = Time(t1_string)

                    t2_string = times[cycle2]['gt1l'][0].astype(str) #figure out later if just picking hte first one it ok
                    t2 = Time(t2_string)

                    dt = (t2 - t1).jd # difference in julian days


                    velocities[rgt] = {}   
                    correlations[rgt] = {}     
                    lags[rgt] = {}

                    for beam in beams:
                        # fig1, axs = plt.subplots(4,1)


                        ### determine x1: larger value for both beams, if different
                        min_x_atc_cycle1 = x_atc[cycle1][beam][0]
                        min_x_atc_cycle2 = x_atc[cycle2][beam][0]

                        # pick out the track that starts at greater x_atc, and use that as x1s vector
                        if min_x_atc_cycle1 != min_x_atc_cycle2: 
                            x1 = np.nanmax([min_x_atc_cycle1,min_x_atc_cycle2])
                            cycle_n = np.arange(0,2)[[min_x_atc_cycle1,min_x_atc_cycle2] == x1][0]
                            if cycle_n == 0:
                                cycletmp = cycle2
                            elif cycle_n == 1:
                                cycletmp = cycle1
                            n_segments_this_track = (len(x_atc[cycletmp][beam]) - search_width/dx) / (along_track_step/dx)
                            x1s = x_atc[cycletmp][beam][int(search_width/dx)+1::int(search_width/dx)]
                            # start at search_width/dx in, so the code never tries to get data outside the edges of this rgt
                            # add 1 bc the data are differentiated, and h_li_diff is therefore one point shorter

                        elif min_x_atc_cycle1 == min_x_atc_cycle2: # doesn't matter which cycle
                            x1s = x_atc[cycle1][beam][int(search_width/dx)+1::int(search_width/dx)]

                        ### determine xend: smaller value for both beams, if different
                        max_x_atc_cycle1 = x_atc[cycle1][beam][-1]
                        max_x_atc_cycle2 = x_atc[cycle2][beam][-1]
                        smallest_xatc = np.min([max_x_atc_cycle1,max_x_atc_cycle2])
                        ixmax = np.where(x1s >= smallest_xatc - search_width/dx)
                        if len(ixmax[0]) >= 1:
                            ixtmp = ixmax[0][0]
                            x1s = x1s[:ixtmp]

                        ### dicts to store info in
                        velocities[rgt][beam] = np.empty_like(x1s)
                        correlations[rgt][beam] = np.empty_like(x1s)
                        lags[rgt][beam] = np.empty_like(x1s)

                        midpoints_x_atc = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_lat = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_lon = np.empty(np.shape(x1s)) # for writing out 
                        midpoints_seg_ids = np.empty(np.shape(x1s)) # for writing out 
                        
                        for xi, x1 in enumerate(x1s):
                            # cut out small chunk of data at time t1 (first cycle)
                            x_full_t1 = x_atc[cycle1][beam]
                            ix_x1 = np.arange(len(x_full_t1))[x_full_t1 >= x1][0]
                            ix_x2 = ix_x1 + int(np.round(segment_length/dx))      
                            x_t1 = x_full_t1[ix_x1:ix_x2]
                            lats_t1 = lats[cycle1][beam][ix_x1:ix_x2]
                            lons_t1 = lons[cycle1][beam][ix_x1:ix_x2]
                            seg_ids_t1 = seg_ids[cycle1][beam][ix_x1:ix_x2]
                            h_li1 = h_li_diff[cycle1][beam][ix_x1-1:ix_x2-1] # start 1 index earlier because 
                            # the h_li_diff data are differentiated, and therefore one sample shorter

                            # find midpoints; this is the position where we will assign the velocity measurement from each window
                            n = len(x_t1)
                            midpt_ix = int(np.floor(n/2))
                            midpoints_x_atc[xi] = x_t1[midpt_ix]
                            midpoints_lat[xi] = lats_t1[midpt_ix]
                            midpoints_lon[xi] = lons_t1[midpt_ix]
                            midpoints_seg_ids[xi] = seg_ids_t1[midpt_ix]
                            
                            # cut out a wider chunk of data at time t2 (second cycle)
                            x_full_t2 = x_atc[cycle2][beam]
                            ix_x3 = ix_x1 - int(np.round(search_width/dx)) # offset on earlier end by # indices in search_width
                            ix_x4 = ix_x2 + int(np.round(search_width/dx)) # offset on later end by # indices in search_width
                            x_t2 = x_full_t2[ix_x3:ix_x4]
                            h_li2 = h_li_diff[cycle2][beam][ix_x3-1:ix_x4-1]# start 1 index earlier because 
                            # the h_li_diff data are differentiated, and therefore one sample shorter

                            # plot data
                            # axs[0].plot(x_t2, h_li2, 'r')
                            # axs[0].plot(x_t1, h_li1, 'k')
                            # axs[0].set_xlabel('x_atc (m)')

                            ### if there are fewer than 10% nans in either data chunk:
                            n_nans1 = np.sum(np.isnan(h_li_raw[cycle1][beam][ix_x1:ix_x2]))
                            n_nans2 = np.sum(np.isnan(h_li_raw[cycle2][beam][ix_x3:ix_x4]))

                            if (n_nans1 / len(h_li1) <= max_percent_nans/100) and (n_nans2 / len(h_li2) <= max_percent_nans/100):

                                # correlate old with newer data
                                # detrend both chunks of data
                                h_li1 = detrend(h_li1,type = 'linear')
                                h_li2 = detrend(h_li2,type = 'linear')

                                # normalize both chunks of data
            #                         h_li1 = h_li1 / np.nanmax(np.abs(h_li1))
            #                         h_li2 = h_li2 / np.nanmax(np.abs(h_li2))

                                corr = correlate(h_li1, h_li2, mode = 'valid', method = 'direct') 

                                # a better way to normalize correlation function: shifting along longer vector
                                # normalize by autocorrelations
                                coeff_a_val = np.sum(h_li1**2)
                                coeff_b_val = np.zeros(len(h_li2) - len(h_li1)+1)
                                for shift in range(len(h_li2) - len(h_li1)+1):
                                    h_li2_section = h_li2[shift:shift + len(h_li1)]
                                    coeff_b_val[shift] = np.sum(h_li2_section **2)
                                norm_vec = np.sqrt(coeff_a_val * coeff_b_val)
                                corr_normed = corr / np.flip(norm_vec) # i don't really understand why this has to flip, but it does


                        #         lagvec = np.arange( -(len(h_li1) - 1), len(h_li2), 1)# for mode = 'full'
                        #         lagvec = np.arange( -int(search_width/dx) - 1, int(search_width/dx) +1, 1) # for mode = 'valid'
                                lagvec = np.arange(- int(np.round(search_width/dx)), int(search_width/dx) +1,1)# for mode = 'valid'

                                shift_vec = lagvec * dx

                                ix_peak = np.arange(len(corr_normed))[corr_normed == np.nanmax(corr_normed)][0]
                                best_lag = lagvec[ix_peak]
                                best_shift = shift_vec[ix_peak]
                                velocities[rgt][beam][xi] = best_shift/(dt/365)
                                correlations[rgt][beam][xi] = corr_normed[ix_peak]
                                lags[rgt][beam][xi] = lagvec[ix_peak]
                            else:
                                velocities[rgt][beam][xi] = np.nan
                                correlations[rgt][beam][xi] = np.nan
                                lags[rgt][beam][xi] = np.nan
                                
                                
                        ### Add velocities to hdf5 file for each beam
                        h5_file_out = f'{out_path}rgt{rgt}.hdf5'
                        with h5py.File(h5_file_out, 'w') as f:
                            f[beam +'/x_atc'] = midpoints_x_atc # assign x_atc value of half way along the segment
                            f[beam +'/latitudes'] = midpoints_lat # assign x_atc value of half way along the segment
                            f[beam +'/longitudes'] = midpoints_lon # assign x_atc value of half way along the segment
                            f[beam +'/velocities'] = velocities[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/correlation_coefficients'] = correlations[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/best_lags'] = lags[rgt][beam] # assign x_atc value of half way along the segment
                            f[beam +'/segment_ids'] = midpoints_seg_ids
                            
#                         f'{out_path}rgt{rgt}_{beam}.txt'
#                         f = open(file_out,'w')


#                         header0 = 'segment_length='+str(segment_length)+',segment_step='+str((dx))+'m,search_width='+str(search_width) + 'm'
#                         header = 'x_atc_segment_middle'
#                         for beam in beams:
#                             header = header + ',' + beam + '_veloc,' + beam + '_correlationValue'
#                         f.write(header0 + '\n')
#                         f.write(header + '\n')
                
                
                total_number_repeat_tracks_processed += 1
                

                
                
        except (ValueError, IndexError):
            print(f'rgt {rgt} encountered an error')
            rgts_with_errors.append(rgt)
            
print(f'Total number of repeat tracks successfully processed = {total_number_repeat_tracks_processed}')

                    # axs[1].plot(lagvec,corr)
                    # axs[1].plot(lagvec[ix_peak],corr[ix_peak], 'r*')
                    # axs[1].set_xlabel('lag (samples)')

                    # axs[2].plot(shift_vec,corr)
                    # axs[2].plot(shift_vec[ix_peak],corr[ix_peak], 'r*')
                    # axs[2].set_xlabel('shift (m)')

                    ## plot shifted data
                    # axs[3].plot(x_t2, h_li2, 'r')
                    # axs[3].plot(x_t1 - best_shift, h_li1, 'k')
                    # axs[3].set_xlabel('x_atc (m)')

                    # axs[0].text(x_t2[100], 0.6*np.nanmax(h_li2), beam)
                    # axs[1].text(lagvec[5], 0.6*np.nanmax(corr), 'best lag: ' + str(best_lag) + '; corr val: ' + str(np.round(corr[ix_peak],3)))
                    # axs[2].text(shift_vec[5], 0.6*np.nanmax(corr), 'best shift: ' + str(best_shift) + ' m'+ '; corr val: ' + str(np.round(corr[ix_peak],3)))
                    # axs[2].text(shift_vec[5], 0.3*np.nanmax(corr), 'veloc of ' + str(np.round(best_shift/(dt/365),1)) + ' m/yr')






Processing rgt 0894, #18 of 218
There are 2 files available for this track from cycle 3 onward
rgt 0894 encountered an error
Total number of repeat tracks successfully processed = 0
