# Workflow to Validate ARIA Secular Displacement Requirement

- Original code authored by: David Bekaert, Heresh Fattahi, Eric Fielding, and Zhang Yunjun with 
Extensive modifications by Adrian Borsa and Amy Whetter and other NISAR team members 2022

- Updated for OPERA requirements by Simran Sangha, Marin Govorcin, and Al Handwerger

<br>
<b><I>Notebook to Validate ARIA Secular Displacement Requirement</I></b><br>
- Author: Jinwoo Kim, Simran Sangha, Mary Grace Bato, February 2025

<div class="alert alert-warning">
Both the initial setup (<b>Prep A</b> section) and download of the data (<b>Prep B</b> section) should be run at the start of the notebook. And all subsequent sections NEED to be run in order.
</div>



In [None]:
# Parameters for papermill

### Choose a site listed in the 'site' column of the Cal/Val sites CSV (calval_sites_csv);
### the notebook stops with an error further down if the chosen site is not in that table.
## If your study area is not defined, add a new entry as appropriate and provide a unique site keyname
site = 'D115'

# NOTE: this flag is a string, not a bool.
# NOTE(review): it is not read anywhere in the visible part of the notebook - confirm where it is consumed.
findMax = 'false' # set to 'true' if you want to find the maximum threshold, set to 'false' if you want to find the minimum threshold

In [None]:
### Define list of requirements
## Static for ARIA Cal/Val requirements, do not touch!

# Define secular requirements
secular_gnss_rqmt = 3  # mm/yr for 3 years of data over length scales of 0.1-50 km ### 5 for OPERA
gnss_dist_rqmt = [0.1, 50.0]  # km, [min, max] length scale
secular_insar_rqmt = 3  # mm/yr  ### 5 for OPERA
insar_dist_rqmt = [0.1, 50.0]  # km, [min, max] length scale

# Define temporal sampling requirement
insar_sampling = 12 # days, maximum gap between consecutive acquisitions that counts as compliant
insar_sampling_percentage = 80 # percentage of acquisitions at 12 day sampling (insar_sampling) or better
insar_timespan_requirement = 4 # years, minimum time span covered by the time series

# Set mask file
maskFile = 'mask_custom.h5' # maskTempCoh.h5 maskConnComp.h5 waterMask.h5 (maskConnComp.h5 is very conservative)

pixel_radius = 0   #number of InSAR pixels to average for comparison with GNSS

In [None]:
# Parameters for papermill

gnss_source = 'UNR'
work_dir = './'                 # converted to an absolute path during directory setup
mintpy_dir = 'MINTPY'    # location of mintpy files (relative to work_dir)
output_dir = 'results'          # location to store output figures and text files (relative to work_dir)
vmin = -20  # vmin/vmax for plotting (passed to the velocity plots, which are in mm/yr)
vmax = 20

# define input files (looked up inside mintpy_dir)
vel_file = 'velocity_msk.h5'
insar_ts_file = 'timeseries_ERA5_demErr.h5'

# table of Cal/Val sites; must provide at least the 'site', 'steps' and 'reference_lalo' columns
calval_sites_csv = '../ARIA_CalVal_sites.csv'

# specify GNSS source for validation
from mintpy.objects import gnss
print(f'Searching for all GNSS stations from source: {gnss_source}')
print(f'May use any of the following supported sources: {gnss.GNSS_SOURCES}')
GNSS = gnss.get_gnss_class(gnss_source)    # station class matching the chosen source
gnss_dir = f'GNSS-{gnss_source}'           # folder holding the downloaded GNSS files

# coh file used for validation (read from <mintpy_dir>/avg_lyrs/)
coh_file = 'temporalCoherence.h5' # avgSpatialCoh.h5, temporalCoherence.h5
coherence_threshold = 0.6         # pixels with coherence >= threshold are kept
apply_coh_mask = False

# HDF5 file whose 'mask' dataset flags the pixels to keep (non-zero) or drop (zero)
recommended_mask_file = 'ref_vel_msk.h5'
apply_recommended_mask = True

# outlier removal
outlier_removal_method = 'modified_zscore'        # 'zscore', 'modified_zscore'
outlier_zscore_threshold = 2.0
apply_outlier_removal = True

#Set GNSS Parameters
gnss_completeness_threshold = 0.8    #ratio of data timespan with valid GNSS epochs
# NOTE(review): compared against the std. dev. of the detrended GNSS LOS series; units are not stated - confirm
gnss_residual_stdev_threshold = 10.  #max threshold standard deviation of residuals to linear GNSS fit

#variability score threshold
thr_var_score = 0.4      # variability score threshold; pixels scoring below it are kept
apply_nonlinear_mask = False

In [None]:
# Standard library imports
import math
import os
import h5py
from datetime import datetime as dt
from pathlib import Path
import warnings

# Third-party imports
import imgkit  # pip install imgkit / conda install -c conda-forge wkhtmltopdf
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.colors
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec
import matplotlib.patches as mpatches
import matplotlib.ticker as mticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
import rioxarray
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from pyproj import CRS, Transformer
import rasterio
from affine import Affine
from scipy import signal
from scipy import stats     # for outlier removal

# Configure matplotlib
plt.rcParams.update({'font.size': 12})

# Local application/library-specific imports
from mintpy.cli import generate_mask, reference_point, view
from mintpy.objects import timeseries
from mintpy.utils import ptime, readfile, utils as ut, utils0 as ut0
from solid_utils.plotting import display_validation, display_validation_table
from solid_utils.sampling import haversine_distance, load_geo, profile_samples, samp_pair

from copy import deepcopy

# Suppress ALL warnings for the rest of the session (no category or module filter is given),
# including NumPy runtime warnings such as divide-by-zero or all-NaN slices
warnings.filterwarnings("ignore")

## Define CalVal Site 

In [None]:
# Name of the dataset being validated
validation_data='ARIA'

################# Set Directories ##########################################
print('\nCurrent directory:', os.getcwd())

# Fall back to the current directory when no work_dir parameter was injected
if 'work_dir' not in locals():
    work_dir = Path.cwd()

work_dir = os.path.abspath(work_dir)    # absolute path       
print("Work directory:", work_dir)
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)  # Change to Workdir   

# NOTE: output_dir and mintpy_dir are overwritten with their absolute form below, so running
# this cell a second time in the same session prefixes work_dir onto an already absolute path.
output_dir = f'{work_dir}/{output_dir}'     # absolute path of output directory
os.makedirs(output_dir, exist_ok=True)
print("   output  dir:", output_dir)

# The MintPy directory is an input: it must already exist and is never created here
mintpy_dir = f'{work_dir}/{mintpy_dir}'     # absolute path of mintpy directory 
if not os.path.isdir(mintpy_dir):
    raise FileNotFoundError(f"The folder '{mintpy_dir}' does not exist.")
print("   MintPy  dir:", mintpy_dir)

def force_symlink(src, dst):
    """Create the symlink ``dst`` -> ``src``, replacing an existing ``dst``.

    If ``dst`` already exists it is removed with ``os.unlink`` and the link is
    created again; any other error from ``os.symlink``/``os.unlink`` propagates.
    """
    try:
        os.symlink(src, dst)
        return
    except FileExistsError:
        # something is already sitting at dst: drop it, then link again below
        os.unlink(dst)
    os.symlink(src, dst)

# setup symlinks of GNSS folders inside of the MintPy subdirectory
# (paths below are relative to the current directory, i.e. work_dir at this point)
gnss_csv = f'GNSS_record/{site}.csv'
rejected_gnss_csv_file = f'GNSS_record/{site}_rejectedstations.csv'

# link the two station tables (by file name only) and the GNSS data folder into mintpy_dir;
# the link targets are not checked for existence here
force_symlink(os.path.abspath(gnss_csv),
           f'{mintpy_dir}/{gnss_csv.split("/")[-1]}')
force_symlink(os.path.abspath(rejected_gnss_csv_file),
           f'{mintpy_dir}/{rejected_gnss_csv_file.split("/")[-1]}')
force_symlink(os.path.abspath(gnss_dir),
           f'{mintpy_dir}/{gnss_dir}')

# from here on the tables are addressed by bare file name, i.e. relative to mintpy_dir
# once the notebook has changed into that directory
gnss_csv = f'{site}.csv'
rejected_gnss_csv_file = f'{site}_rejectedstations.csv'

############################################################################
### List of ARIA Cal/Val Sites for secular requirements:
if os.path.exists(calval_sites_csv):
    sites_df = pd.read_csv(calval_sites_csv)
else:
    raise FileNotFoundError(f"The file {calval_sites_csv} does not exist.")  

display(sites_df)

# site key names that may be selected through the `site` parameter
secular_available_sites = sites_df['site'].values

In [None]:
# Define functions
# Module-level font size. The plotting helpers defined in this cell take their own
# `fontsize` argument (default 6) and do not read this global.
# NOTE(review): confirm which later cells rely on it.
fontsize=8

def _rmse(predictions, targets):
    return np.sqrt(np.nanmean((np.ma.masked_invalid(predictions) - np.ma.masked_invalid(targets)) ** 2))


def _plot_subplots(fig, subplot_pos):
    """Add one axes per entry of `subplot_pos` to `fig` and place it explicitly.

    Parameters
    ----------
    fig : figure the axes are added to
    subplot_pos : dict mapping an axes label to the position handed to
        ``Axes.set_position``

    Returns
    -------
    (fig, axes) where `axes` maps each label of `subplot_pos` to its axes.
    """
    grid = GridSpec(ncols=2, nrows=1, figure=fig)
    # every axes is created on the same grid cell; its final place comes from set_position
    axes = {label: fig.add_subplot(grid[0, 0]) for label in subplot_pos}
    for label, axis in axes.items():
        axis.set_position(subplot_pos[label])
    return fig, axes


def plot_scatterplot(ax,
                     data1_ts, data2_ts,
                     label1, label2,
                     scale=1e2, fontsize=6, 
                     unit='cm', **kwargs):
    """Scatter two displacement series against each other on their common dates.

    Parameters
    ----------
    ax : axes-like object to draw on
    data1_ts, data2_ts : 2-row sequences; row 0 holds the dates, row 1 the
        displacements. Series 1 is drawn on the x-axis, series 2 on the y-axis.
    label1, label2 : axis labels for series 1 (x) and series 2 (y)
    scale : factor applied to the displacements (and to RMSE/MAD) for display
    fontsize : font size of labels and ticks (annotations use fontsize-1)
    unit : unit string shown in labels, i.e. the unit AFTER applying `scale`
    **kwargs : forwarded to ``ax.plot`` for the scatter markers

    Returns
    -------
    pandas.DataFrame with the date-matched samples (date1, disp1, date2, disp2)
    after dropping rows with missing values.
    """
    # Convert to pandas
    data1_df = pd.DataFrame(data1_ts).T
    data1_df = data1_df.rename(columns={0:'date1', 1:'disp1'})

    data2_df = pd.DataFrame(data2_ts).T
    data2_df = data2_df.rename(columns={0:'date2', 1:'disp2'})

    # Merge, and keep common dates
    merged_df = pd.merge(data1_df, data2_df,
                         left_on='date1', right_on='date2',
                         how='inner')

    # Symmetric axis limits: largest absolute displacement plus a 20% margin
    df_min = np.round(np.min(merged_df[['disp1', 'disp2']].min()), 2)
    df_max = np.round(np.max(merged_df[['disp1', 'disp2']].max()), 2)
    ax_range = np.max(np.abs([df_min, df_max]))
    ax_range += ax_range*0.2 # increase by 20%
    ax_lims = [-ax_range*scale, ax_range*scale]

    # text upper corner
    txt_xy = ax_range- ax_range*0.1
    txt_xy *= scale

    # vertical spacing between the annotation lines (1/20 of the axis height)
    yrange = (ax_range*2) * scale
    r = yrange/20

    # Replace pandas nat with nan
    merged_df['disp1'] = merged_df['disp1'].replace({pd.NaT: np.nan})
    merged_df['disp2'] = merged_df['disp2'].replace({pd.NaT: np.nan})

    # Plot: series 1 on x, series 2 on y, plus the 1:1 line
    ax.plot(merged_df.disp1*scale, merged_df.disp2*scale, 'o', **kwargs)
    
    ax.plot(ax_lims, ax_lims, lw=0.5, color='navy')
    # label each axis with the series actually drawn on it (x = series 1, y = series 2)
    ax.set_xlabel(f'{label1} [{unit}]', fontsize=fontsize, labelpad=-0.1)
    ax.set_ylabel(f'{label2} [{unit}]', fontsize=fontsize, labelpad=-0.1)
    ax.set_xlim(ax_lims)
    ax.set_ylim(ax_lims)

    # Get stats
    merged_df = merged_df.dropna()
    rmse = _rmse(merged_df.disp1.values, merged_df.disp2.values)
    mad = stats.median_abs_deviation(merged_df.disp1 - merged_df.disp2)
    if len(merged_df) >= 2:
        # R2 is the squared Pearson correlation; it is dimensionless, so the
        # unit conversion `scale` must not be applied to it
        r2 = stats.pearsonr(np.float64(merged_df.disp1), np.float64(merged_df.disp2))[0] ** 2
    else:
        # correlation is undefined for fewer than two common samples
        r2 = np.nan

    ax.text(-txt_xy, txt_xy, f'R2: {r2:.2f}', weight='bold', fontsize=fontsize-1)
    ax.text(-txt_xy, txt_xy-r, f'RMSE: {rmse*scale:.2f} {unit}', weight='bold', fontsize=fontsize-1)
    ax.text(-txt_xy, txt_xy-r*2, f'MAD: {mad*scale:.2f} {unit}', weight='bold', fontsize=fontsize-1)
    ax.tick_params(direction='in', labelsize=fontsize, length=2)
    return merged_df


def plot_histogram(ax, difference, bins=20, scale=1e2, unit='cm', fontsize=6):
    """Draw a histogram of `difference` (multiplied by `scale`) on `ax`.

    A dashed vertical line marks the mean of the scaled differences.
    `difference` must support ``* scale`` and ``.mean()`` (e.g. a NumPy array).
    Returns `ax`.
    """
    scaled_difference = difference*scale
    scaled_mean = difference.mean()*scale

    # histogram of differences
    ax.hist(scaled_difference, bins=bins, color='red', alpha=0.5)
    ax.set_xlabel(f'Diff. [{unit}]', fontsize=fontsize, labelpad=-0.2)
    ax.tick_params(direction='in', labelsize=fontsize, length=2, pad=1.5)
    ax.axvline(scaled_mean, color='darkred', linestyle='--', label='Mean')
    return ax

## Table of Contents:
<a id='secular_TOC'></a>

<hr/>

[**Prep A. Environment Setup**](#secular_prep_a)

[**Prep B. Data Staging**](#secular_prep_b)

[**1. Generate Interferogram Stack**](#secular_gen_ifg)
- [1.1.  Generate interferograms using dolphin](#secular_crop_ifg)

[**2. Generation of Time Series from Interferograms**](#secular_gen_ts)
- [2.1. Set Up MintPy Configuration file](#secular_setup_config)
- [2.2. Load Data into MintPy](#secular_load_data)
- [2.3. Generate Quality Control Mask](#secular_generate_mask)

[**3. Optional Corrections**](#secular_opt_correction)
- [3.1. Topographic Residual Correction ](#secular_topo_corr) 

[**4. Estimate ARIA and GNSS Velocities**](#secular_decomp_ts)
- [4.1. Estimate ARIA LOS Velocities](#secular_insar_vel1)
- [4.2. Find Collocated GNSS Stations](#secular_co_gps)  
- [4.3. Get GNSS Position Time Series](#secular_gps_ts) 
- [4.4. Make GNSS LOS Velocities](#secular_gps_los)
- [4.5. Re-Reference GNSS and ARIA Velocities](#secular_gps_insar)

[**5. ARIA Validation Approach 1: GNSS-ARIA Direct Comparison**](#secular_ARIA_validation)
- [5.1. Make Velocity Residuals at GNSS Locations](#secular_make_vel)
- [5.2. Make Double-differenced Velocity Residuals](#secular_make_velres)
- [5.3. Secular Requirement Validation: Method 1](#secular_valid_method1)

[**6. ARIA Validation Approach 2: InSAR-only Structure Function**](#secular_ARIA_validation2)
- [6.1. Read Array and Mask Pixels with no Data](#secular_array_mask)
- [6.2. Randomly Sample Pixels and Pair Them Up with Option to Remove Trend](#secular_remove_trend)
- [6.3. Amplitude vs. Distance of Relative Measurements (pair differences)](#secular_M2ampvsdist2)
- [6.4. Bin Sample Pairs by Distance Bin and Calculate Statistics](#secular_M2RelMeasTable)

[**Appendix: Supplementary Comparisons and Plots**](#secular_appendix1)
- [A.1. Compare Raw Velocities](#secular_compare_raw)
- [A.2. Plot Velocity Residuals](#secular_plot_vel)
- [A.3. Plot Double-differenced Residuals](#secular_plot_velres)
- [A.4. GPS Position Plot](#secular_appendix_gps)

<hr/>

<a id='secular_prep_a'></a>
## Prep A. Environment Setup
Setup your environment for processing data

In [None]:
# Resolve the input files inside the MintPy directory and make sure they exist
vel_file = os.path.join(mintpy_dir, vel_file)
insar_ts_file = os.path.join(mintpy_dir, insar_ts_file)

if os.path.exists(vel_file) and os.path.exists(insar_ts_file):
    print(f'{vel_file} and {insar_ts_file} exist and we can continue this validation.')
else:
    raise FileNotFoundError(f"The {vel_file} and/or {insar_ts_file} do not exist and are required for this validation.")

# The selected site must be one of the rows of the Cal/Val sites table
if site not in secular_available_sites:
    msg = '\nSelected site not available! Please select one of the following sites:: \n{}'.format(secular_available_sites)
    raise Exception(msg)
else:
    print('\nSelected site: {}'.format(site))
    display(sites_df[sites_df['site'] == site])

os.chdir(mintpy_dir)  # move to MintPy directory

# step events (earthquake, volcano): whitespace-separated dates in the 'steps' column
step_events_date = sites_df[sites_df['site'] == site]['steps'].iloc[0]
if pd.isna(step_events_date):
    step_events_date = None
else:
    # read the step dates of the SELECTED site (this used to be hard-coded to site 'A064',
    # so every other site silently inherited A064's step dates)
    step_events_date = [str(i) for i in step_events_date.split()]

# a linear trend plus step functions is only needed when step dates are listed
if step_events_date:
    step_model = {'polynomial': 1, 'stepdate': step_events_date}
else:
    step_model = None

In [None]:
# read the geographic footprint of the velocity file from its metadata
insar_metadata = readfile.read_attribute(vel_file)

# the four corner values are taken in the order south, north, west, east
geo_S, geo_N, geo_W, geo_E = ut.four_corners(insar_metadata)
ARIA_region = [geo_S, geo_N, geo_W, geo_E]
ARIA_region_geo = (geo_S, geo_N, geo_W, geo_E)

print('region of ARIA footprint (lat/lon): ', ARIA_region_geo)

<a id='secular_gen_ts'></a>
# 2. Generation of Time Series from Interferograms

<a id='secular_load_data'></a>
## 2.1. Load Data into MintPy

The output of this step is an "inputs" for MintPy processing containing HDF5 files:
- timeseries.h5: This file contains the cumulative displacement time series and associated metadata

<a id='secular_generate_mask'></a>
## 2.2. Generate Quality Control Mask

Mask files can be used to mask pixels in the time-series processing. Below we generate a mask file based on the connected components, which is a metric for unwrapping quality.

In [None]:
if apply_coh_mask:
    # Build and display a binary mask from the chosen coherence layer
    # (1 where coherence >= coherence_threshold, 0 elsewhere)

    coh_file_path = os.path.join(f'{mintpy_dir}/avg_lyrs/', coh_file)

    # name of the HDF5 dataset that holds the values, per supported coherence file
    _coh_dataset_by_file = {
        'avgSpatialCoh.h5': 'coherence',
        'temporalCoherence.h5': 'temporalCoherence',
    }
    _coh_dataset = _coh_dataset_by_file.get(os.path.basename(coh_file))
    if _coh_dataset is None:
        raise FileNotFoundError(f"The coherence file, {coh_file}, not found.")

    with h5py.File(coh_file_path, 'r') as f:
        coh = f[_coh_dataset][:]

    coh_mask = (coh >= coherence_threshold).astype(np.int8)

    # two-colour map: light gray for rejected pixels, red for retained pixels
    colors = ['#f0f0f0', '#ed2939']
    cmap = matplotlib.colors.ListedColormap(colors)

    plt.figure(figsize=(12, 10), dpi=100)
    im = plt.imshow(coh_mask, cmap=cmap)
    cbar = plt.colorbar(im, shrink=0.6)
    cbar.set_ticks([0.25, 0.75])
    cbar.set_ticklabels(['Low coherence', 'High coherence'])

    plt.title('Coherence-based mask', fontsize=14, pad=15)
    plt.axis('off')
    plt.tight_layout()

In [None]:
if apply_recommended_mask: 
    # Load the recommended mask; the file name is resolved against the current
    # directory (the MintPy directory once the environment setup has run)
    with h5py.File(recommended_mask_file, 'r') as f:
        reliability_mask = f['mask'][:]

    colors = ['#f0f0f0', '#31a354']  # Light gray and forest green
    cmap = matplotlib.colors.ListedColormap(colors)

    plt.figure(figsize=(12, 10), dpi=100)
    im = plt.imshow(reliability_mask, cmap=cmap)
    cbar = plt.colorbar(im, shrink=0.6)
    cbar.set_ticks([0.25, 0.75])
    # NOTE(review): these tick labels are the same as in the coherence-mask plot, but this
    # mask comes from the recommended-mask file - confirm the wording
    cbar.set_ticklabels(['Low coherence', 'High coherence'])

    plt.title('Reliability mask based on recommended masks', fontsize=14, pad=15)
    plt.axis('off')
    plt.tight_layout()


<a id='secular_common_latlon'></a>
## 2.3. Reference Interferograms To Common Lat/Lon

In [None]:
# 'reference_lalo' stores the reference point as "<lat> <lon>" (whitespace-separated strings)
_reference_lalo = sites_df.loc[sites_df['site'] == site, 'reference_lalo'].values[0].split()
lat, lon = _reference_lalo[0], _reference_lalo[1]

# re-reference the time-series file to that latitude/longitude
iargs = [insar_ts_file, '-l', lat, '-L', lon]
reference_point.main(iargs)

In [None]:
# Get date list
date_list = timeseries(insar_ts_file).get_date_list()
num_date = len(date_list)
date0, date1 = date_list[0], date_list[-1]
insar_dates = ptime.date_list2vector(date_list)[0]

# Check temporal sampling: days elapsed between consecutive acquisitions
insar_sampling_arr = [(later - earlier).days
                      for earlier, later in zip(insar_dates[:-1], insar_dates[1:])]

# number of intervals that are at most `insar_sampling` days long
count = sum(1 for gap_days in insar_sampling_arr if gap_days <= insar_sampling)

# a single-epoch series has no intervals: report 0% instead of dividing by zero
percentage = (count / len(insar_sampling_arr)) * 100 if insar_sampling_arr else 0.0
timespan_of_insar = (insar_dates[-1] - insar_dates[0]).days / 365.25

# Overall pass/fail criterion
if percentage >= insar_sampling_percentage:
    print(f'This velocity dataset ({percentage}%) passes the temporal sampling requirement ({insar_sampling_percentage}%)')
else:
    print(f'This velocity dataset ({percentage}%) does NOT pass the temporal sampling requirement ({insar_sampling_percentage}%)')

if timespan_of_insar >= insar_timespan_requirement:
    print(f'This velocity dataset ({timespan_of_insar} years) passes the timespan requirement ({insar_timespan_requirement} years)')
else:
    print(f'This velocity dataset ({timespan_of_insar} years) does NOT pass the timespan requirement ({insar_timespan_requirement } years)')

<a id='secular_opt_correction'></a>
# 3. Optional Corrections

Optional corrections related to tropospheric delay, ionospheric delay, solid earth tides, and plate motion.

<a id='secular_decomp_ts'></a>
# 4. Estimate InSAR and GNSS Velocities

<a id='secular_insar_vel1'></a>
## 4.1. Estimate InSAR LOS Velocities

Given a time series of InSAR LOS displacements, the observations for a given pixel, $U(t)$, can be parameterized as:

$$U(t) = a \;+\; vt \;+\; c_1 \cos (\omega_1 t - \phi_1) \;+\; c_2 \cos (\omega_2 t - \phi_2) \;+\; \sum_{j=1}^{N_{eq}} \left( h_j+f_j F_j (t-t_j) \right)H(t - t_j) \;+\; \frac{B_\perp (t)}{R \sin \theta}\delta z \;+\; residual$$

which includes a constant offset $(a)$, velocity $(v)$, and amplitudes $(c_j)$ and phases $(\phi_j)$ of annual $(\omega_1)$ and semiannual $(\omega_2)$ sinusoidal terms.  Where needed we can include additional complexity, such as coseismic and postseismic processes parameterized by Heaviside (step) functions $H$ and postseismic functions $F$ (the latter typically exponential and/or logarithmic).   $B_\perp(t)$, $R$, $\theta$, and $\delta z$ are, respectively, the perpendicular component of the interferometric baseline relative to the first date, slant range distance, incidence angle and topography error correction for the given pixel. 

Thus, given either an ensemble of interferograms or the output of SBAS (displacement vs. time), we can write the LSQ problem as 

$$ \textbf{G}\textbf{m} = \textbf{d}$$

where $\textbf{G}$ is the design matrix (constructed out of the different functional terms in the displacement model above, evaluated either at the SAR image dates for SBAS output, or between the dates spanned by each pair for interferograms), $\textbf{m}$ is the vector of model parameters (the coefficients in the displacement model above) and $\textbf{d}$ is the vector of observations.  For GPS time series, $\textbf{G}, \textbf{d}, \textbf{m}$ are constructed using values evaluated at single epochs corresponding to the GPS solution times, as for SBAS InSAR input. 

With this formulation, we can obtain InSAR velocity estimates and their formal uncertainties (including in areas where the expected answer is zero). 

The default InSAR velocity fit in MintPy estimates only the mean linear velocity $(v)$ in the equation above, which is what we do below. 

In [None]:
# read the LOS velocity and rescale it by 1000 (m/yr -> mm/yr)
insar_velocities, _ = readfile.read(vel_file, datasetName='velocity')
insar_velocities = insar_velocities * 1000.

# collect every pixel that must be blanked: exact zeros, plus pixels rejected
# by whichever optional masks are switched on
_blank_pixels = insar_velocities == 0
if apply_coh_mask:
    _blank_pixels |= coh_mask == 0
if apply_recommended_mask:
    # reliability mask from the recommended mask file
    _blank_pixels |= reliability_mask == 0

# set masked pixels to NaN
insar_velocities[_blank_pixels] = np.nan

In [None]:
# Build the argument list explicitly rather than splitting one command string:
# vel_file is an absolute path, and str.split() would cut it apart at any whitespace
view_args = [vel_file, 'velocity',
             '-v', str(vmin), str(vmax),
             '--colormap', 'jet',
             '--noaxis',
             '--figsize', '18', '5.5',
             '--figtitle', 'LOS_Velocity',
             '--unit', 'mm/yr',
             '--zm']
scp_args = ' '.join(view_args)   # command string kept under its original name
view.main(view_args)

### Removing outliers associated with phase unwrapping errors and discontinuous displacement from the ARIA velocity

In [None]:
def remove_outliers_disp(displacement_array, method='modified_zscore', threshold=3.0):
    """Flag and blank statistical outliers in an array of displacement rates.

    Only valid pixels (neither NaN nor exactly zero) enter the statistics.

    Parameters
    ----------
    displacement_array : numpy.ndarray of floats; it is not modified
    method : 'zscore' (mean / standard deviation) or 'modified_zscore'
        (median / median absolute deviation)
    threshold : a pixel is an outlier when its score is strictly greater

    Returns
    -------
    result : copy of the input with outliers set to NaN
    outlier_mask : boolean array of the same shape, True at outliers

    Raises
    ------
    ValueError : if `method` is unknown (and there is at least one valid pixel)
    """
    # Create a copy of the input array
    result = displacement_array.copy()

    # Create mask for valid pixels (non-NaN and non-zero)
    valid_mask = ~(np.isnan(result) | (result == 0))
    valid_data = result[valid_mask]

    # Initialize outlier mask
    outlier_mask = np.zeros_like(result, dtype=bool)

    if valid_data.size == 0:
        return result, outlier_mask

    if method == 'zscore':
        # Z-score method; a zero standard deviation yields NaN scores, which never
        # exceed the threshold, so nothing is flagged in that case
        z_scores = np.abs(stats.zscore(valid_data))
        outliers = z_scores > threshold
    elif method == 'modified_zscore':
        # Modified Z-score method
        abs_deviation = np.abs(valid_data - np.median(valid_data))
        mad = stats.median_abs_deviation(valid_data)
        if mad > 0:
            modified_z_scores = 0.6745 * abs_deviation / mad
        else:
            # MAD is zero when more than half of the valid pixels share one value.
            # Dividing by it would score every other pixel as infinite and flag it,
            # so fall back to the mean absolute deviation (with its own
            # normal-consistency constant).
            mean_abs_deviation = np.mean(abs_deviation)
            if mean_abs_deviation > 0:
                modified_z_scores = abs_deviation / (1.253314 * mean_abs_deviation)
            else:
                # all valid pixels are identical: there is nothing to flag
                modified_z_scores = np.zeros_like(abs_deviation)
        outliers = modified_z_scores > threshold
    else:
        raise ValueError(f"Unknown method: {method}")

    outlier_mask[valid_mask] = outliers

    # Set outliers to NaN
    result[outlier_mask] = np.nan

    return result, outlier_mask

In [None]:
def plot_outlier_locations(displacement_array, outlier_mask, title='Outlier Locations'):
    """Show where outliers were found on a three-class map.

    Classes: background (white), valid pixels (yellow), outliers (red).

    Parameters
    ----------
    displacement_array : 2D array; pixels that are neither NaN nor zero are "valid"
    outlier_mask : boolean array of the same shape, True at outlier pixels
    title : figure title
    """
    plt.figure(figsize=(8, 6))

    # Valid pixels are neither NaN nor zero; outliers are drawn even where the
    # array has already been blanked (NaN) at their position
    valid_mask = ~(np.isnan(displacement_array) | (displacement_array == 0))
    outlier_mask = np.asarray(outlier_mask, dtype=bool)

    # Class codes: -1 background, 0 valid pixel, 1 outlier
    result = np.full(displacement_array.shape, -1.0)
    result[valid_mask & ~outlier_mask] = 0
    result[outlier_mask] = 1

    # One colour per class code, in order: -1 white, 0 yellow, 1 red.
    # vmin/vmax pin the code -> colour mapping; without them the colour scale is
    # stretched to the codes present, so an image without outliers would draw the
    # valid pixels in the outlier colour. 'nearest' keeps class codes unblended.
    colors = ['white', 'yellow', 'red']
    plt.imshow(result, cmap=matplotlib.colors.ListedColormap(colors),
               vmin=-1, vmax=1, interpolation='nearest')

    # Legend for the two non-background classes
    legend_elements = [
        mpatches.Patch(facecolor='yellow', label='Valid Pixels', edgecolor='yellow'),
        mpatches.Patch(facecolor='red', label='Outliers')
    ]
    plt.legend(handles=legend_elements, loc='best')

    plt.title(title)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
if apply_outlier_removal:
    # Apply outlier removal to ARIA velocity
    # (insar_velocities is replaced by the cleaned copy; `outliers` is the boolean mask of removed pixels)
    insar_velocities, outliers = remove_outliers_disp(insar_velocities, method=outlier_removal_method, threshold=outlier_zscore_threshold)     # return with cleaned ARIA velocity
    # the cleaned array is plotted, so the outlier pixels are already NaN in it
    plot_outlier_locations(insar_velocities, outliers)

### Applying nonlinear-displacement mask

In [None]:
# temporal variability score map, expected inside the MintPy directory
score_map_file = 'nonDispScore.h5'
score_map_file = os.path.join(mintpy_dir, score_map_file)

if apply_nonlinear_mask:

    variability_scores = readfile.read(score_map_file)[0]
    mask_var_score = variability_scores < thr_var_score     # selecting pixels with small temporal variability score
    insar_velocities[mask_var_score == 0] = np.nan    # added for nonlinear def masking

    # left panel: score map; right panel: the binary mask derived from it
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 10))

    im1 = ax1.imshow(variability_scores, cmap='jet', vmin=0, vmax=1, interpolation='none')
    ax1.axis('off')
    ax1.set_title('Temporal variability score map')

    divider = make_axes_locatable(ax1)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(im1, cax=cax)

    # white = rejected pixel (0), black = retained pixel (1)
    cmap_bw = matplotlib.colors.ListedColormap(['white', 'black'])
    im2 = ax2.imshow(mask_var_score, cmap_bw, interpolation='none')
    ax2.axis('off')
    ax2.set_title('Mask of temporal variability score map')

    divider = make_axes_locatable(ax2)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im2, cax=cax)
    cbar.set_ticks([0.25, 0.75])    # Set tick locations to the center of each color range
    cbar.set_ticklabels(['0', '1'])     # Set tick labels

    # a further, hidden axes is appended to the right of the mask panel
    # NOTE(review): presumably a layout spacer - confirm it is intentional
    divider = make_axes_locatable(ax2)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    cax.axis('off')

<div class="alert alert-info">
<b>Note :</b> 
Negative values indicate that the target is moving away from the radar (i.e., subsidence in the case of vertical deformation).
Positive values indicate that the target is moving towards the radar (i.e., uplift in the case of vertical deformation). 
</div>

<a id='secular_co_gps'></a>
## 4.2. Find Collocated GNSS Stations

The project will have access to L2 position data for continuous GNSS stations in third-party networks such as NSF’s Plate Boundary Observatory, the HVO network for Hawaii, GEONET-Japan, and GEONET-New Zealand, located in target regions for ARIA calval. Station data will be post-processed by one or more analysis centers, will be freely available, and will have latencies of several days to weeks, as is the case with positions currently produced by the NSF’s GAGE Facility and separately by the University of Nevada Reno. Networks will contain one or more areas of high-density station coverage (2~20 km nominal station spacing over 100 x 100 km or more) to support validation of ARIA secular requirements at a wide range of length scales.

In [None]:
# analysis period, taken from the metadata of the velocity file
insar_metadata = readfile.read_attribute(vel_file)

start_date = insar_metadata.get('START_DATE', None)
end_date = insar_metadata.get('END_DATE', None)
start_date_gnss, end_date_gnss = (dt.strptime(day, "%Y%m%d") for day in (start_date, end_date))

# scene-average incidence and azimuth angles from the geometry file
geom_file = os.path.join(mintpy_dir, 'geometryGeo.h5')
inc_angle = np.nanmean(readfile.read(geom_file, datasetName='incidenceAngle')[0])
az_angle = np.nanmean(readfile.read(geom_file, datasetName='azimuthAngle')[0])

# the station table must have been prepared beforehand
if not os.path.exists(gnss_csv):
    raise FileNotFoundError(f"{gnss_csv}- Not Found and should be created by run0_gnss_download_screen.py")

gnss_df = pd.read_csv(gnss_csv)
rejected_gnss_df = pd.read_csv(rejected_gnss_csv_file)
# safety net: drop every station that is also listed in the rejected-stations table
gnss_df = gnss_df[~gnss_df['site'].isin(rejected_gnss_df['site'])].reset_index(drop=True)

site_lats_wgs84 = gnss_df['lat']
site_lons_wgs84 = gnss_df['lon']
site_names = [str(stn) for stn in gnss_df['site']]

print("Initial list of {} stations used in analysis:".format(len(site_names)))
print(site_names)

<a id='secular_gps_ts'></a>
## 4.3. Get GNSS Position Time Series


In [None]:
# get daily position solutions for GNSS stations and screen them:
# a station is kept only if it falls inside the InSAR raster, has enough epochs
# in the analysis period, is not too noisy and yields a finite LOS velocity
use_stn = []  #stations to keep
bad_stn = []  #stations to toss
# track latlon coordinates
use_lats_keepwgs84 = [] 
use_lons_keepwgs84 = []
# get array dim
insar_shape = [int(insar_metadata['LENGTH']),
               int(insar_metadata['WIDTH'])]
insar_coord = ut.coordinate(insar_metadata)

for counter, stn in enumerate(site_names):
    gps_obj = GNSS(site = stn,
                   data_dir = os.path.join(mintpy_dir,f'GNSS-{gnss_source}'))
    gps_obj.open(print_msg=False)

    # get station lat/lon and the matching raster row/col
    gps_lat, gps_lon = gps_obj.get_site_lat_lon()
    gps_y, gps_x = insar_coord.geo2radar(gps_lat, gps_lon)[:2]
    
    # only proceed if station is within valid bounds; a negative row/col lies
    # outside the raster just as much as one at or beyond LENGTH/WIDTH
    if not (0 <= gps_y < insar_shape[0] and 0 <= gps_x < insar_shape[1]):
        print(f'Skipping {stn} since it is outside of valid TS bounds')
        bad_stn.append(stn)
        continue
    
    # for this quick screening check of data quality, we use the constant incidence and azimuth angles 
    # get standard deviation of residuals to linear fit
    ARIA_los = ut.enu2los(gps_obj.dis_e, gps_obj.dis_n, gps_obj.dis_u, inc_angle, az_angle)
    ARIA_detrended = signal.detrend(ARIA_los)
    stn_stdv = np.std(ARIA_detrended)

    # to remove NaN gnss velocity
    gnss_velocity = gnss.get_los_obs(insar_metadata, 
                            'velocity', 
                            [stn], 
                            start_date=start_date,
                            end_date=end_date,
                            source=gnss_source,
                            gnss_comp='enu2los', 
                            #model = step_model, 
                            redo=True, print_msg=False)
    
    # count number of dates in time range (single histogram bin [start, end]);
    # take the scalar out of the 1-element count array explicitly, since int() on
    # an array with ndim > 0 is deprecated in recent NumPy
    dates = gps_obj.dates
    range_days = (end_date_gnss - start_date_gnss).days
    gnss_count = np.histogram(dates, bins=[start_date_gnss, end_date_gnss])
    gnss_count = int(gnss_count[0][0])

    # select GNSS stations based on data completeness and scatter of residuals
    if range_days * gnss_completeness_threshold <= gnss_count:
        if (stn_stdv > gnss_residual_stdev_threshold) or np.isnan(gnss_velocity):
            bad_stn.append(stn)
        else:
            use_stn.append(stn)
            use_lats_keepwgs84.append(site_lats_wgs84[counter])
            use_lons_keepwgs84.append(site_lons_wgs84[counter])
    else:
        bad_stn.append(stn)

# from here on only the retained stations are used
site_names = use_stn
site_lats_wgs84 = use_lats_keepwgs84
site_lons_wgs84 = use_lons_keepwgs84

print("\nFinal list of {} stations used in analysis:".format(len(site_names)))
print(site_names)
print("List of {} stations removed from analysis".format(len(bad_stn)))
print(bad_stn)

<a id='secular_gps_los'></a>
## 4.4. Project GNSS to LOS Velocities

In [None]:
# LOS-projected GNSS velocity for every retained station over the InSAR analysis period.
# The step-function model is currently disabled (see the commented-out `model` argument).
gnss_velocities = gnss.get_los_obs(insar_metadata, 
                            'velocity', 
                            site_names, 
                            start_date=start_date,
                            end_date=end_date,
                            source=gnss_source,
                            gnss_comp='enu2los', 
                            #model = step_model, 
                            redo=True)

# scale site velocities from m/yr to mm/yr
gnss_velocities *= 1000.

# two-column listing: station name, LOS velocity
print('\n site   vel_los [mm/yr]')
print(np.array([site_names, gnss_velocities]).T)

<a id='secular_gps_insar'></a>
## 4.5. Re-Reference GNSS and LOS Velocities


In [None]:
def create_polygon_globe_inset(lon_lat_coordinates, rect=[0.7, 0.7, 0.3, 0.3]):
    """Add a small globe inset that outlines a polygon in red.

    Parameters
    ----------
    lon_lat_coordinates : sequence of (lon, lat) vertex pairs
    rect : position of the inset axes, handed to ``plt.axes``

    Returns
    -------
    The inset axes, in an orthographic projection centred on the mean
    longitude/latitude of the vertices.
    """
    lons = [lon for lon, _ in lon_lat_coordinates]
    lats = [lat for _, lat in lon_lat_coordinates]

    # globe view centred on the polygon
    globe = ccrs.Orthographic(central_longitude=np.mean(lons),
                              central_latitude=np.mean(lats))
    inset_ax = plt.axes(rect, projection=globe)

    # show the whole hemisphere so the globe appears circular
    inset_ax.set_global()

    # base map: land, ocean, coastlines and country borders
    base_layers = (
        (cfeature.LAND, dict(facecolor='lightgray', edgecolor='none')),
        (cfeature.OCEAN, dict(facecolor='lightblue', edgecolor='none')),
        (cfeature.COASTLINE, dict(edgecolor='black', linewidth=0.5)),
        (cfeature.BORDERS, dict(linestyle=':', edgecolor='gray')),
    )
    for layer, layer_style in base_layers:
        inset_ax.add_feature(layer, **layer_style)

    # polygon outline, then the segment joining the first and last vertex to close it
    outline_style = dict(transform=ccrs.Geodetic(), color='red', linewidth=1)
    inset_ax.plot(lons, lats, **outline_style)
    first_vertex, last_vertex = lon_lat_coordinates[0], lon_lat_coordinates[-1]
    inset_ax.plot([first_vertex[0], last_vertex[0]],
                  [first_vertex[1], last_vertex[1]],
                  **outline_style)

    inset_ax.gridlines(color='gray', alpha=0.5, linestyle='--')

    # hide the outline of the Earth
    inset_ax.spines['geo'].set_visible(False)

    return inset_ax

def rasterWrite(outtif, arr, transform, crs, dtype=None, nodata=np.nan):
    """Write a 2D array to a single-band GeoTIFF with rasterio.

    Parameters
    ----------
    outtif : output file path
    arr : 2D array written as band 1
    transform : affine transform of the raster
    crs : coordinate reference system of the raster
    dtype : output data type; defaults to the dtype of `arr`
    nodata : no-data value stored in the file
    """
    out_dtype = arr.dtype if dtype is None else dtype
    n_rows, n_cols = arr.shape[0], arr.shape[1]
    profile = dict(driver='GTiff',
                   height=n_rows, width=n_cols,
                   count=1, dtype=out_dtype,
                   crs=crs,
                   transform=transform, nodata=nodata)
    with rasterio.open(outtif, 'w', **profile) as dst:
        dst.write(arr, 1)

def plot_insar_cartopy(insar_velocities, ARIA_region, vmin, vmax,
                         lats, lons, gnss_velocities, site,
                         sites_df, site_names, ref_lat, ref_lon,
                         insar_metadata, output_dir, sat_flag=True,
                         use_quiver=False, quiver_arrow_scale_factor=0.1):
    """Plot the InSAR velocity map with GNSS stations on a Cartopy map and save it as PNG.

    Parameters
    ----------
    insar_velocities : 2-D array of InSAR velocities shown with the 'jet' colormap
    ARIA_region : sequence of 4 floats
        Indices 2 and 1 give the x/y origin of the raster and indices 0-3 give
        the inset polygon corners.
        NOTE(review): appears to be ordered (south, north, west, east) - confirm.
    vmin, vmax : color limits of the velocity map (and of the GNSS markers)
    lats, lons : GNSS station coordinates, in the same order as ``site_names``
    gnss_velocities : GNSS velocities, one per station
    site : site key; used in the title (without its first character) and in the file name
    sites_df : not used by this function; kept for interface compatibility
    site_names : station labels drawn next to each station
    ref_lat, ref_lon : location of the reference station (black square)
    insar_metadata : mapping providing 'X_STEP', 'Y_STEP', 'START_DATE' and 'END_DATE'
    output_dir : directory for the output PNG and the temporary GeoTIFF
    sat_flag : if True, draw a grayscale satellite tile background
    use_quiver : if True, draw GNSS velocities as vertical arrows, otherwise as colored dots
    quiver_arrow_scale_factor : larger values give longer arrows
    """
    # Create CRS objects
    crs = CRS.from_epsg(4326)

    # Calculate the resolution of the input data
    res_x = float(insar_metadata['X_STEP'])
    res_y = float(insar_metadata['Y_STEP'])

    # Create the transform for the input data
    src_transform = Affine.translation(ARIA_region[2], ARIA_region[1]) * Affine.scale(res_x, res_y)

    # Round-trip the velocities through a temporary GeoTIFF to reproject them.
    # The file is only needed until the reprojected array is in memory, so it
    # is closed and removed right away, even if one of the steps fails.
    temp_tif = os.path.join(output_dir, 'temp_ARIA_velocities.tif')
    try:
        rasterWrite(temp_tif, insar_velocities, src_transform, crs)
        src = rioxarray.open_rasterio(temp_tif)
        try:
            reprojected = src.rio.reproject("EPSG:4326")
            minlon, minlat, maxlon, maxlat = reprojected.rio.bounds()
            ARIA_geo = reprojected[0].to_numpy()
        finally:
            src.close()
    finally:
        if os.path.exists(temp_tif):
            os.remove(temp_tif)

    # Create the figure and axis with Cartopy projection
    fig, ax = plt.subplots(figsize=(18, 18), subplot_kw={'projection': ccrs.PlateCarree()})

    if sat_flag:
        import cartopy.io.img_tiles as cimgt
        google_maps = cimgt.GoogleTiles(style='satellite', desired_tile_form="L")
        zoom_level = 8
        ax.add_image(google_maps, zoom_level, alpha=0.4, cmap='gray')

    # Plot reprojected insar velocities
    im = ax.imshow(ARIA_geo, extent=(minlon, maxlon, minlat, maxlat),
                   transform=ccrs.PlateCarree(),
                   cmap='jet', vmin=vmin, vmax=vmax, alpha=1.0, interpolation='none')

    # Add colorbar with reduced height
    cbar = fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02, shrink=0.4)
    cbar.set_label('LOS velocity [mm/year]')

    # Plot GNSS stations
    if use_quiver:
        # Arrows only have a vertical component (the GNSS velocity)
        u = [0] * len(lons)
        v = deepcopy(gnss_velocities)

        # Largest absolute velocity sets the arrow scale; ignore NaNs and fall
        # back to 1 so that the quiver scale is never zero or NaN
        max_velocity = np.nanmax(np.abs(np.asarray(gnss_velocities, dtype=float)))
        if not np.isfinite(max_velocity) or max_velocity == 0:
            max_velocity = 1.0
        arrow_scale_factor = quiver_arrow_scale_factor  # Adjust this value to change arrow length (larger: longer arrow)

        # Plot quiver
        q = ax.quiver(lons, lats, u, v, color='black',
                      scale=max_velocity/arrow_scale_factor, scale_units='inches', width=0.003,
                      headwidth=4, headlength=5, headaxislength=3.5,
                      transform=ccrs.PlateCarree())

        # Add a key for scale
        qk = ax.quiverkey(q, 0.88, 0.05, 10, '10 mm/year', labelpos='E',
                          coordinates='axes', color='black', labelcolor='black',
                          fontproperties={'size': 'small'})

        # Ensure the quiverkey is on top of other elements
        qk.set_zorder(1000)
    else:
        # Color each station marker with the same colormap/limits as the map
        cmap = plt.get_cmap('jet')
        for lat, lon, obs in zip(lats, lons, gnss_velocities):
            color = cmap((obs - vmin)/(vmax - vmin))
            ax.plot(lon, lat, marker='o', color=color, markersize=8, markeredgecolor='k', transform=ccrs.PlateCarree())

    # Plot reference site
    ax.plot(ref_lon, ref_lat, marker='s', color='black', markersize=8, transform=ccrs.PlateCarree())

    # Add site labels
    for i, label in enumerate(site_names):
        ax.text(lons[i], lats[i], label, fontsize=8, ha='left', va='bottom', transform=ccrs.PlateCarree())

    # Add map features
    ax.coastlines(resolution='10m', color='black')  # resolution='50m', '110m'
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.BORDERS)

    # Set up gridlines
    gl = ax.gridlines(draw_labels=True, dms=False, x_inline=False, y_inline=False)
    gl.top_labels = False  # Remove top tick labels
    gl.right_labels = False  # Remove right tick labels
    gl.left_labels = True  # Add left tick labels
    gl.xlocator = mticker.FixedLocator(np.arange(-180,181,1.0))
    gl.ylocator = mticker.FixedLocator(np.arange(-90,91,1.0))

    # Set the extent of the map
    ax.set_extent([minlon, maxlon, minlat, maxlat], crs=ccrs.PlateCarree())

    # Corners of the region outlined on the globe inset
    coordinates = [
        (ARIA_region[2], ARIA_region[1]), (ARIA_region[3], ARIA_region[1]),
        (ARIA_region[3], ARIA_region[0]), (ARIA_region[2], ARIA_region[0])]

    # adding inset
    rect=[0.67, 0.64, 0.1, 0.1]    # dimension of inset [x_location of left, y_location of bottom, inset width, inset height]
    create_polygon_globe_inset(coordinates, rect=rect)

    # Add a title
    ax.set_title(
        f'Secular Displacement \n'
        f'FrameID: {site[1:]}, '
        f'Dates: {insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}',
        fontsize=16
    )

    # Save the figure
    plt.savefig(
        os.path.join(
            output_dir,
            f'Secular_vel_insar_vs_gnss_cartopy_site{site}_'
            f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.png'
        ),
        bbox_inches='tight',
        dpi=300,
        transparent=True
    )
    # plt.close()

In [None]:
# reference GNSS stations to GNSS reference site
ref_site = sites_df[sites_df["site"]==site]["gps_ref_site_name"].values[0]
ref_site_ind = site_names.index(ref_site)
gnss_velocities = gnss_velocities - gnss_velocities[ref_site_ind]

# reference insar to GNSS reference site
ref_site_lat = float(site_lats_wgs84[ref_site_ind])
ref_site_lon = float(site_lons_wgs84[ref_site_ind])

ref_y, ref_x = ut.coordinate(insar_metadata).geo2radar(ref_site_lat, ref_site_lon)[:2]     # x/y location of reference on velocity
if not math.isnan(insar_velocities[ref_y, ref_x]):
    # Median over a (2*pixel_radius+1)-wide window around the reference pixel.
    # The slice bounds are clamped at 0: a negative start would otherwise wrap
    # around to the far side of the grid and select an empty/wrong window when
    # the window crosses the top or left edge.
    ref_vel_px_rad = insar_velocities[max(ref_y-pixel_radius, 0):max(ref_y+1+pixel_radius, 0),
                        max(ref_x-pixel_radius, 0):max(ref_x+1+pixel_radius, 0)]
    ref_insar_site_vel = np.nanmedian(ref_vel_px_rad)
    if np.isnan(ref_insar_site_vel):
        ref_insar_site_vel = 0.
    insar_velocities = insar_velocities - ref_insar_site_vel
else:
    # Without a valid reference pixel the InSAR velocities stay in their
    # original reference frame; make that visible instead of skipping silently
    print(f'WARNING: InSAR velocity at reference site {ref_site} is NaN; '
          'InSAR velocities were NOT re-referenced to the GNSS reference site')

satellite_background_flag = True    # if satellite image is used as background

plot_insar_cartopy(insar_velocities, ARIA_region, vmin, vmax, site_lats_wgs84, site_lons_wgs84, gnss_velocities, 
                   site, sites_df, site_names, ref_site_lat, ref_site_lon, 
                   insar_metadata, output_dir, satellite_background_flag, use_quiver=True, quiver_arrow_scale_factor=0.4)

<a id='secular_ARIA_validation'></a>
# 5. Validation Approach 1: GNSS-InSAR Direct Comparison 


<a id='secular_make_vel'></a>
## 5.1. Make Velocity Residuals at GNSS Locations


In [None]:
# Create dictionary with the stations as the key and all their info as a list:
# [x_value, y_value, insar_site_vel, gnss_site_vel, residual, stn_lat, stn_lon]
stn_dict = {}

# The coordinate converter does not change between stations, so build it once
insar_coord = ut.coordinate(insar_metadata)

# Loop over GNSS station locations
for i, stn in enumerate(site_names):
    # convert GNSS station lat/lon information to velocity x/y grid
    stn_lat = float(site_lats_wgs84[i])
    stn_lon = float(site_lons_wgs84[i])

    y_value, x_value = insar_coord.geo2radar(stn_lat, stn_lon)[:2]

    # get velocities and residuals
    gnss_site_vel = gnss_velocities[i]
    # Median over a (2*pixel_radius+1)-wide window around the station pixel.
    # Slice bounds are clamped at 0 so that a window crossing the top/left edge
    # is truncated instead of wrapping around to the opposite side of the grid
    # (negative slice indices count from the end of the array).
    vel_px_rad = insar_velocities[max(y_value-pixel_radius, 0):max(y_value+1+pixel_radius, 0),
                     max(x_value-pixel_radius, 0):max(x_value+1+pixel_radius, 0)]
    insar_site_vel = np.nanmedian(vel_px_rad)
    if not np.isnan(insar_site_vel):        # when only displacement exists
        residual = gnss_site_vel - insar_site_vel

        # populate data structure
        values = [x_value, y_value, insar_site_vel, gnss_site_vel, residual, stn_lat, stn_lon]
        stn_dict[stn] = values

# extract data from structure (dict order == station insertion order)
site_names_used = list(stn_dict)
insar_site_vels = [stn_dict[stn][2] for stn in site_names_used]
gnss_site_vels = [stn_dict[stn][3] for stn in site_names_used]
res_list = [stn_dict[stn][4] for stn in site_names_used]
north_list = [stn_dict[stn][5] for stn in site_names_used]    # station latitudes
east_list = [stn_dict[stn][6] for stn in site_names_used]     # station longitudes

num_stn = len(site_names_used)
# keep the original station order so the report is reproducible between runs
site_names_removed = [name for name in site_names if name not in stn_dict]

print(f"The GNSS sites ({num_stn} stations) will be used for residual analysis: \n {site_names_used}")
print(f"The GNSS sites  ({len(site_names_removed)} stations) are removed due to the absence of ARIA velocity: \n {site_names_removed}")

print('Finish creating ARIA residuals at GNSS sites')

<a id='secular_make_velres'></a>
## 5.2. Make Double-Differenced Velocity Residuals


In [None]:
# Build the double-differenced residuals for every unique station pair
n_gps_sites = len(site_names_used)
diff_res_list = []
stn_dist_list = []

# visit each unordered pair (stn1, stn2) exactly once
for i, stn1 in enumerate(site_names_used[:-1]):
    for stn2 in site_names_used[i + 1:]:
        # stn_dict entries: [x, y, insar_vel, gnss_vel, residual, lat, lon]
        info1 = stn_dict[stn1]
        info2 = stn_dict[stn2]

        # velocity difference between the two stations, per technique
        gps_vel_diff = info1[3] - info2[3]
        insar_vel_diff = info1[2] - info2[2]

        # double difference: GNSS pair difference minus ARIA pair difference
        diff_res_list.append(gps_vel_diff - insar_vel_diff)

        # station separation from haversine_distance, called with
        # (lon1, lat1, lon2, lat2); index 6 holds longitude, index 5 latitude
        stn_dist_list.append(haversine_distance(info1[6], info1[5],
                                                info2[6], info2[5]))

# Arrays consumed by the statistical tests
gnss_site_dist = np.array(stn_dist_list)
double_diff_rel_measure = np.array(np.abs(diff_res_list))
ndx = np.argsort(gnss_site_dist)

<div class="alert alert-warning">
Final result for Method 1: successful when at least 68% of the points fall below the requirement line.
</div>


<a id='secular_valid_method1'></a>
## 5.3. Secular Requirement Validation: Method 1


In [None]:
# Statistics settings for Method 1
n_bins = 10
# The residuals are assumed to be Gaussian, so the threshold is the
# 1-sigma limit: 68.3% of residuals are expected to lie within it.
threshold = 0.683

# findMax == 'true'  -> search runs while validation fails (flag False)
# anything else      -> search runs while validation passes (flag True)
thresh_flag = not (findMax == 'true')

# start the search from the nominal requirement and enter the loop at least once
tmp_secular_gnss_rqmt = deepcopy(secular_gnss_rqmt)
success_flag = deepcopy(thresh_flag)

In [None]:
# Step the requirement by 0.01 mm/yr until the validation outcome flips.
# The step direction is derived from thresh_flag, the same flag that controls
# the loop condition: findMax is the *string* 'true'/'false', so comparing it
# with the boolean True never matches and the requirement would always be
# decremented (and the find-maximum search would never terminate).
rqmt_step = -0.01 if thresh_flag else 0.01

while success_flag == thresh_flag:
    validation_table, fig = display_validation(gnss_site_dist,                 # binned distance for point
                                               double_diff_rel_measure,        # binned double-difference velocities mm/yr
                                               site,                           # cal/val site name
                                               start_date,                     # start date of insar dataset
                                               end_date,                       # end date of insar dataset 
                                               requirement=tmp_secular_gnss_rqmt,  # measurement requirement to meet, e.g 2 mm/yr for 3 years of data over 0.1-50km
                                               distance_rqmt=gnss_dist_rqmt,   # distance over requirement is to meet, e.g. over length scales of 0.1-50 km [0.1, 50] 
                                               n_bins=n_bins,                  # number of bins, to collect statistics 
                                               threshold=threshold,            # quantile threshold for point-pairs that pass requirement, e.g. 0.683 - we expect 68.3% of residuals to lie. 
                                               sensor='Sentinel-1',            # sensor that is validated, Sentinel-1 or NISAR
                                               validation_type='secular',      # validation for: secular, transient, coseismic requirement
                                               validation_data='GNSS')         # validation method: GNSS - Method 1, insar - Method 2

    # overall pass/fail: last column of the 'Total' row
    success_flag = validation_table.loc['Total'][validation_table.columns[-1]]

    out_fig = (
        f'{output_dir}/VA1_secular_insar-gnss_velocity_vs_distance_site{site}_'
        f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}_'
        f'{tmp_secular_gnss_rqmt}.png'
    )
    fig.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

    # round to the step resolution so float noise (e.g. 2.9800000000000004)
    # does not accumulate or leak into the output file names
    tmp_secular_gnss_rqmt = round(tmp_secular_gnss_rqmt + rqmt_step, 2)

In [None]:
# Report the requirement value left behind by the search loop.
# NOTE(review): the loop applies its 0.01 step after the last evaluation, so this
# value is one step past the last requirement that was actually tested.
print(f'final gnss rqmt number: {tmp_secular_gnss_rqmt}')

In [None]:
# Show the validation table produced by the last display_validation call (Method 1)
display_validation_table(validation_table)

In [None]:
# Export the Method 1 validation table as CSV and as styled HTML
csv_filename = (
    f'{output_dir}/VA1_secular_insar-gnss_velocity_vs_distance_table_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.csv'
)
validation_table.to_csv(csv_filename)

# the styled table gets a background gradient before being rendered to HTML
styled_table = display_validation_table(validation_table).background_gradient()
html = styled_table.to_html()

html_filename = (
    f'{output_dir}/VA1_secular_insar-gnss_velocity_vs_distance_table_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.html'
)
with open(html_filename, "w") as f:
    f.write(html)

In [None]:
# Render the exported HTML table to a PNG image with imgkit
png_filename = (
    f'{output_dir}/VA1_secular_insar-gnss_velocity_vs_distance_table_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.png'
)

options = dict(
    format='png',
    encoding="UTF-8",
    zoom=2,  # Increase this value for higher resolution
)
imgkit.from_file(html_filename, png_filename, options=options)


<div class="alert alert-warning">
Final result for the Method 1 table, by distance bin: a bin is successful when the fraction of passing points is greater than 0.683.
</div>


<a id='secular_ARIA_validation2'></a>
# 6. Validation Approach 2: InSAR-only Structure Function

In Validation approach 2, we use a time interval and area where we assume no deformation.

<a id='secular_array_mask'></a>
## 6.1. Read Array and Mask Pixels with no Data

In [None]:
# Method 2 works on the InSAR velocity field itself; the analysed period is
# the full InSAR time span
velStart, velEnd = start_date, end_date

# display map of data after masking
cmap = plt.get_cmap('jet')

fig, ax = plt.subplots(figsize=[15, 15])
im = ax.imshow(
    insar_velocities,
    cmap=cmap,
    vmin=vmin,
    vmax=vmax,
    interpolation='nearest',
)

# compact vertical colorbar next to the map
cbar = fig.colorbar(im, ax=ax, orientation='vertical', pad=0.02, shrink=0.4)
cbar.set_label('LOS velocity [mm/year]')

ax.set_title(f"Secular \n {velStart} - {velEnd}")
ax.axis('off')
plt.show()

<a id='secular_remove_trend'></a>
## 6.2. Randomly Sample Pixels and Pair Them Up with Option to Remove Trend

In [None]:
sample_mode = 'points'  # 'points', 'profile'
# note that the 'profile' method may take significantly longer even with multi processing

# coordinate vectors of the velocity grid, expanded to one value per pixel
X0,Y0 = load_geo(insar_metadata)
X0_2d,Y0_2d = np.meshgrid(X0,Y0)

# Collect samples using the specified method
if sample_mode == 'points':

    # pair up randomly drawn pixels
    insar_sample_dist, insar_rel_measure = samp_pair(X0_2d,
                                                         Y0_2d,
                                                         insar_velocities,
                                                         num_samples=1000000)

elif sample_mode == 'profile':

    # sample along profiles; operates on flattened arrays
    insar_sample_dist, insar_rel_measure = profile_samples(X0_2d.reshape(-1),
                                                                   Y0_2d.reshape(-1),
                                                                   insar_velocities.reshape(-1),
                                                                   len_rqmt=insar_dist_rqmt,
                                                                   num_samples=6000)

else:
    # fail loudly: otherwise later cells would use stale or undefined samples
    raise ValueError(f"Unsupported sample_mode {sample_mode!r}; choose 'points' or 'profile'")

print('Finished sampling')

In [None]:
# One histogram per sampled quantity: (values, title template, x label, x limits)
hist_specs = [
    (insar_sample_dist,
     "Histogram of distance \n Secular Date {:s} - {:s}",
     r'Distance ($km$)',
     insar_dist_rqmt),
    (insar_rel_measure,
     "Histogram of Relative Measurement \n Secular Date {:s} - {:s}",
     r'Relative Measurement ($mm/year$)',
     None),
]

for hist_values, hist_title, hist_xlabel, hist_xlim in hist_specs:
    fig, ax = plt.subplots(figsize=[18, 5.5])
    img1 = ax.hist(hist_values, bins=100)
    ax.set_title(hist_title.format(start_date, end_date))
    ax.set_xlabel(hist_xlabel)
    ax.set_ylabel('Frequency')
    # only the distance histogram is restricted to the requirement range
    if hist_xlim is not None:
        ax.set_xlim(*hist_xlim)

<a id='secular_M2ampvsdist2'></a>
## 6.3. Amplitude vs. Distance of Relative Measurements (pair differences)

In [None]:
# Statistics settings for Method 2
n_bins = 10
# The residuals are assumed to be Gaussian, so the threshold is the
# 1-sigma limit: 68.3% of residuals are expected to lie within it.
threshold = 0.683

# findMax == 'true'  -> search runs while validation fails (flag False)
# anything else      -> search runs while validation passes (flag True)
thresh_flag = not (findMax == 'true')

# start the search from the nominal requirement and enter the loop at least once
tmp_secular_insar_rqmt = deepcopy(secular_insar_rqmt)
success_flag = deepcopy(thresh_flag)

In [None]:
# Step the requirement by 0.01 mm/yr until the validation outcome flips.
# The step direction is derived from thresh_flag, the same flag that controls
# the loop condition: findMax is the *string* 'true'/'false', so comparing it
# with the boolean True never matches and the requirement would always be
# decremented (and the find-maximum search would never terminate).
rqmt_step = -0.01 if thresh_flag else 0.01

while success_flag == thresh_flag:
    validation_table, fig = display_validation(insar_sample_dist,              # binned distance for point
                                               insar_rel_measure,              # binned relative velocities mm/yr
                                               site,                           # cal/val site name
                                               start_date,                     # start date of InSAR dataset
                                               end_date,                       # end date of InSAR dataset 
                                               requirement=tmp_secular_insar_rqmt,  # measurement requirement to meet, e.g 2 mm/yr for 3 years of data over 0.1-50km
                                               distance_rqmt=insar_dist_rqmt,   # distance over requirement is to meet, e.g. over length scales of 0.1-50 km [0.1, 50] 
                                               n_bins=n_bins,                  # number of bins, to collect statistics 
                                               threshold=threshold,            # quantile threshold for point-pairs that pass requirement, e.g. 0.683 - we expect 68.3% of residuals to lie. 
                                               sensor='Sentinel-1',            # sensor that is validated, Sentinel-1 or NISAR
                                               validation_type='secular',      # validation for: secular, transient, coseismic requirement
                                               validation_data=validation_data)         # validation method: GNSS - Method 1, ARIA - Method 2

    # overall pass/fail: last column of the 'Total' row
    success_flag = validation_table.loc['Total'][validation_table.columns[-1]]

    out_fig = (
        f'{output_dir}/VA2_secular_{validation_data}-only_vs_distance_site{site}_'
        f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}_'
        f'{tmp_secular_insar_rqmt}.png'
    )

    fig.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

    # round to the step resolution so float noise (e.g. 2.9800000000000004)
    # does not accumulate or leak into the output file names
    tmp_secular_insar_rqmt = round(tmp_secular_insar_rqmt + rqmt_step, 2)

In [None]:
# Report the requirement value left behind by the search loop.
# NOTE(review): the loop applies its 0.01 step after the last evaluation, so this
# value is one step past the last requirement that was actually tested.
print(f'final secular rqmt number: {tmp_secular_insar_rqmt}')

<div class="alert alert-warning">
Final result for Method 2: successful when at least 68% of the points fall below the requirement line.
</div>


<a id='secular_M2RelMeasTable'></a>
## 6.4. Bin Sample Pairs by Distance Bin and Calculate Statistics

In [None]:
# Show the validation table produced by the last display_validation call (Method 2)
display_validation_table(validation_table)

In [None]:
# Export the Method 2 validation table as CSV and as styled HTML
csv_filename = (
    f'{output_dir}/VA2_secular_{validation_data}-only_vs_distance_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.csv'
)
validation_table.to_csv(csv_filename)

# the styled table gets a background gradient before being rendered to HTML
styled_table = display_validation_table(validation_table).background_gradient()
html = styled_table.to_html()

html_filename = (
    f'{output_dir}/VA2_secular_{validation_data}-only_vs_distance_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.html'
)
with open(html_filename, "w") as f:
    f.write(html)

In [None]:
# Render the exported HTML table to a PNG image with imgkit
png_filename = (
    f'{output_dir}/VA2_secular_{validation_data}-only_vs_distance_table_site{site}_'
    f'date{insar_metadata["START_DATE"]}-{insar_metadata["END_DATE"]}.png'
)
# out_fig is rebound to the table image path as well
out_fig = png_filename

options = dict(
    format='png',
    encoding="UTF-8",
    zoom=2,  # Increase this value for higher resolution
)
imgkit.from_file(html_filename, png_filename, options=options)

<div class="alert alert-warning">
Final result for the Method 2 table of distance bins: a bin is successful when at least
    68% (0.683) of its points fall below the requirement line.
</div>


<a id='secular_appendix1'></a>
# Appendix: Supplementary Comparisons and Plots

<a id='secular_compare_raw'></a>
## A.1. Compare Raw Velocities

In [None]:
# Overlay the InSAR and GNSS velocity distributions at the stations used
vmin, vmax = -25, 25

# binning shared by both histograms so that the bars line up
hist_kwargs = dict(range=[vmin, vmax], bins=50, edgecolor='grey')

plt.figure(figsize=(11,7))
plt.hist(insar_site_vels, color="green", label=f'V_{validation_data}', **hist_kwargs)
# GNSS drawn semi-transparent on top of the InSAR bars
plt.hist(gnss_site_vels, color="orange", label='V_gnss', alpha=0.5, **hist_kwargs)
plt.legend(loc='upper right')

hist_title = f"Velocities \n Date range {start_date}-{end_date} \n Reference stn: {ref_site} \n Number of stations used: {num_stn}"
plt.title(hist_title)
plt.xlabel('LOS Velocity (mm/year)')
plt.ylabel('N Stations')
plt.ylim(0,20)
plt.show()

<a id='secular_plot_vel'></a>
## A.2. Plot Velocity Residuals


In [None]:
# Histogram of the GNSS - InSAR velocity residuals at the stations used
vmin, vmax = -10, 10

residual_label = f'V_gnss - V_{validation_data} (area average)'
residual_title = f"Residuals \n Date range {start_date}-{end_date} \n Reference stn: {ref_site} \n Number of stations used: {num_stn}"

plt.figure(figsize=(11,7))
plt.hist(
    res_list,
    bins=40,
    range=[vmin,vmax],
    edgecolor='grey',
    color="darkblue",
    linewidth=1,
    label=residual_label,
)
plt.legend(loc='upper right')
plt.title(residual_title)
plt.xlabel('Velocity Residual (mm/year)')
plt.ylabel('N Stations')
plt.show()

<a id='secular_plot_velres'></a>
## A.3. Plot Double Difference Residuals

In [None]:
# Histogram of the double-differenced residuals over all station pairs;
# vmin/vmax are whatever limits are currently set in the notebook
dd_label = f'V_gnss_(s1-s2) - V_{validation_data}_(s1-s2)'
dd_title = f"Difference Residuals \n Date range {start_date}-{end_date} \n Reference stn: {ref_site} \n Number of stations used: {num_stn}"

plt.figure(figsize=(11,7))
plt.hist(
    diff_res_list,
    range=[vmin, vmax],
    bins=40,
    color="darkblue",
    edgecolor='grey',
    label=dd_label,
)
plt.legend(loc='upper right')
plt.title(dd_title)
plt.xlabel('Double Differenced Velocity Residual (mm/year)')
plt.ylabel('N Stations')
plt.show()

<a id='secular_appendix_gps'></a>
## A.4. GNSS Timeseries Plots


In [None]:
gnss_ts_plots_flag = True  # if gnss timeseries will be plotted. Reading timeseries may require large memory

# displacements are multiplied by `scale` for plotting and labelled with `unit`
unit = 'cm'
scale = 100

# Redirect outputs to the GNSS plot sub-directory. The guard keeps the path
# from nesting one level deeper every time this cell is re-run.
gnss_plot_subdir = f'{gnss_dir}_plots'
if not str(output_dir).rstrip('/').endswith(gnss_plot_subdir):
    output_dir = f'{output_dir}/{gnss_plot_subdir}'     # absolute path of output directory
os.makedirs(output_dir, exist_ok=True)

# set subplot positions
subplots_positions = {'ts': [0.03, 0.22, 0.73, 0.76],
                      'sp': [0.83, 0.25, 0.16, 0.73],
                      'hist': [0.83, 0.01, 0.16, 0.15],
                      'ts_dif': [0.03, 0.02, 0.73, 0.17]}
cm = 1/2.54 # centimetres -> inches, used for the figure width X height

# Load TS errors (second column of the residual RMS file, one value per line)
error_file = f'{mintpy_dir}/rms_timeseriesResidual_ramp.txt'
error_fc = np.loadtxt(error_file, dtype=bytes).astype(str)
error_ts = error_fc[:, 1].astype(float)

if gnss_ts_plots_flag:
    # grab the time-series file used for time function estimation given the template setup
    template = readfile.read_template(os.path.join(mintpy_dir, 'smallbaselineApp.cfg'))
    template = ut.check_template_auto_value(template)

    # read the time-series file
    #Caution: If you expand the radius parameter farther than the bounding grid it will break. 
    #To fix, remove the station in section 4 when the site_names list is filtered
    # NOTE(review): this box uses the ref_x/ref_y already defined in the notebook;
    # they are recomputed from the GNSS reference site further below - confirm both agree
    read_box = (ref_x-pixel_radius, ref_y-pixel_radius,
                ref_x+1+pixel_radius, ref_y+1+pixel_radius)
    OG_ref_insar_dis, atr = readfile.read(insar_ts_file, datasetName='timeseries',
        box=read_box)
    print(f'reading timeseries from file: {insar_ts_file}')

    # Get date list
    date_list = timeseries(insar_ts_file).get_date_list()
    num_date = len(date_list)
    date0, date1 = date_list[0], date_list[-1]
    insar_dates = ptime.date_list2vector(date_list)[0]

    # spatial reference
    coord = ut.coordinate(atr)
    ref_gnss_obj = GNSS(site=ref_site,
                        data_dir=os.path.join(mintpy_dir, f'GNSS-{gnss_source}'))
    ref_lat, ref_lon = ref_gnss_obj.get_site_lat_lon()
    ref_y, ref_x = coord.geo2radar(ref_lat, ref_lon)[:2]

    # reference displacement per date: median over the reference window
    # (0 when the whole window is NaN for that date)
    ref_insar_dis = np.zeros(len(OG_ref_insar_dis))
    for i in range(len(OG_ref_insar_dis)):
        ts_med_slice = np.nanmedian(OG_ref_insar_dis[i])
        if np.isnan(ts_med_slice):
            ts_med_slice = 0.
        ref_insar_dis[i] = ts_med_slice

    # Plot displacements and velocity timeseries at GNSS station locations
    num_site = len(site_names_used)
    prog_bar = ptime.progressBar(maxValue=num_site)
    for i, site_name in enumerate(site_names_used):
        # show the station currently being processed (not the whole station list)
        prog_bar.update(i+1, suffix=f'{site_name} {i+1}/{num_site}')

        ## read data
        # read GNSS
        gnss_obj = GNSS(site=site_name,
                        data_dir=os.path.join(mintpy_dir, f'GNSS-{gnss_source}'))
        gnss_dates, gnss_dis, gnss_std, gnss_lalo = gnss_obj.get_los_displacement(
            atr, start_date=date0, end_date=date1, ref_site=ref_site)[:4]
        # shift GNSS to zero-mean in time [for plotting purpose]
        gnss_dis -= np.nanmedian(gnss_dis)
        # scale GNSS stdev relative to its first epoch; skipped for an empty
        # series, which would otherwise raise IndexError before the plot guard below
        if len(gnss_std) > 0:
            gnss_std = np.array([abs(j-gnss_std[0]) for j in gnss_std])

        # read ARIA
        y, x = coord.geo2radar(gnss_lalo[0], gnss_lalo[1])[:2]
        insar_ts, _ = readfile.read(insar_ts_file, datasetName='timeseries',
            box=(x,y,x+1,y+1))
        insar_dis = insar_ts - ref_insar_dis
        # apply a constant shift in time to fit ARIA to GNSS
        comm_dates = sorted(list(set(gnss_dates) & set(insar_dates)))
        if comm_dates:
            # set membership keeps the date matching linear in the series length
            comm_date_set = set(comm_dates)
            insar_flag = [d in comm_date_set for d in insar_dates]
            gnss_flag = [d in comm_date_set for d in gnss_dates]
            insar_dis -= np.nanmedian(insar_dis[insar_flag] - gnss_dis[gnss_flag])

        ## plot figure
        if gnss_dis.size > 0 and np.any(~np.isnan(insar_dis)):
            fig = plt.figure(figsize=(18*cm, 6*cm), layout="none", dpi=300)
            fig, ax = _plot_subplots(fig, subplots_positions)
            ax['ts'].errorbar(gnss_dates, gnss_dis*scale, yerr=gnss_std*scale, fmt='o',
                ms=1, c='r', elinewidth=0.1, label="GNSS Daily Positions")
            ax['ts'].errorbar(insar_dates, insar_dis*scale, yerr=error_ts*scale, fmt='o',
                ms=2, c='b', elinewidth=0.2, label="ARIA Positions")

            ax['ts'].set_ylabel(f'LOS displacement [cm]', fontsize=fontsize)
            ax['ts'].legend(fontsize=fontsize)

            ax['ts'].set_title(f'Station Name: {site_name} (ref station: {ref_site})',
                fontsize=fontsize)
            ax['ts'].tick_params(labelsize=fontsize, labelbottom=False)
            ax['ts'].axhline(0, color='gray', lw=0.3, linestyle='--')
            ax['ts'].axvline(ptime.date_list2vector([date0])[0], color='gray', linestyle='--', lw=0.3)

            # Plot scatter plot
            gnss_dt_and_data = [gnss_dates, gnss_dis]
            insar_dt_and_data = [insar_dates, insar_dis]
            df = plot_scatterplot(ax['sp'], gnss_dt_and_data, insar_dt_and_data,
                                'GNSS', 'ARIA',
                                ms=1, fontsize=fontsize-2,
                                scale=scale, unit=unit)

            # histogram of differences
            ax['hist'] = plot_histogram(ax['hist'], df.disp1-df.disp2, scale=scale, unit=unit, fontsize=fontsize-2)

            # Differences
            ax['ts_dif'].bar(df.date1, (df.disp1 - df.disp2)*scale, color='red', width=10, label=f'GNSS-ARIA')
            ax['ts_dif'].set_ylabel(f'Diff. [cm]', fontsize=fontsize)
            ax['ts_dif'].tick_params(labelsize=fontsize)
            # share ticks and limits with the time-series panel above
            ax['ts_dif'].set_xticks(ax['ts'].get_xticks())
            ax['ts_dif'].set_xlim(ax['ts'].get_xlim())
            ax['ts_dif'].axhline(0, color='r', lw=0.5, linestyle='--')
            ax['ts_dif'].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y/%m'))
            ax['ts_dif'].legend(fontsize=fontsize-2)

            # Save the figure
            plt.savefig(
                f'{output_dir}/{site_name}_site{site}.png',
                bbox_inches='tight',
                dpi=300,
                transparent=True
            )
    prog_bar.close()
    plt.show()