# Backscatter Distributions by Slope (Foreslope, Backslope, Flat)

**Alex Lewandowski; Alaska Satellite Facility, University of Alaska Fairbanks**

## Plots the RTC backscatter distributions of each slope category (foreslope, backslope, flat) for every MGRS tile and polarization, followed by the combined full-scene distributions

**Notebook Requirements**
- MGRS tiles of prepared OPERA RTC CalVal data created with Prep_OPERA_RTC_CalVal_data_stage1_part3.ipynb

In [None]:
import copy
from ipyfilechooser import FileChooser
import numpy.ma as ma
import numpy as np
from pathlib import Path
import rioxarray as rxr
from scipy.stats import norm

from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import matplotlib.lines as lines
from matplotlib.offsetbox import AnchoredText

import opensarlab_lib as osl

In [None]:
# Ask the user for the directory that holds the MGRS tile sub-directories.
# The chooser opens at the current working directory; the selection is read
# from `fc.selected_path` in the next cell.
print("Select the directory holding your MGRS tile sub-directories")
fc = FileChooser(Path.cwd())
display(fc)

In [None]:
# Resolve the directory picked in the file chooser and collect its MGRS tile
# sub-directories.
if fc.selected_path is None:
    # Path(None) would fail with an opaque TypeError; explain what is missing.
    raise ValueError(
        "No directory selected: choose the directory holding your MGRS tile "
        "sub-directories in the file chooser above, then re-run this cell"
    )
data_dir = Path(fc.selected_path)

# Keep only real tile directories: hidden directories (e.g. .ipynb_checkpoints)
# contain no rasters. Sorting makes the processing order reproducible, since
# iterdir() yields entries in arbitrary order.
mgrs = sorted(
    p for p in data_dir.iterdir()
    if p.is_dir() and not p.name.startswith('.')
)
mgrs

## Plot Backscatter Distributions on Original Samples of Varying Size

In [None]:
# Let the user pick the scale in which backscatter is analysed.
# `scale_choice.value` is compared against 'log scale' when the rasters are
# loaded; the other option leaves the values as read from the rasters.
print("Select the scale in which to work:")
scale_choice = osl.select_parameter(['power scale', 'log scale'])
display(scale_choice)

In [None]:
def plot_backscatter_distributions_by_slope(fore, back, flat, means, stds, polarization, tile, backscatter_minmax=None):
    """Plot overlaid backscatter histograms for foreslope, backslope, and flat samples.

    Each sample set is drawn as a 200-bin step histogram. For every histogram
    the area within one standard deviation of its mean is shaded and a dashed
    vertical line marks the mean. Pixel counts, means, and standard deviations
    are listed in a text box anchored to the right of the axes.

    Parameters
    ----------
    fore, back, flat : array-like
        Backscatter samples of the foreslope, backslope, and flat categories.
        NaN entries are excluded from the reported pixel counts.
    means, stds : sequence of float
        Mean and standard deviation of each sample set, ordered
        (foreslope, backslope, flat). These values drive the shading, the mean
        lines, and the statistics text box.
    polarization : str
        Label inserted into the plot title.
    tile : object
        MGRS tile identifier(s) inserted into the plot title.
    backscatter_minmax : sequence of two floats, optional
        (min, max) passed to ``Axes.hist`` as ``range``.
    """
    # create histograms
    _, ax = plt.subplots(figsize=(18, 8))
    n_bins = 200
    colors = ['blue', 'green', 'darkorange']
    _, _, patches = ax.hist([fore, back, flat], n_bins, color=colors,
                            range=backscatter_minmax, histtype='step')

    # fill 1st standard deviation for each histogram and add line at mean
    std_colors = ['skyblue', 'lightgreen', 'orange']
    for j, hist in enumerate(patches):
        outline = hist[0]
        y_max = outline.get_path().get_extents().y1

        # A filled copy of the histogram outline, clipped to mean +/- 1 std.
        std_hist = plt.Polygon(outline.get_path().vertices, color=std_colors[j], fill=True, alpha=0.2)
        ax.add_patch(std_hist)

        # The clip rectangle must start at y=0 (the histogram baseline) and
        # span the full histogram height; its x extent is mean-std .. mean+std.
        std_clip = Rectangle((means[j] - stds[j], 0), stds[j] * 2, y_max,
                             fill=True, visible=False)
        ax.add_patch(std_clip)
        std_hist.set_clip_path(std_clip)

        # Dashed mean line, clipped so it stops at the histogram outline.
        mean_line = lines.Line2D([means[j], means[j]], [0, y_max], color=colors[j], ls='--')
        ax.add_artist(mean_line)
        mean_line.set_clip_path(outline)

    # Statistics box: values come from the arguments so that the text always
    # describes the data being plotted.
    annotation = AnchoredText(
        (f"PIXEL COUNTS:\n"
         f"foreslope:  {np.count_nonzero(~np.isnan(fore))}\n"
         f"backslope: {np.count_nonzero(~np.isnan(back))}\n"
         f"flat:           {np.count_nonzero(~np.isnan(flat))}\n\n"
         f"MEAN:\n"
         f"foreslope:  {means[0]}\n"
         f"backslope: {means[1]}\n"
         f"flat:           {means[2]}\n\n"
         f"STANDARD DEVIATION:\n"
         f"foreslope:  {stds[0]}\n"
         f"backslope: {stds[1]}\n"
         f"flat:           {stds[2]}"
         ),
        loc='upper left', prop=dict(size=12), frameon=True, bbox_to_anchor=(1.0, 1.0), bbox_transform=ax.transAxes)
    annotation.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
    ax.add_artist(annotation)

    # Each ax.legend() call replaces the axes' current legend, so every legend
    # is re-added as a plain artist to keep all three visible.

    # add histogram legend
    hist_handles = [lines.Line2D([0, 1], [0, 0], lw=1, color=c) for c in colors]
    hist_legend = ax.legend(handles=hist_handles, labels=['foreslope', 'backslope', 'flat'], loc='upper right')
    ax.add_artist(hist_legend)

    # add standard deviation legend
    std_handles = [Rectangle((0, 0), 1, 1, color=c, ec="k", alpha=0.2) for c in std_colors]
    std_legend = ax.legend(handles=std_handles, labels=['foreslope 1 std', 'backslope 1 std', 'flat 1 std'], loc='center right', bbox_to_anchor=(1, 0.75))
    ax.add_artist(std_legend)

    # add mean legend
    mean_handles = [lines.Line2D([0, 0], [0, 1], color=c, ls='--') for c in colors]
    mean_legend = ax.legend(handles=mean_handles, labels=['foreslope mean', 'backslope mean', 'flat mean'], loc='center right', bbox_to_anchor=(1, 0.55))
    ax.add_artist(mean_legend)

    ax.set(title=f"Distribution of {polarization} Foreslope, Backslope, and Flat Backscatter Values\nMGRS: {tile}",
           xlabel='Backscatter',
           ylabel='Frequency')
    plt.show()

In [None]:
def _load_slope_samples(tile_dir, polarization, slope, log_scale):
    """Load one slope-category raster of a tile as a flat array of samples.

    Parameters
    ----------
    tile_dir : pathlib.Path
        Directory of a single MGRS tile.
    polarization : str
        Polarization token in the file name ('VH' or 'VV').
    slope : str
        Slope category suffix: 'foreslope', 'backslope', or 'flat'.
    log_scale : bool
        When True, values are converted with 10 * log10(value).

    Returns
    -------
    numpy.ndarray
        Flattened raster values. The raster is opened with ``masked=True``,
        so nodata pixels are expected to arrive as NaN.

    Raises
    ------
    FileNotFoundError
        If no file in ``tile_dir`` matches the expected name pattern.
    """
    pattern = f"*{polarization}_clip_*_{slope}.tif"
    raster_pth = next(tile_dir.glob(pattern), None)
    if raster_pth is None:
        raise FileNotFoundError(f"No file matching '{pattern}' found in {tile_dir}")

    samples = rxr.open_rasterio(str(raster_pth), masked=True).to_numpy().flatten()
    if log_scale:
        # log10 is undefined for values <= 0: it would yield -inf/NaN, which
        # drags the mean to -inf and makes the histogram range non-finite.
        # Treat such pixels as missing instead.
        samples = 10 * np.log10(np.where(samples > 0, samples, np.nan))
    return samples


# any choice other than 'log scale' leaves the data in power scale
log = scale_choice.value == 'log scale'

pols = ['VH', 'VV']

# identify thresholds for outliers
# change these values to limit the x range of the histograms
# changing these values will NOT change the already calculated means and standard deviations
high_outlier_thresh = 0.000
low_outlier_thresh = high_outlier_thresh

# running full-scene samples per polarization, ordered [foreslope, backslope, flat]
vh_total = [np.array([]), np.array([]), np.array([])]
vv_total = [np.array([]), np.array([]), np.array([])]
totals = {'VH': vh_total, 'VV': vv_total}

for i, m in enumerate(mgrs):
    tile = m.stem

    for p in pols:
        fore = _load_slope_samples(m, p, 'foreslope', log)
        back = _load_slope_samples(m, p, 'backslope', log)
        flat = _load_slope_samples(m, p, 'flat', log)

        # accumulate this tile's samples into the full-scene totals
        for slot, samples in enumerate((fore, back, flat)):
            totals[p][slot] = np.concatenate([totals[p][slot], samples])

        # identify empty tiles
        # Checked per polarization, so an empty VH set does not prevent the
        # VV set of the same tile from being processed.
        if all(np.isnan(samples).all() for samples in (fore, back, flat)):
            print(f"Tile {tile} contains no data")
            continue

        # calculate means and standard deviations
        fore_mean = np.nanmean(fore)
        fore_std = np.nanstd(fore)
        back_mean = np.nanmean(back)
        back_std = np.nanstd(back)
        flat_mean = np.nanmean(flat)
        flat_std = np.nanstd(flat)

        # x range of the histograms: widest quantile span over the three
        # categories. nanmax/nanmin ignore a category without valid samples;
        # Python's max()/min() would give order-dependent results with NaN.
        backscatter_max = np.nanmax(
            [np.nanquantile(samples, 1 - high_outlier_thresh) for samples in (fore, back, flat)]
        )
        backscatter_min = np.nanmin(
            [np.nanquantile(samples, low_outlier_thresh) for samples in (fore, back, flat)]
        )

        # # remove high value outliers from datasets
        # fore_mask = ma.masked_where(fore>backscatter_max, fore)
        # fore = fore_mask.filled(fill_value=np.nan)
        # back_mask = ma.masked_where(back>backscatter_max, back)
        # back = back_mask.filled(fill_value=np.nan)
        # flat_mask = ma.masked_where(flat>backscatter_max, flat)
        # flat = flat_mask.filled(fill_value=np.nan)

        means = [fore_mean, back_mean, flat_mean]
        stds = [fore_std, back_std, flat_std]

        plot_backscatter_distributions_by_slope(fore, back, flat, means, stds, p, tile, [backscatter_min, backscatter_max])

    # if i == 0:
    #     break
        
# Full-scene statistics: per-slope mean and standard deviation computed over
# the samples of all tiles combined (order: foreslope, backslope, flat)
vh_means = [np.nanmean(samples) for samples in vh_total]
vv_means = [np.nanmean(samples) for samples in vv_total]
vh_stds = [np.nanstd(samples) for samples in vh_total]
vv_stds = [np.nanstd(samples) for samples in vv_total]

# Quantile thresholds that bound the x range of the full-scene histograms.
# Adjust them to trim outliers from the plotted range; the means and standard
# deviations computed above are NOT affected by these values.
high_outlier_thresh = 0.000
low_outlier_thresh = high_outlier_thresh
vh_max = max(np.nanquantile(b, 1-high_outlier_thresh) for b in vh_total)
vh_min = min(np.nanquantile(b, low_outlier_thresh) for b in vh_total)
vv_max = max(np.nanquantile(b, 1-high_outlier_thresh) for b in vv_total)
vv_min = min(np.nanquantile(b, low_outlier_thresh) for b in vv_total)

# One combined plot per polarization, titled with the stems of all tiles
plot_backscatter_distributions_by_slope(
    *vh_total,
    vh_means,
    vh_stds,
    'FULL SCENE VH',
    [m.stem for m in mgrs],
    [vh_min, vh_max],
)
plot_backscatter_distributions_by_slope(
    *vv_total,
    vv_means,
    vv_stds,
    'FULL SCENE VV',
    [m.stem for m in mgrs],
    [vv_min, vv_max],
)

*Backscatter_Distributions_by_Slope - Version 0.1.0 - April 2022*