# Backscatter Distributions by Slope (Foreslope, Backslope, Flat)

**Alex Lewandowski; Alaska Satellite Facility, University of Alaska Fairbanks**

## Plot the RTC backscatter distributions of each slope category for every MGRS tile and polarization

**Notebook Requires**
- MGRS tiles of prepared OPERA RTC CalVal data created with Prep_OPERA_RTC_CalVal_data_stage1_part3.ipynb

In [None]:
import copy
from ipyfilechooser import FileChooser
import numpy.ma as ma
import numpy as np
from pathlib import Path
import rioxarray as rxr
from scipy.stats import norm

from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import matplotlib.lines as lines
from matplotlib.offsetbox import AnchoredText

In [None]:
# Create a directory-picker widget that starts in the current working directory.
# `fc` is read by the next cell, so its name must not change.
fc = FileChooser(Path.cwd())

# Tell the user what to pick, then render the widget in the notebook output.
print("Select the directory holding your MGRS tile sub-directories")
display(fc)

In [None]:
# Resolve the directory picked in the FileChooser and collect its MGRS tile
# sub-directories into `mgrs` (a list of Path objects used by the plotting cells).
if not fc.selected_path:
    # Without this check Path(None) fails with an unhelpful TypeError.
    raise ValueError(
        "No directory selected: choose a directory in the FileChooser above, then re-run this cell"
    )
data_dir = Path(fc.selected_path)

# Keep only real tile directories: hidden directories (e.g. Jupyter's
# .ipynb_checkpoints) hold no rasters and would break the plotting cells.
# Sorting makes the plotting order reproducible (iterdir() order is arbitrary).
mgrs = sorted(p for p in data_dir.iterdir() if p.is_dir() and not p.name.startswith('.'))
mgrs

## Plot Backscatter Distributions on Original Samples of Varying Size

In [None]:
def _first_match(directory, pattern):
    """Return the alphabetically first path in `directory` matching `pattern`, or None."""
    matches = sorted(directory.glob(pattern))
    return matches[0] if matches else None


def _read_flat(path):
    """Open a raster with `masked=True` and return its values as a flattened 1-D array."""
    return rxr.open_rasterio(str(path), masked=True).to_numpy().flatten()


# For every MGRS tile and polarization, plot the histograms of the foreslope,
# backslope, and flat backscatter values using all available pixels, shade the
# +/- 1 standard deviation band of each histogram, and mark each mean.
for i, m in enumerate(mgrs):
    tile = m.stem
    pols = ['VH', 'VV']

    for p in pols:
        # locate the raster of each slope category
        fore_pth = _first_match(m, f"*{p}_clip_*_foreslope.tif")
        back_pth = _first_match(m, f"*{p}_clip_*_backslope.tif")
        flat_pth = _first_match(m, f"*{p}_clip_*_flat.tif")
        if fore_pth is None or back_pth is None or flat_pth is None:
            # a directory that is not a complete tile must not abort the whole run
            print(f"Tile {tile} is missing one or more {p} slope rasters; skipping")
            continue

        fore = _read_flat(fore_pth)
        back = _read_flat(back_pth)
        flat = _read_flat(flat_pth)

        # identify threshold for high value outliers: the largest of the three
        # 99.5th percentiles. Python's max() is order-dependent when NaN is
        # present, so the NaN cases are handled explicitly instead.
        category_maxima = np.array([np.nanquantile(a, 0.995) for a in (fore, back, flat)])
        if np.isnan(category_maxima).all():
            print(f"Tile {tile} contains no data")
            break
        backscatter_max = np.nanmax(category_maxima)

        # remove high value outliers from datasets (flatten() returned copies,
        # so in-place assignment is safe) and compute the statistics
        fore[fore > backscatter_max] = np.nan
        fore_mean = np.nanmean(fore)
        fore_std = np.nanstd(fore)

        back[back > backscatter_max] = np.nan
        back_mean = np.nanmean(back)
        back_std = np.nanstd(back)

        flat[flat > backscatter_max] = np.nan
        flat_mean = np.nanmean(flat)
        flat_std = np.nanstd(flat)

        # create histograms
        f, ax = plt.subplots(figsize=(18, 8))
        n_bins = 200
        colors = ['blue', 'green', 'darkorange']
        n, bins, patches = ax.hist([fore, back, flat], n_bins, color=colors,
                                   range=[0, backscatter_max], histtype='step')

        # fill 1st standard deviation for each histogram and add line at mean
        std_colors = ['skyblue', 'lightgreen', 'orange']
        means = [fore_mean, back_mean, flat_mean]
        stds = [fore_std, back_std, flat_std]
        for j, hist in enumerate(patches):
            if np.isnan(means[j]):
                # category has no valid pixels: nothing to shade or mark
                continue
            outline = hist[0]
            y_max = outline.get_path().get_extents().y1

            # a filled copy of the histogram outline, clipped to mean +/- 1 std
            std_hist = plt.Polygon(outline.get_path().vertices, color=std_colors[j], fill=True, alpha=0.2)
            ax.add_patch(std_hist)
            # Rectangle takes its lower-left (x, y) corner: x = mean - std, y = 0
            std_clip = Rectangle((means[j] - stds[j], 0), stds[j] * 2, y_max,
                                 fill=True, visible=False)
            ax.add_patch(std_clip)
            std_hist.set_clip_path(std_clip)

            # dashed vertical line at the mean, clipped to the histogram outline
            mean_line = lines.Line2D([means[j], means[j]], [0, y_max], color=std_colors[j], ls='--')
            ax.add_artist(mean_line)
            mean_line.set_clip_path(outline)

        # add annotation with per-category pixel counts, means, and standard deviations
        annotation = AnchoredText(
            (f"PIXEL COUNTS:\n"
             f"foreslope:  {np.count_nonzero(~np.isnan(fore))}\n"
             f"backslope: {np.count_nonzero(~np.isnan(back))}\n"
             f"flat:           {np.count_nonzero(~np.isnan(flat))}\n\n"
             f"MEAN:\n"
             f"foreslope:  {fore_mean}\n"
             f"backslope: {back_mean}\n"
             f"flat:           {flat_mean}\n\n"
             f"STANDARD DEVIATION:\n"
             f"foreslope:  {fore_std}\n"
             f"backslope: {back_std}\n"
             f"flat:           {flat_std}"
            ),
            loc='upper left', prop=dict(size=12), frameon=True, bbox_to_anchor=(1.0,1.0), bbox_transform=ax.transAxes)
        annotation.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(annotation)

        # add histogram legend (each ax.legend() call replaces the previous
        # legend, so earlier legends are re-added as plain artists)
        hist_handles = [lines.Line2D([0,1], [0,0], lw=1, color=c) for c in colors]
        hist_legend = ax.legend(handles=hist_handles, labels=['foreslope','backslope','flat'], loc='upper right')
        ax.add_artist(hist_legend)

        # add standard deviation legend
        std_handles = [Rectangle((0,0),1,1,color=c,ec="k",alpha=0.2) for c in std_colors]
        std_legend = ax.legend(handles=std_handles, labels=['foreslope 1 std', 'backslope 1 std', 'flat 1 std'], loc='center right', bbox_to_anchor=(1,0.75))
        ax.add_artist(std_legend)

        # add mean legend; as the last legend created it is already the Axes'
        # own legend, so it is not re-added with add_artist
        mean_handles = [lines.Line2D([0,0], [0,1], color=c, ls='--') for c in std_colors]
        mean_legend = ax.legend(handles=mean_handles, labels=['foreslope mean', 'backslope mean', 'flat mean'], loc='center right', bbox_to_anchor=(1,0.55))

        ax.set(title=f"Distribution of {p} Foreslope, Backslope, and Flat Backscatter Values\nMGRS: {tile}",
               xlabel='Backscatter',
               ylabel='Frequency')
        plt.show()

    # Uncomment to stop after the first tile while testing:
    # if i == 0:
    #     break

## Plot Backscatter Distributions on Random Samples (no replacement) of Equal Size

In [None]:
def _first_match(directory, pattern):
    """Return the alphabetically first path in `directory` matching `pattern`, or None."""
    matches = sorted(directory.glob(pattern))
    return matches[0] if matches else None


def _read_flat(path):
    """Open a raster with `masked=True` and return its values as a flattened 1-D array."""
    return rxr.open_rasterio(str(path), masked=True).to_numpy().flatten()


# For every MGRS tile and polarization, draw equally sized random samples
# (without replacement) from the foreslope, backslope, and flat backscatter
# values, plot their histograms, shade the +/- 1 standard deviation band of
# each histogram, and mark each mean.
for i, m in enumerate(mgrs):
    tile = m.stem
    pols = ['VH', 'VV']

    for p in pols:
        # locate the raster of each slope category
        fore_pth = _first_match(m, f"*{p}_clip_*_foreslope.tif")
        back_pth = _first_match(m, f"*{p}_clip_*_backslope.tif")
        flat_pth = _first_match(m, f"*{p}_clip_*_flat.tif")
        if fore_pth is None or back_pth is None or flat_pth is None:
            # a directory that is not a complete tile must not abort the whole run
            print(f"Tile {tile} is missing one or more {p} slope rasters; skipping")
            continue

        # open and flatten datasets
        fore = _read_flat(fore_pth)
        back = _read_flat(back_pth)
        flat = _read_flat(flat_pth)

        # identify threshold for high value outliers: the largest of the three
        # 99.5th percentiles. Python's max() is order-dependent when NaN is
        # present, so the NaN cases are handled explicitly instead.
        category_maxima = np.array([np.nanquantile(a, 0.995) for a in (fore, back, flat)])

        # identify empty tiles
        if np.isnan(category_maxima).all():
            print(f"Tile {tile} contains no data")
            break
        backscatter_max = np.nanmax(category_maxima)

        # remove nans and high value outliers from datasets
        # (comparisons with NaN are False, so NaNs are dropped by the same mask)
        fore = fore[fore <= backscatter_max]
        back = back[back <= backscatter_max]
        flat = flat[flat <= backscatter_max]

        # randomly sample largest possible n from datasets without replacement
        min_len = min(len(fore), len(back), len(flat))
        if min_len == 0:
            # an empty category would force every sample to size zero
            print(f"Tile {tile} has a {p} slope category with no valid pixels; skipping")
            continue
        fore = np.random.choice(fore, min_len, replace=False)
        back = np.random.choice(back, min_len, replace=False)
        flat = np.random.choice(flat, min_len, replace=False)

        # calculate means and standard deviations
        fore_mean = np.nanmean(fore)
        fore_std = np.nanstd(fore)
        back_mean = np.nanmean(back)
        back_std = np.nanstd(back)
        flat_mean = np.nanmean(flat)
        flat_std = np.nanstd(flat)

        # create histograms
        f, ax = plt.subplots(figsize=(18, 8))
        n_bins = 200
        colors = ['blue', 'green', 'darkorange']
        n, bins, patches = ax.hist([fore, back, flat], n_bins, color=colors, histtype='step')

        # shade-fill 1st standard deviation for each histogram and add mean lines
        std_colors = ['skyblue', 'lightgreen', 'orange']
        means = [fore_mean, back_mean, flat_mean]
        stds = [fore_std, back_std, flat_std]
        for j, hist in enumerate(patches):
            outline = hist[0]
            y_max = outline.get_path().get_extents().y1

            # a filled copy of the histogram outline, clipped to mean +/- 1 std
            std_hist = plt.Polygon(outline.get_path().vertices, color=std_colors[j], fill=True, alpha=0.2)
            ax.add_patch(std_hist)
            # Rectangle takes its lower-left (x, y) corner: x = mean - std, y = 0
            std_clip = Rectangle((means[j] - stds[j], 0), stds[j] * 2, y_max,
                                 fill=True, visible=False)
            ax.add_patch(std_clip)
            std_hist.set_clip_path(std_clip)

            # dashed vertical line at the mean, clipped to the histogram outline
            mean_line = lines.Line2D([means[j], means[j]], [0, y_max], color=colors[j], ls='--')
            ax.add_artist(mean_line)
            mean_line.set_clip_path(outline)

        # add annotation with per-category pixel counts, means, and standard deviations
        annotation = AnchoredText(
            (f"PIXEL COUNTS:\n"
             f"foreslope:  {np.count_nonzero(~np.isnan(fore))}\n"
             f"backslope: {np.count_nonzero(~np.isnan(back))}\n"
             f"flat:           {np.count_nonzero(~np.isnan(flat))}\n\n"
             f"MEAN:\n"
             f"foreslope:  {fore_mean}\n"
             f"backslope: {back_mean}\n"
             f"flat:           {flat_mean}\n\n"
             f"STANDARD DEVIATION:\n"
             f"foreslope:  {fore_std}\n"
             f"backslope: {back_std}\n"
             f"flat:           {flat_std}"
            ),
            loc='upper left', prop=dict(size=12), frameon=True, bbox_to_anchor=(1.0,1.0), bbox_transform=ax.transAxes)
        annotation.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(annotation)

        # add histogram legend (each ax.legend() call replaces the previous
        # legend, so earlier legends are re-added as plain artists)
        hist_handles = [lines.Line2D([0,1], [0,0], lw=1, color=c) for c in colors]
        hist_legend = ax.legend(handles=hist_handles, labels=['foreslope','backslope','flat'], loc='upper right')
        ax.add_artist(hist_legend)

        # add standard deviation legend
        std_handles = [Rectangle((0,0),1,1,color=c,ec="k",alpha=0.2) for c in std_colors]
        std_legend = ax.legend(handles=std_handles, labels=['foreslope 1 std', 'backslope 1 std', 'flat 1 std'], loc='center right', bbox_to_anchor=(1,0.75))
        ax.add_artist(std_legend)

        # add mean legend; as the last legend created it is already the Axes'
        # own legend, so it is not re-added with add_artist
        mean_handles = [lines.Line2D([0,0], [0,1], color=c, ls='--') for c in colors]
        mean_legend = ax.legend(handles=mean_handles, labels=['foreslope mean', 'backslope mean', 'flat mean'], loc='center right', bbox_to_anchor=(1,0.55))

        ax.set(title=f"Distribution of {p} Foreslope, Backslope, and Flat Backscatter Values\nMGRS: {tile}",
               xlabel='Backscatter',
               ylabel='Frequency')
        plt.show()

    # Uncomment to stop after the first tile while testing:
    # if i == 0:
    #     break

*Backscatter_Distributions_by_Slope - Version 0.1.0 - April 2022*