# Assess snowline altitude uncertainty

In [None]:
import os
import glob
import pandas as pd
import xarray as xr
import rioxarray as rxr
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import geopandas as gpd
import sys

## Define inputs and outputs

In [None]:
# Make the project's helper functions importable, then load them under the alias `f`
code_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis/'
functions_dir = os.path.join(code_path, 'functions')
sys.path.append(functions_dir)
import utils as f

# Root folder that holds the study sites
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping'

# Folder where analysis outputs are written
out_path = os.path.join(scm_path, 'analysis')

# Study site names are the folder names under 'study-sites' that start with "RGI"
site_dirs = sorted(glob.glob(os.path.join(scm_path, 'study-sites', 'RGI*')))
rgi_ids = list(map(os.path.basename, site_dirs))
rgi_ids

## Calculate SLA bounds (uncertainty) for all sites and classified images

The original SLA was calculated by sampling the $1-AAR$ percentile of the DEM. For example, if the AAR is 0.8, the SLA is calculated as the 20th percentile of elevations over the glacier area. 

$P_{SLA} = 1-AAR$

$SLA = P_{SLA}(DEM)$

To estimate upper and lower bounds for SLA, identify "misclassified" pixels above and below the SLA, and use those to adjust the SLA percentile. 

For the upper bound, calculate the area of snow-free pixels above the SLA, express it as a fraction of the total glacier area, and add that fraction to the original SLA percentile. Then sample the DEM at the resulting percentile, $P_{upper}$.

$P_{upper} = P_{SLA} + \frac{A_{\text{snow-free, above SLA}}}{A_{\text{glacier}}}$

$SLA_{upper} = P_{upper}(DEM)$

For the lower bound, calculate the area of snow-covered pixels below the SLA, express it as a fraction of the total glacier area, and subtract that fraction from the original SLA percentile. Then sample the DEM at the resulting percentile, $P_{lower}$.

$P_{lower} = P_{SLA} - \frac{A_{\text{snow-covered, below SLA}}}{A_{\text{glacier}}}$

$SLA_{lower} = P_{lower}(DEM)$


In [None]:
# Compile lower/upper SLA bounds for every site and classified image.
#
# If the compiled results file already exists it is simply loaded. Otherwise, for each
# site, the bounds are computed (unless the site's snow cover stats file already contains
# them), written back into the site's snow cover stats file, and appended to the compiled
# results, which are then saved to file.

# Define output file name
sla_bounds_fn = os.path.join(out_path, 'SLA_uncertainty_analysis.csv')

# Columns copied from each site's snow cover stats into the compiled results
sla_bounds_cols = ['RGIId', 'datetime', 'source', 'SLA_from_AAR_m',
                   'SLA_from_AAR_lower_bound_m', 'SLA_from_AAR_upper_bound_m']

if not os.path.exists(sla_bounds_fn):
    # Per-site results; concatenated once after the loop instead of growing a DataFrame
    site_dfs = []

    # Iterate over sites
    for i, rgi_id in enumerate(rgi_ids, start=1):
        site_path = os.path.join(scm_path, 'study-sites', rgi_id)

        # Load snow cover stats
        scs_fn = os.path.join(site_path, f"{rgi_id}_snow_cover_stats.csv")
        # skip if snow cover stats or classified images do not exist
        if not os.path.exists(scs_fn):
            continue
        classified_path = os.path.join(site_path, 'classified')
        if not os.path.exists(classified_path):
            continue
        scs = pd.read_csv(scs_fn)

        # Check if site already has uncertainties columns
        if 'SLA_from_AAR_lower_bound_m' not in scs.columns:
            # Load DEM (skip the site rather than crash when no DEM file is present)
            dem_fns = glob.glob(os.path.join(site_path, 'DEMs', '*.tif'))
            if not dem_fns:
                print(f"{rgi_id}: no DEM found, skipping site")
                continue
            dem = rxr.open_rasterio(dem_fns[0]).isel(band=0)
            # Mask implausible elevations (e.g., no-data fill values)
            dem = xr.where((dem < -1e3) | (dem > 1e4), np.nan, dem)
            dem = dem.rio.write_crs("EPSG:4326")

            # Load AOI
            aoi_fn = os.path.join(site_path, 'AOIs', f"{rgi_id}_outline.shp")
            aoi = gpd.read_file(aoi_fn)
            aoi = aoi.to_crs("EPSG:4326")

            # Clip DEM to AOI
            dem = dem.rio.clip(aoi.geometry)
            dem_min = float(dem.min())
            dem_max = float(dem.max())

            # Bounds start as NaN so that observations which cannot be processed
            # are recorded as missing rather than as an elevation of 0
            sla_lower_bounds = np.full(len(scs), np.nan)
            sla_upper_bounds = np.full(len(scs), np.nan)

            # Iterate over snow cover observations
            for j, (_, sc) in enumerate(tqdm(scs.iterrows(), total=len(scs))):
                # If AAR==1, classified image was completely snow-covered, so set both bounds to minimum elevation
                if sc['AAR'] == 1:
                    sla_lower_bounds[j], sla_upper_bounds[j] = dem_min, dem_min
                    continue

                # If AAR==0, classified image was completely snow-free, so set both bounds to maximum elevation
                if sc['AAR'] == 0:
                    sla_lower_bounds[j], sla_upper_bounds[j] = dem_max, dem_max
                    continue

                # Otherwise, calculate bounds using DEM and the classified image
                dt = sc['datetime']
                source = sc['source']

                # Determine spatial resolution based on source.
                # Unknown sources are skipped instead of silently reusing the dx of a
                # previous observation; add further sources here if needed.
                if source == 'Landsat':
                    dx = 30
                elif 'Sentinel-2' in source:
                    dx = 10
                else:
                    print(f"{rgi_id} {dt}: no spatial resolution defined for source '{source}', leaving bounds as NaN")
                    continue

                # Locate classified image file (file name starts with the date as YYYYMMDD)
                classified_fns = glob.glob(os.path.join(classified_path,
                                                        f"{dt[0:10].replace('-', '')}*_{rgi_id}_{source}_classified.nc"))
                if len(classified_fns) < 1:
                    # No classified image for this observation: report it and leave bounds as NaN
                    print(rgi_id, dt, source)
                    continue

                # Load classified image file
                classified = rxr.open_rasterio(classified_fns[0]).squeeze()
                classified = xr.where(classified == -9999, np.nan, classified)
                classified = classified.rio.write_crs("EPSG:4326")

                # Create binary snow image (classes 1 and 2 are treated as snow)
                snow_binary = xr.where((classified == 1) | (classified == 2), 1, 0)
                snow_binary = xr.where(np.isnan(classified), np.nan, snow_binary)  # re-insert no data values

                # Regrid DEM to classified image grid
                dem_adj = dem.rio.reproject_match(classified)
                dem_adj = xr.where(dem_adj > 1e4, np.nan, dem_adj)
                dem_adj = dem_adj.rio.write_crs("EPSG:4326")

                # Calculate lower and upper bounds of snowline altitude
                # (the first returned value, the original SLA, is not needed here)
                _, sla_lower_bounds[j], sla_upper_bounds[j] = f.calculate_sla_bounds(
                    sc, dem_adj, snow_binary, dx=dx, verbose=False)

            # Add to snow cover stats file
            scs['SLA_from_AAR_lower_bound_m'] = sla_lower_bounds
            scs['SLA_from_AAR_upper_bound_m'] = sla_upper_bounds

            # Re-save to file; index=False keeps the file's column layout unchanged
            # (writing the index would add an extra unnamed column)
            scs.to_csv(scs_fn, index=False)

        # Save the columns of interest for the compiled results
        site_dfs.append(scs[sla_bounds_cols])

        print(f"{i} / {len(rgi_ids)}")

    # Concatenate all sites into the results DataFrame
    if site_dfs:
        sla_bounds_df = pd.concat(site_dfs, axis=0, ignore_index=True)
    else:
        # No site could be processed: keep the expected columns so the code below still runs
        sla_bounds_df = pd.DataFrame(columns=sla_bounds_cols)

    # Save results to file
    sla_bounds_df.to_csv(sla_bounds_fn, index=False)
    print('SLA bounds saved to file:', sla_bounds_fn)

else:
    sla_bounds_df = pd.read_csv(sla_bounds_fn)

# Add column for total range and describe stats
sla_bounds_df['SLA_bounds_range_m'] = np.abs(sla_bounds_df['SLA_from_AAR_upper_bound_m']
                                             - sla_bounds_df['SLA_from_AAR_lower_bound_m'])
# Drop missing values for plotting: NaNs would otherwise produce an empty box plot
plt.boxplot(sla_bounds_df['SLA_bounds_range_m'].dropna(), showfliers=False)
plt.show()

sla_bounds_df['SLA_bounds_range_m'].describe()