# Estimate glacier areas in each image, update transient AARs and snowline altitudes

In [None]:
import os
import glob
import geopandas as gpd
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import rioxarray as rxr
import xarray as xr
from p_tqdm import p_map
from tqdm import tqdm
import multiprocessing

# Define paths in directory for convenience
data_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites'
figures_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis/figures'
# analysis outputs are kept in the "analysis" folder next to the study-sites folder
out_path = os.path.join(data_path, os.pardir, 'analysis')


## Load compiled glacier boundaries

In [None]:
# Read the compiled glacier boundaries (AOIs) from the analysis folder
aois_fn = os.path.join(out_path, 'AOIs.gpkg')
aois = gpd.read_file(aois_fn)
# Store the region columns as integers
for region_col in ('O1Region', 'O2Region'):
    aois[region_col] = aois[region_col].astype(int)
print('All glacier boundaries loaded from file.')

## Define processing functions

In [None]:
# Define function to solve for optimal UTM zone
def convert_wgs_to_utm(lon: float, lat: float):
    """
    Select the UTM zone EPSG code that contains a WGS84 longitude/latitude pair.

    Parameters
    ----------
    lon: float
        longitude coordinate
    lat: float
        latitude coordinate

    Returns
    ----------
    epsg_code: str
        optimal UTM zone, e.g. "EPSG:32606"
    """
    # zones are 6 degrees of longitude wide and numbered from 1 starting at -180
    zone_number = int((np.floor((lon + 180) / 6) % 60) + 1)
    # prefix 326 is used for latitudes >= 0, prefix 327 otherwise
    hemisphere_prefix = 'EPSG:326' if lat >= 0 else 'EPSG:327'
    # zone number is always written with two digits
    return hemisphere_prefix + str(zone_number).zfill(2)


# Define function to estimate glacier, snow, and debris areas, transient AARs, and SLAs from classified images
def estimate_areas_sla(aoi, classified_folder, dem_fn, crs_utm=None, plot_ts=True):
    """
    Estimate surface areas, the transient accumulation area ratio (AAR), and the snowline
    altitude (SLA) for every classified image in a folder.

    Parameters
    ----------
    aoi: geopandas.GeoDataFrame
        glacier boundary. The first geometry is used to select the UTM zone (when crs_utm is None),
        all geometries are used to clip the DEM, and the "RGIId" and "Area" columns are read for the
        figure title when plot_ts is True.
    classified_folder: str
        folder containing the classified images ("*.nc"). The part of each file name before the
        first "_" must be the image datetime formatted as "%Y%m%dT%H%M%S".
    dem_fn: str
        file name of the digital elevation model
    crs_utm: str
        coordinate reference system used for the area calculations, e.g. "EPSG:32606".
        If None, it is determined from the centroid of the first AOI geometry.
    plot_ts: bool
        whether to plot (and return) a figure of the area time series

    Returns
    ----------
    df: pandas.DataFrame
        one row per classified image with a non-zero glacier area, with columns "datetime",
        "snow_area_m2", "glacier_area_m2", "debris_area_m2", "total_area_m2" (non-null pixels),
        "masked_area_m2" (null pixels), "transient_AAR", "SLA_m", "SLA_upper_bound_m",
        and "SLA_lower_bound_m"
    fig: matplotlib.figure.Figure
        time series figure, only returned when plot_ts is True
    """
    # get classified image file names
    classified_fns = sorted(glob.glob(os.path.join(classified_folder, '*.nc')))

    # identify optimal UTM zone for better area calculations
    if crs_utm is None:
        aoi = aoi.to_crs("EPSG:4326")
        # positional access so that the AOI index does not need to start at 0
        aoi_centroid = aoi.geometry.iloc[0].centroid
        crs_utm = convert_wgs_to_utm(aoi_centroid.x, aoi_centroid.y)
        aoi = aoi.to_crs(crs_utm)

    # Load DEM
    dem = rxr.open_rasterio(dem_fn).squeeze()
    dem_crs = dem.rio.crs
    if 'band' in dem.coords:
        if len(np.shape(dem.coords['band'])) > 0:  # NASADEM sometimes has extra bands, only need the first one
            dem = dem.isel(band=0)
        dem = dem.reset_coords('band', drop=True)  # drop the "band" coordinate
    # clip DEM to AOI to speed up later computations; the AOI CRS is passed explicitly
    # because the AOI and the DEM are not guaranteed to share a CRS at this point
    dem = dem.rio.clip(aoi.geometry, crs=aoi.crs)
    dem = xr.where((dem < -1e3) | (dem > 1e4), np.nan, dem)  # mask no-data values
    dem = dem.rio.write_crs(dem_crs)  # xr.where result needs the CRS re-assigned
    dem = dem.rio.reproject(crs_utm)  # reproject to UTM
    dem_min = np.nanmin(dem.data)
    dem_max = np.nanmax(dem.data)

    # iterate over classified images, collecting one record per usable image
    records = []
    for fn in classified_fns:
        # get image datetime from file name
        im_dt = datetime.datetime.strptime(os.path.basename(fn).split('_')[0], '%Y%m%dT%H%M%S')

        # open classified image
        im_classified = rxr.open_rasterio(fn).squeeze()
        if f"EPSG:{im_classified.rio.crs.to_epsg()}" != crs_utm:
            im_classified = im_classified.rio.reproject(crs_utm)
        im_classified = xr.where(im_classified == -9999, np.nan, im_classified)  # mask no-data values
        im_res = im_classified.rio.resolution()[0]
        im_classified = im_classified.rio.write_crs(crs_utm)
        px_area = im_res ** 2

        # Create masks of glacier (classes 1-3), snow (classes 1-2), and debris (class 5) pixels
        # NOTE(review): class 5 is treated as debris here - confirm against the classification legend
        glacier_mask = im_classified.isin([1, 2, 3])
        snow_mask = im_classified.isin([1, 2])
        debris_mask = im_classified.isin([5])

        # Calculate areas of each mask
        glacier_area = glacier_mask.sum().values * px_area
        snow_area = snow_mask.sum().values * px_area
        debris_area = debris_mask.sum().values * px_area
        real_area = im_classified.notnull().sum().values * px_area
        masked_area = im_classified.isnull().sum().values * px_area
        if glacier_area == 0:
            # no glacier pixels: the AAR is undefined, so the image is skipped
            continue

        # Calculate transient AAR
        aar = snow_area / glacier_area

        # Calculate snowline altitude (SLA) from DEM and AAR
        dem_glacier = dem.rio.reproject_match(im_classified)  # reproject DEM to image coordinates
        glacier_elevations = xr.where(glacier_mask, dem_glacier, np.nan)  # mask non-glacier elevations
        elev_glacier = np.ravel(glacier_elevations.data)
        sla_percentile = 1 - aar  # fraction of glacier elevations lying below the SLA
        if aar == 1:
            # fully snow-covered: SLA at the minimum DEM elevation, no bounds
            sla = dem_min
            sla_lower = np.nan
            sla_upper = np.nan
        elif aar == 0:
            # snow-free: SLA at the maximum DEM elevation, no bounds
            sla = dem_max
            sla_lower = np.nan
            sla_upper = np.nan
        else:
            sla = np.nanquantile(elev_glacier, sla_percentile)
            # Calculate lower and upper bounds on the SLA
            # identify snow-free pixels above the SLA and snow-covered pixels below the SLA
            # NOTE(review): "not snow" also matches non-glacier and masked pixels that have a DEM value;
            # confirm whether these should be restricted to glacier pixels
            snow_free_above_sla = (dem_glacier > sla) & ~snow_mask
            snow_covered_below_sla = (dem_glacier < sla) & snow_mask
            snow_free_above_sla_area = np.count_nonzero(snow_free_above_sla.data) * px_area
            snow_covered_below_sla_area = np.count_nonzero(snow_covered_below_sla.data) * px_area
            # convert areas to fractions of the glacier area
            delta_up = snow_free_above_sla_area / glacier_area
            delta_down = snow_covered_below_sla_area / glacier_area
            # adjust SLA percentiles, making sure they stay within [0,1]
            upper_sla_percentile, lower_sla_percentile = np.clip(
                [sla_percentile + delta_up, sla_percentile - delta_down], 0, 1)
            sla_upper = np.nanpercentile(elev_glacier, upper_sla_percentile * 100)
            sla_lower = np.nanpercentile(elev_glacier, lower_sla_percentile * 100)

        # Add results for this image
        records.append({'datetime': im_dt,
                        'snow_area_m2': snow_area,
                        'glacier_area_m2': glacier_area,
                        'debris_area_m2': debris_area,
                        'total_area_m2': real_area,
                        'masked_area_m2': masked_area,
                        'transient_AAR': aar,
                        'SLA_m': sla,
                        'SLA_upper_bound_m': sla_upper,
                        'SLA_lower_bound_m': sla_lower})

    # compile in dataframe (columns are given explicitly so they exist even with no records)
    df = pd.DataFrame(records, columns=['datetime',
                                        'snow_area_m2',
                                        'glacier_area_m2',
                                        'debris_area_m2',
                                        'total_area_m2',
                                        'masked_area_m2',
                                        'transient_AAR',
                                        'SLA_m',
                                        'SLA_upper_bound_m',
                                        'SLA_lower_bound_m'])
    df['datetime'] = pd.to_datetime(df['datetime'])

    # plot time series
    if plot_ts:
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))
        ax.plot(df['datetime'], df['total_area_m2'] / 1e6, 'o', color='k', label='Total (image px)')
        ax.plot(df['datetime'], df['debris_area_m2'] / 1e6, '^', color='#bf812d', label='Debris')
        ax.plot(df['datetime'], df['glacier_area_m2'] / 1e6, '.', color='#35978f', label='Glacier')
        ax.plot(df['datetime'], df['snow_area_m2'] / 1e6, '*b', label='Snow')
        ax.legend(loc='best')
        ax.set_ylabel('Area (km$^2$)')
        fig.suptitle(aoi['RGIId'].values[0] + '\nAOI area = ' + str(aoi['Area'].values[0]) + ' km$^2$')
        fig.tight_layout()
        plt.show()

        return df, fig

    return df

## Run and save new snow cover stats for all sites

In [None]:
# Define order of columns for output files
col_order = [
    'RGIId', 'datetime', 'source',
    'snow_area_m2', 'glacier_area_m2', 'debris_area_m2', 'masked_area_m2',
    'transient_AAR', 'SLA_m', 'SLA_lower_bound_m', 'SLA_upper_bound_m',
    'snowline_elevs_m', 'snowline_elevs_median_m', 'snowline_geometry',
]

# Iterate over sites (one pass per unique RGI ID)
site_ids = aois['RGIId'].drop_duplicates().values
for rgi_id in tqdm(site_ids):
    site_path = os.path.join(data_path, rgi_id)

    # skip sites whose adjusted stats file already exists
    sc_stats_new_fn = os.path.join(site_path, f"{rgi_id}_snow_cover_stats_adjusted.csv")
    if os.path.exists(sc_stats_new_fn):
        continue

    # Load the existing snow cover stats and parse their datetimes
    sc_stats_fn = os.path.join(site_path, f'{rgi_id}_snow_cover_stats.csv')
    sc_stats = pd.read_csv(sc_stats_fn)
    sc_stats['datetime'] = pd.to_datetime(sc_stats['datetime'])

    # Subset AOIs to site, with the index restarting at 0
    aoi = aois.loc[aois['RGIId'] == rgi_id].reset_index(drop=True)

    # Use the first clipped DEM found for the site
    dem_fn = glob.glob(os.path.join(site_path, 'DEMs', f"{rgi_id}*clip*.tif"))[0]

    # Calculate areas, transient AARs, and SLAs from the classified images
    classified_folder = os.path.join(site_path, 'classified')
    df = estimate_areas_sla(aoi, classified_folder, dem_fn, plot_ts=False)

    # Merge with the existing stats on image datetime and keep the output columns in order
    sc_stats = sc_stats.merge(df, on='datetime')[col_order]

    # Save to file
    sc_stats.to_csv(sc_stats_new_fn, index=False)