# Snowline performance assessment

Rainey Aberle

2022/2023

In [None]:
import ee
import geopandas as gpd
import glob
from joblib import dump, load
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import rioxarray as rxr
import rasterio as rio
from scipy import stats
from shapely import wkt
from ast import literal_eval
from shapely.geometry import Point, MultiLineString, LineString, shape, MultiPolygon, Polygon
from shapely.ops import split, unary_union, polygonize, nearest_points
import skimage.io
from skimage import feature
import sys
import wxee as wx
import xarray as xr
import rioxarray as rxr

In [None]:
# -----Define paths and settings (edit these for your machine)
# path to the code repository; must contain the 'functions', 'inputs-outputs',
# and 'figures' folders referenced below
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# path to study-sites: one folder per site holding '<site>_snow_cover_stats.csv'
study_sites_path = '/Volumes/LaCie/raineyaberle/Research/PhD/write-ups/CH1_snow_cover_mapping_methods_manuscript/Aberle_et_al_dataset_submission'
# path to manually digitized (observed) snowlines: one folder per site holding
# the 'images' and 'snowlines' sub-folders
snowlines_obs_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/manually-digitized-snowlines/' 
# path to USGS data: glacier boundary shapefiles and DEMs for each site
usgs_path = '/Volumes/LaCie/raineyaberle/Research/PhD/GIS_data/USGS'

# names of study sites
site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry']
# path for output figures
figures_out_path = os.path.join(base_path, 'figures')

# add path to functions
sys.path.insert(1, os.path.join(base_path, 'functions'))
import pipeline_utils as f

# load dataset dictionary
# (a context manager closes the file handle; the handle is deliberately not
# named `f`, which is the pipeline_utils alias imported above)
dataset_dict_fn = os.path.join(base_path, 'inputs-outputs/datasets_characteristics.json')
with open(dataset_dict_fn) as dataset_dict_file:
    dataset_dict = json.load(dataset_dict_file)

## PlanetScope

In [None]:
# -----Assess PlanetScope snowline performance
# For every manually digitized (observed) snowline, classify the matching
# PlanetScope image, delineate the estimated snowline, and compare the two in
# terms of ground distance and median elevation. Results are cached in a CSV.

# -----Check if output file already exists
results_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_PlanetScope.csv')
if os.path.exists(results_fn):
    print('Performance stats already exist in file, loading...')
    results_df = pd.read_csv(results_fn)
else:

    # -----Load trained classifier and feature columns
    clf_fn = os.path.join(base_path, 'inputs-outputs', 'PlanetScope_classifier_all_sites.joblib')
    clf = load(clf_fn)
    feature_cols_fn = os.path.join(base_path, 'inputs-outputs', 'PlanetScope_feature_columns.json')
    with open(feature_cols_fn) as feature_cols_file:
        feature_cols = json.load(feature_cols_file)
    dataset = 'PlanetScope'
    ndsi_bands = dataset_dict[dataset]['NDSI_bands']

    # -----Loop through sites
    result_dfs = []  # one single-row dataframe per evaluated snowline
    for site_name in site_names:

        print(site_name)
        print('----------')

        # define path to raw images
        im_path = os.path.join(snowlines_obs_path, site_name, 'images')

        # load observed snow line shapefile names, sorted by file name
        # (the date is embedded in the file name)
        sl_obs_fns = sorted(glob.glob(os.path.join(snowlines_obs_path, site_name, 'snowlines', '*.shp')))

        # aoi
        aoi_fn = glob.glob(os.path.join(usgs_path, 'glacierBoundaries', site_name, 'shapefile', '*.shp'))[0]
        aoi = gpd.read_file(aoi_fn)
        aoi_geom = aoi.geometry[0]

        # dem ('DEMs' matches the folder name used by the Landsat and Sentinel-2
        # cells; the lower-case spelling fails on case-sensitive file systems)
        dem_fn = glob.glob(os.path.join(usgs_path, 'DEMs', site_name, '*.tif'))[-1]
        dem = xr.open_dataset(dem_fn)
        dem = dem.squeeze().rename({'band_data': 'elevation'})

        # define output folders for classified images and snowline estimates
        im_classified_path = os.path.join(snowlines_obs_path, site_name, 'classified')
        snowlines_est_path = os.path.join(snowlines_obs_path, site_name, 'snowlines_est')

        # loop through observed snow lines
        for sl_obs_fn in sl_obs_fns:

            # -----Load datasets
            ### Observed
            sl_obs = gpd.read_file(sl_obs_fn)
            # drop the 'id' column, then rows with missing values (e.g., None geometry)
            sl_obs = sl_obs.drop(columns=['id']).dropna().reset_index(drop=True)
            # reproject observed snow line to the CRS of the AOI
            sl_obs_UTM = sl_obs.to_crs(f'EPSG:{aoi.crs.to_epsg()}')
            # extract date from filename
            date = os.path.basename(sl_obs_fn).split(site_name+'_')[1][0:11]
            datetime = np.datetime64(f'{date[0:4]}-{date[4:6]}-{date[6:8]}T{date[9:11]}:00:00')
            print(f'\n{datetime}')

            ### Estimated
            # open raw image of the same date
            im_fn = glob.glob(os.path.join(im_path, date.replace('-','')[0:8] + '*_adj.tif'))[0] # define file name
            im = rxr.open_rasterio(im_fn) # open image as xarray.DataArray
            # create xarray.Dataset
            im_adj = xr.Dataset(
                data_vars=dict(
                    Blue=(['y', 'x'], im.data[0]),
                    Green=(['y', 'x'], im.data[1]),
                    Red=(['y', 'x'], im.data[2]),
                    NIR=(['y', 'x'], im.data[3])
                ),
                coords=im.coords,
                attrs=dict(
                    no_data_values=np.nan,
                    image_scalar=1
                )
            )
            # replace zeros with NaN and divide the remaining values by 1e4
            im_adj = xr.where(im_adj != 0, im_adj/1e4, np.nan)
            im_adj = im_adj.rio.write_crs('EPSG:' + str(im.rio.crs.to_epsg()))
            # add NDSI band
            im_adj['NDSI'] = ((im_adj[ndsi_bands[0]] - im_adj[ndsi_bands[1]])
                              / (im_adj[ndsi_bands[0]] + im_adj[ndsi_bands[1]]))
            # add time dimension
            im_adj = im_adj.expand_dims({'time': [datetime]})
            # classify image
            im_classified_fn = f'{site_name}_{str(date)}_PlanetScope_classified.nc'
            if os.path.exists(os.path.join(im_classified_path, im_classified_fn)):
                print('Classified image already exists in file, loading...')
                im_classified = xr.open_dataset(os.path.join(im_classified_path, im_classified_fn))
                # remove no data values
                im_classified = xr.where(im_classified==-9999, np.nan, im_classified)
                im_classified = im_classified.rio.write_crs(f"EPSG:{dem.rio.crs.to_epsg()}")
                # im_classified = im_classified.rio.write_crs('EPSG:4326')
                # im_classified = im_classified.rio.reproject(f'EPSG:{dem.rio.crs.to_epsg()}')
            else:
                im_classified = f.classify_image(im_adj, clf, feature_cols, aoi, dataset_dict, dataset,
                                                 im_classified_fn, im_classified_path, verbose=False)
            # delineate snowline
            sl_est_fn = os.path.join(snowlines_est_path, site_name + '_' + date + '_PlanetScope_snowline.csv')
            if os.path.exists(sl_est_fn):
                print('Snowline already exists in file, loading...')
                sl_est = pd.read_csv(sl_est_fn)
                sl_est['datetime'] = pd.to_datetime([f'{d[0:4]}-{d[4:6]}-{d[6:8]}T{d[9:11]}:00:00'
                                                     for d in sl_est['datetime']])
            else:
                sl_est = f.delineate_snowline(im_classified, site_name, aoi, dem, dataset_dict, dataset,
                                              str(datetime), sl_est_fn, im_classified_path, figures_out_path,
                                              plot_results=False, im_xr=im_adj, verbose=False)

            # check if snowlines were found
            # NOTE(review): when the snowline is re-loaded from CSV above, the
            # coordinate columns are not parsed (the Landsat and Sentinel-2 cells
            # apply literal_eval), so they are presumably strings -- confirm that
            # cached snowlines are not all skipped by this check.
            if isinstance(sl_est['snowlines_coords_X'].values[0], str):
                print('No snowline coordinates detected, skipping...')
                continue

            # -----Sample elevations at observed snowline points
            xsamp = sl_obs_UTM.geometry[0].coords.xy[0]
            ysamp = sl_obs_UTM.geometry[0].coords.xy[1]
            sl_obs_elev = [dem.sel(x=x, y=y, method='nearest')['elevation'].data for x, y in zip(xsamp, ysamp)]
            sl_obs_elev_median = np.nanmedian(sl_obs_elev)

            # -----Split line depending on distance between points
            max_dist = 100 # m
            line = sl_obs_UTM.geometry[0]
            first_point = Point(line.coords.xy[0][0], line.coords.xy[1][0])
            points = [Point(x, y) for x, y in zip(line.coords.xy[0], line.coords.xy[1])]
            # indices where a new segment starts: the first vertex, every vertex
            # farther than max_dist from its predecessor, and the end sentinel
            isplit = ([0]
                      + [i for i in range(1, len(points)) if points[i].distance(points[i-1]) > max_dist]
                      + [len(points)])
            # must have at least two points to make a line
            line_split = [LineString(points[start:stop])
                          for start, stop in zip(isplit[:-1], isplit[1:])
                          if stop - start > 1]

            # -----Regrid the observed snowlines to equal spacing
            dx = 30 # point spacing
            points_regrid = []
            for segment in line_split:
                segment_distances = np.arange(0, segment.length, dx)
                # NOTE(review): first_point (start of the whole digitized line) is
                # appended for every segment, so it is counted more than once --
                # confirm whether the segment end point was intended instead.
                segment_points = [segment.interpolate(d) for d in segment_distances] + [first_point]
                # filter points outside the aoi
                points_regrid += [p for p in segment_points if p.within(aoi_geom)]

            # -----Calculate distance between each observed snowline point and the closest estimated snowline point
            sl_est['geometry'] = LineString(list(zip(sl_est['snowlines_coords_X'].values[0], sl_est['snowlines_coords_Y'].values[0])))
            distances = np.zeros(len(points_regrid))
            for i, p in enumerate(points_regrid):
                # find nearest point
                nearest_point = nearest_points(sl_est['geometry'][0], p)[0]
                # calculate distance between points
                distances[i] = p.distance(nearest_point)

            # -----Display results
            plt.figure(figsize=(8, 8))
            plt.imshow(np.dstack([im_adj['Red'].data[0], im_adj['Green'].data[0], im_adj['Blue'].data[0]]),
                       extent=(np.min(im_adj.x.data), np.max(im_adj.x.data), np.min(im_adj.y.data), np.max(im_adj.y.data)))
            plt.plot([p.coords.xy[0][0] for p in points_regrid],
                     [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
            plt.plot(sl_est['snowlines_coords_X'][0], sl_est['snowlines_coords_Y'][0], '.m', label='estimated')
            plt.legend(loc='upper right')
            plt.grid()
            plt.title(datetime)
            plt.show()

            # compile results in df
            result_df = pd.DataFrame({'study_site': site_name,
                                      'datetime': datetime,
                                      'snowline_obs': [points_regrid],
                                      'snowline_obs_elev_median': sl_obs_elev_median,
                                      'snowline_est': [sl_est['geometry'][0]],
                                      'snowline_est_elev_median': sl_est['snowline_elevs_median_m'],
                                      'snowline_elev_median_differences': sl_est['snowline_elevs_median_m'] - sl_obs_elev_median,
                                      'snowline_distances': [distances],
                                      'snowline_distance_median': np.nanmedian(distances)})
            result_dfs.append(result_df)

        print(' ')

    # -----Save to file
    # concatenate once, rather than growing the dataframe inside the loop
    results_df = pd.concat(result_dfs) if result_dfs else pd.DataFrame()
    results_df.to_csv(results_fn, index=False)
    print('Performance metrics saved to file: '+results_fn)

In [None]:
# -----Plot the distribution of each performance metric
distance_col = 'snowline_distance_median'
elev_diff_col = 'snowline_elev_median_differences'
fig, ax = plt.subplots(1, 2, figsize=(10,6))
for axis, column, title in zip(ax,
                               [distance_col, elev_diff_col],
                               ['snowline_distance_median', 'Median snowline elevation differences']):
    axis.boxplot(results_df[column])
    axis.set_title(title)
plt.show()

# -----Compile percentile statistics (NaN-aware) in a single-row dataframe
stats_dict = {'dataset': ['PlanetScope']}
for metric_label, column in [('ground distance', distance_col),
                             ('elevation difference', elev_diff_col)]:
    for pct in [0, 25, 50, 75, 100]:
        stats_dict[f'{metric_label} P{pct} [m]'] = np.nanpercentile(results_df[column], pct)
stats_dict['N'] = len(results_df)
results_stats_df = pd.DataFrame(stats_dict)

# -----Print the median and interquartile range of each metric
print('PlanetScope snowline performance')
print('----------')
for heading, metric_label in [("Ground distance", 'ground distance'),
                              ("Median elevation difference", 'elevation difference')]:
    p25, p50, p75 = [str(np.round(results_stats_df[f'{metric_label} P{pct} [m]'][0], 2))
                     for pct in (25, 50, 75)]
    print(heading + ": median = " + p50 + ", " + "IQR = " + p25 + "–" + p75 + " m")

# -----Save the statistics table to file
results_stats_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_stats_PlanetScope.csv')
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: ', results_stats_fn)

In [None]:
# NOTE(review): scratch arithmetic; the result is not assigned to a variable or
# used anywhere in the visible code -- confirm whether this cell is still needed
-70.53 + 0.93

## Landsat

In [None]:
# -----Assess Landsat snowline performance
# For every manually digitized (observed) snowline, find the Landsat snowline
# estimate closest in time and compare the two in terms of ground distance and
# median elevation. Results are cached in a CSV.

# -----Check if output file already exists
results_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_Landsat.csv')
if os.path.exists(results_fn):
    print('Performance stats already exist in file, loading...')
    results_df = pd.read_csv(results_fn)
else:

    # maximum time separation allowed between observed and estimated snowlines
    max_dt_days = 10
    max_dt = np.timedelta64(max_dt_days, 'D')

    # -----Loop through sites
    result_dfs = []  # one single-row dataframe per evaluated snowline
    for site_name in site_names:

        print(site_name)
        print('----------')

        # aoi
        aoi_fn = glob.glob(os.path.join(usgs_path, 'glacierBoundaries', site_name, 'shapefile', '*.shp'))[0]
        aoi = gpd.read_file(aoi_fn)
        aoi_geom = aoi.geometry[0]

        # load dem
        dem_fn = glob.glob(os.path.join(usgs_path, 'DEMs', site_name, '*.tif'))[-1]
        dem = xr.open_dataset(dem_fn)
        dem = dem.squeeze().rename({'band_data': 'elevation'})

        # load observed snowlines file names
        sl_obs_path = os.path.join(snowlines_obs_path, site_name, 'snowlines')
        sl_obs_fns = sorted(glob.glob(os.path.join(sl_obs_path, '*.shp')))

        # load estimated snowlines
        sl_est_fn = os.path.join(study_sites_path, site_name, f'{site_name}_snow_cover_stats.csv')
        sl_est = pd.read_csv(sl_est_fn)
        # subset to dataset (copy so the column assignments below act on an
        # independent dataframe rather than a slice of the full table)
        sl_est = sl_est.loc[sl_est['source']=='Landsat'].copy()
        sl_est['datetime'] = pd.to_datetime(sl_est['datetime'])
        # coordinate lists are stored as strings in the CSV; parse them
        sl_est['snowlines_coords_X'] = np.array(sl_est['snowlines_coords_X'].apply(literal_eval))
        sl_est['snowlines_coords_Y'] = np.array(sl_est['snowlines_coords_Y'].apply(literal_eval))
        sl_est.reset_index(drop=True, inplace=True)

        # Iterate over observed snowlines file names
        for sl_obs_fn in sl_obs_fns:
            # grab date from file name
            date = os.path.basename(sl_obs_fn).split(site_name + '_')[1].split('_adj')[0]
            date = np.datetime64(f'{date[0:4]}-{date[4:6]}-{date[6:8]}T{date[9:11]}:00:00')
            print(f'\n{date}')

            # load observed snowline
            sl_obs = gpd.read_file(sl_obs_fn)
            sl_obs_UTM = sl_obs.to_crs(f'EPSG:{aoi.crs.to_epsg()}')

            # identify the closest estimated snowline in time
            sl_est['dt'] = np.abs(sl_est['datetime'] - date)
            sl_est_date = sl_est.loc[sl_est['dt']==sl_est['dt'].min()]
            if sl_est_date['dt'].values[0] > max_dt:
                print(f'No observations within {max_dt_days} days, skipping...')
                continue

            # a line needs at least two vertices; check before building the geometry
            sl_est_X = sl_est_date['snowlines_coords_X'].values[0]
            sl_est_Y = sl_est_date['snowlines_coords_Y'].values[0]
            if len(sl_est_X) < 2:
                print('No snowline detected for date, skipping...')
                continue
            sl_est_geom = LineString(list(zip(sl_est_X, sl_est_Y)))

            # -----Sample elevations at observed snowline points
            xsamp = sl_obs_UTM.geometry[0].coords.xy[0]
            ysamp = sl_obs_UTM.geometry[0].coords.xy[1]
            sl_obs_elev = [dem.sel(x=x, y=y, method='nearest')['elevation'].data for x, y in zip(xsamp, ysamp)]
            sl_obs_elev_median = np.nanmedian(sl_obs_elev)

            # -----Split line depending on distance between points
            max_dist = 100 # m
            line = sl_obs_UTM.geometry[0]
            first_point = Point(line.coords.xy[0][0], line.coords.xy[1][0])
            points = [Point(x, y) for x, y in zip(line.coords.xy[0], line.coords.xy[1])]
            # indices where a new segment starts: the first vertex, every vertex
            # farther than max_dist from its predecessor, and the end sentinel
            isplit = ([0]
                      + [i for i in range(1, len(points)) if points[i].distance(points[i-1]) > max_dist]
                      + [len(points)])
            # must have at least two points to make a line
            line_split = [LineString(points[start:stop])
                          for start, stop in zip(isplit[:-1], isplit[1:])
                          if stop - start > 1]

            #-----Regrid the observed snowlines to equal spacing
            dx = 30 # point spacing
            points_regrid = []
            for segment in line_split:
                segment_distances = np.arange(0, segment.length, dx)
                # NOTE(review): first_point (start of the whole digitized line) is
                # appended for every segment, so it is counted more than once --
                # confirm whether the segment end point was intended instead.
                segment_points = [segment.interpolate(d) for d in segment_distances] + [first_point]
                # filter points outside the aoi
                points_regrid += [p for p in segment_points if p.within(aoi_geom)]

            # -----Calculate distance between each observed snowline point and the closest estimated snowline point
            distances = np.zeros(len(points_regrid))
            for i, p in enumerate(points_regrid):
                # find nearest point
                nearest_point = nearest_points(sl_est_geom, p)[0]
                # calculate distance between points
                distances[i] = p.distance(nearest_point)

            #-----Display results
            # plt.figure(figsize=(8, 8))
            # plt.plot([p.coords.xy[0][0] for p in points_regrid], 
            #          [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
            # plt.plot(*sl_est_geom.coords.xy, '.m', label='estimated')
            # plt.legend(loc='upper right')
            # plt.grid()
            # plt.title(date)
            # plt.show()

            # compile results in df
            sl_est_elev_median = sl_est_date['snowline_elevs_median_m'].values[0]
            result_df = pd.DataFrame({'study_site': [site_name],
                                      'snowline_obs_date': [str(date)],
                                      'snowline_est_date': [sl_est_date['datetime'].values[0]],
                                      'snowline_obs': [points_regrid],
                                      'snowline_obs_elev_median': [sl_obs_elev_median],
                                      'snowline_est': [sl_est_geom],
                                      'snowline_est_elev_median': [sl_est_elev_median],
                                      'snowline_elev_median_differences': [sl_est_elev_median - sl_obs_elev_median],
                                      'snowline_distances': [distances],
                                      'snowline_distance_median': [np.nanmedian(distances)]})
            result_dfs.append(result_df)

    # Save to file
    # concatenate once, rather than growing the dataframe inside the loop
    results_df = pd.concat(result_dfs) if result_dfs else pd.DataFrame()
    results_df.to_csv(results_fn, index=False)
    print('Performance metrics saved to file:', results_fn) 
            
results_df

In [None]:
# -----Plot the distribution of each performance metric
distance_col = 'snowline_distance_median'
elev_diff_col = 'snowline_elev_median_differences'
fig, ax = plt.subplots(1, 2, figsize=(10,6))
for axis, column, title in zip(ax,
                               [distance_col, elev_diff_col],
                               ['snowline_distance_median', 'Median snowline elevation differences']):
    axis.boxplot(results_df[column])
    axis.set_title(title)
plt.show()

# -----Compile percentile statistics (NaN-aware) in a single-row dataframe
stats_dict = {'dataset': ['Landsat']}
for metric_label, column in [('ground distance', distance_col),
                             ('elevation difference', elev_diff_col)]:
    for pct in [0, 25, 50, 75, 100]:
        stats_dict[f'{metric_label} P{pct} [m]'] = np.nanpercentile(results_df[column], pct)
stats_dict['N'] = len(results_df)
results_stats_df = pd.DataFrame(stats_dict)

# -----Print the median and interquartile range of each metric
print('Landsat snowline performance')
print('----------')
for heading, metric_label in [("Ground distance", 'ground distance'),
                              ("Median elevation difference", 'elevation difference')]:
    p25, p50, p75 = [str(np.round(results_stats_df[f'{metric_label} P{pct} [m]'][0], 2))
                     for pct in (25, 50, 75)]
    print(heading + ": median = " + p50 + ", " + "IQR = " + p25 + "–" + p75 + " m")

# -----Save the statistics table to file
results_stats_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_stats_Landsat.csv')
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: ', results_stats_fn)

## Sentinel-2 SR

In [None]:
# -----Assess Sentinel-2 SR snowline performance
# For every manually digitized (observed) snowline, find the Sentinel-2 SR
# snowline estimate closest in time and compare the two in terms of ground
# distance and median elevation. Results are cached in a CSV.

# -----Check if output file already exists
results_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_Sentinel-2_SR.csv')
if os.path.exists(results_fn):
    print('Performance stats already exist in file, loading...')
    results_df = pd.read_csv(results_fn)
else:

    # maximum time separation allowed between observed and estimated snowlines
    max_dt = np.timedelta64(7, 'D')

    # -----Loop through sites
    result_dfs = []  # one single-row dataframe per evaluated snowline
    for site_name in site_names:

        print(site_name)
        print('----------')

        # aoi
        aoi_fn = glob.glob(os.path.join(usgs_path, 'glacierBoundaries', site_name, 'shapefile', '*.shp'))[0]
        aoi = gpd.read_file(aoi_fn)
        aoi_geom = aoi.geometry[0]

        # load dem
        dem_fn = glob.glob(os.path.join(usgs_path, 'DEMs', site_name, '*.tif'))[-1]
        dem = xr.open_dataset(dem_fn)
        dem = dem.squeeze().rename({'band_data': 'elevation'})

        # load observed snowlines file names
        sl_obs_path = os.path.join(snowlines_obs_path, site_name, 'snowlines')
        sl_obs_fns = sorted(glob.glob(os.path.join(sl_obs_path, '*.shp')))

        # load estimated snowlines
        sl_est_fn = os.path.join(study_sites_path, site_name, f'{site_name}_snow_cover_stats.csv')
        sl_est = pd.read_csv(sl_est_fn)
        # subset to dataset (copy so the column assignments below act on an
        # independent dataframe rather than a slice of the full table)
        sl_est = sl_est.loc[sl_est['source']=='Sentinel-2_SR'].copy()
        sl_est['datetime'] = pd.to_datetime(sl_est['datetime'])
        # coordinate lists are stored as strings in the CSV; parse them
        sl_est['snowlines_coords_X'] = np.array(sl_est['snowlines_coords_X'].apply(literal_eval))
        sl_est['snowlines_coords_Y'] = np.array(sl_est['snowlines_coords_Y'].apply(literal_eval))
        sl_est.reset_index(drop=True, inplace=True)

        # Iterate over observed snowlines file names
        for sl_obs_fn in sl_obs_fns:
            # grab date from file name
            date = os.path.basename(sl_obs_fn).split(site_name + '_')[1].split('_adj')[0]
            date = np.datetime64(f'{date[0:4]}-{date[4:6]}-{date[6:8]}T{date[9:11]}:00:00')
            print(f'\n{date}')

            # load observed snowline
            sl_obs = gpd.read_file(sl_obs_fn)
            sl_obs_UTM = sl_obs.to_crs(f'EPSG:{aoi.crs.to_epsg()}')

            # identify the closest estimated snowline in time
            sl_est['dt'] = np.abs(sl_est['datetime'] - date)
            sl_est_date = sl_est.loc[sl_est['dt']==sl_est['dt'].min()]
            if sl_est_date['dt'].values[0] > max_dt:
                print('No observations within one week, skipping...')
                continue

            # a line needs at least two vertices; check before building the geometry
            sl_est_X = sl_est_date['snowlines_coords_X'].values[0]
            sl_est_Y = sl_est_date['snowlines_coords_Y'].values[0]
            if len(sl_est_X) < 2:
                print('No snowline detected for date, skipping...')
                continue
            sl_est_geom = LineString(list(zip(sl_est_X, sl_est_Y)))

            # -----Sample elevations at observed snowline points
            xsamp = sl_obs_UTM.geometry[0].coords.xy[0]
            ysamp = sl_obs_UTM.geometry[0].coords.xy[1]
            sl_obs_elev = [dem.sel(x=x, y=y, method='nearest')['elevation'].data for x, y in zip(xsamp, ysamp)]
            sl_obs_elev_median = np.nanmedian(sl_obs_elev)

            # -----Split line depending on distance between points
            max_dist = 100 # m
            line = sl_obs_UTM.geometry[0]
            first_point = Point(line.coords.xy[0][0], line.coords.xy[1][0])
            points = [Point(x, y) for x, y in zip(line.coords.xy[0], line.coords.xy[1])]
            # indices where a new segment starts: the first vertex, every vertex
            # farther than max_dist from its predecessor, and the end sentinel
            isplit = ([0]
                      + [i for i in range(1, len(points)) if points[i].distance(points[i-1]) > max_dist]
                      + [len(points)])
            # must have at least two points to make a line
            line_split = [LineString(points[start:stop])
                          for start, stop in zip(isplit[:-1], isplit[1:])
                          if stop - start > 1]

            #-----Regrid the observed snowlines to equal spacing
            dx = 30 # point spacing
            points_regrid = []
            for segment in line_split:
                segment_distances = np.arange(0, segment.length, dx)
                # NOTE(review): first_point (start of the whole digitized line) is
                # appended for every segment, so it is counted more than once --
                # confirm whether the segment end point was intended instead.
                segment_points = [segment.interpolate(d) for d in segment_distances] + [first_point]
                # filter points outside the aoi
                points_regrid += [p for p in segment_points if p.within(aoi_geom)]

            # -----Calculate distance between each observed snowline point and the closest estimated snowline point
            distances = np.zeros(len(points_regrid))
            for i, p in enumerate(points_regrid):
                # find nearest point
                nearest_point = nearest_points(sl_est_geom, p)[0]
                # calculate distance between points
                distances[i] = p.distance(nearest_point)

            #-----Display results
            plt.figure(figsize=(8, 8))
            plt.plot([p.coords.xy[0][0] for p in points_regrid],
                     [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
            plt.plot(*sl_est_geom.coords.xy, '.m', label='estimated')
            plt.legend(loc='upper right')
            plt.grid()
            plt.title(date)
            plt.show()

            # compile results in df
            sl_est_elev_median = sl_est_date['snowline_elevs_median_m'].values[0]
            result_df = pd.DataFrame({'study_site': [site_name],
                                      'snowline_obs_date': [str(date)],
                                      'snowline_est_date': [sl_est_date['datetime'].values[0]],
                                      'snowline_obs': [points_regrid],
                                      'snowline_obs_elev_median': [sl_obs_elev_median],
                                      'snowline_est': [sl_est_geom],
                                      'snowline_est_elev_median': [sl_est_elev_median],
                                      'snowline_elev_median_differences': [sl_est_elev_median - sl_obs_elev_median],
                                      'snowline_distances': [distances],
                                      'snowline_distance_median': [np.nanmedian(distances)]})
            result_dfs.append(result_df)

    # Save to file
    # concatenate once, rather than growing the dataframe inside the loop
    results_df = pd.concat(result_dfs) if result_dfs else pd.DataFrame()
    results_df.to_csv(results_fn, index=False)
    print('Performance metrics saved to file:', results_fn) 
            
results_df

In [None]:
# Summarize the Sentinel-2 SR snowline comparison held in results_df:
# box plots of the per-snowline medians, a one-row table of percentiles,
# a printed median/IQR summary, and a CSV export of the table.
fig, ax = plt.subplots(1, 2, figsize=(10,6))
# boxplot does not skip NaNs (a single NaN leaves the box empty), so drop them
# here to stay consistent with the NaN-aware percentiles computed below
ax[0].boxplot(results_df['snowline_distance_median'].dropna())
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(results_df['snowline_elev_median_differences'].dropna())
ax[1].set_title('Median snowline elevation differences')
plt.show()

# compile stats in dataframe
# (column order: dataset, ground distance P0-P100, elevation difference P0-P100, N)
percentiles = [0, 25, 50, 75, 100]
stats_row = {'dataset': ['Sentinel-2_SR']}
for prefix, column in [('ground distance', 'snowline_distance_median'),
                       ('elevation difference', 'snowline_elev_median_differences')]:
    percentile_values = np.nanpercentile(results_df[column], percentiles)
    for q, value in zip(percentiles, percentile_values):
        stats_row[f'{prefix} P{q} [m]'] = value
# N counts every compared snowline, including rows whose metrics are NaN
stats_row['N'] = len(results_df)
results_stats_df = pd.DataFrame(stats_row)

# print results
print('Sentinel-2 SR snowline performance')
print('----------')
for label, prefix in [('Ground distance', 'ground distance'),
                      ('Median elevation difference', 'elevation difference')]:
    p25, p50, p75 = (str(np.round(results_stats_df[f'{prefix} P{q} [m]'].iloc[0], 2))
                     for q in (25, 50, 75))
    print(f"{label}: median = {p50}, IQR = {p25}–{p75} m")

# save to file
results_stats_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_stats_Sentinel-2_SR.csv')
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: ', results_stats_fn)

In [None]:
# NOTE(review): scratch calculation (evaluates to 8.8); the result is not assigned to any variable in this cell
88/10

## Sentinel-2 TOA

In [None]:
# -----Check if output file already exists
# This cell compares each manually digitized ("observed") snowline with the
# closest-in-time Sentinel-2 TOA ("estimated") snowline at every study site and
# stores one row of metrics per comparison in results_df. If the results CSV
# already exists it is loaded instead of being recomputed.
results_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_Sentinel-2_TOA.csv')
if os.path.exists(results_fn):
    print('Performance stats already exist in file, loading...')
    results_df = pd.read_csv(results_fn)
else:

    # -----Loop through sites
    results_list = [] # one single-row dataframe per compared snowline pair
    for site_name in site_names:

        print(site_name)
        print('----------')

        # aoi
        aoi_fn = glob.glob(os.path.join(usgs_path, 'glacierBoundaries', site_name, 'shapefile', '*.shp'))[0]
        aoi = gpd.read_file(aoi_fn)

        # load dem (the last match returned by glob is used)
        dem_fn = glob.glob(os.path.join(usgs_path, 'DEMs', site_name, '*.tif'))[-1]
        dem = xr.open_dataset(dem_fn)
        dem = dem.squeeze().rename({'band_data': 'elevation'})

        # load observed snowlines file names
        sl_obs_path = os.path.join(snowlines_obs_path, site_name, 'snowlines')
        sl_obs_fns = sorted(glob.glob(os.path.join(sl_obs_path, '*.shp')))

        # load estimated snowlines
        sl_est_fn = os.path.join(study_sites_path, site_name, f'{site_name}_snow_cover_stats.csv')
        sl_est = pd.read_csv(sl_est_fn)
        # subset to dataset; copy so the column assignments below act on an
        # independent dataframe rather than on a slice of the full table
        sl_est = sl_est.loc[sl_est['source']=='Sentinel-2_TOA'].copy()
        if sl_est.empty:
            # nothing to compare against; the date matching below needs at least one row
            print('No estimated snowlines found for this dataset, skipping site...')
            continue
        sl_est['datetime'] = pd.to_datetime(sl_est['datetime'])
        # coordinates are stored in the CSV as string representations of lists
        sl_est['snowlines_coords_X'] = sl_est['snowlines_coords_X'].apply(literal_eval)
        sl_est['snowlines_coords_Y'] = sl_est['snowlines_coords_Y'].apply(literal_eval)
        sl_est.reset_index(drop=True, inplace=True)

        # Iterate over observed snowlines file names
        for sl_obs_fn in sl_obs_fns:
            # grab date from file name: the text between '<site_name>_' and '_adj',
            # read as YYYYMMDD (characters 0-7) plus the hour (characters 9-10)
            date = os.path.basename(sl_obs_fn).split(site_name + '_')[1].split('_adj')[0]
            date = np.datetime64(f'{date[0:4]}-{date[4:6]}-{date[6:8]}T{date[9:11]}:00:00')
            print(f'\n{date}')

            # load observed snowline
            sl_obs = gpd.read_file(sl_obs_fn)
            sl_obs_UTM = sl_obs.to_crs(f'EPSG:{aoi.crs.to_epsg()}')

            # identify the closest estimated snowline in time
            sl_est['dt'] = np.abs(sl_est['datetime'] - date)
            sl_est_date = sl_est.loc[sl_est['dt']==sl_est['dt'].min()]
            if sl_est_date['dt'].values[0] > np.timedelta64(7, 'D'):
                print('No observations within one week, skipping...')
                continue

            # a LineString needs at least two vertices, so run this check
            # before constructing the geometry
            sl_est_X = sl_est_date['snowlines_coords_X'].values[0]
            sl_est_Y = sl_est_date['snowlines_coords_Y'].values[0]
            if len(sl_est_X) < 2:
                print('No snowline detected for date, skipping...')
                continue
            sl_est_geom = LineString(list(zip(sl_est_X, sl_est_Y)))

            # -----Sample elevations at observed snowline points
            xsamp = sl_obs_UTM.geometry[0].coords.xy[0]
            ysamp = sl_obs_UTM.geometry[0].coords.xy[1]
            sl_obs_elev = [dem.sel(x=x, y=y, method='nearest')['elevation'].data for x, y in zip(xsamp, ysamp)]
            sl_obs_elev_median = np.nanmedian(sl_obs_elev)

            # -----Split line depending on distance between points
            max_dist = 100 # m
            points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
            first_point = points[0]
            isplit = [0] # point indices where to split the line
            for j, (p_prev, p) in enumerate(zip(points, points[1:]), start=1):
                if p.distance(p_prev) > max_dist:
                    isplit.append(j)
            isplit.append(len(points)) # add ending point to complete the last line
            # build one line per run of points; a run must have at least two points to make a line
            line_split = [LineString(points[start:end])
                          for start, end in zip(isplit[:-1], isplit[1:])
                          if end - start > 1]

            #-----Regrid the observed snowlines to equal spacing
            dx = 30 # point spacing
            points_regrid = []
            for segment in line_split:
                sample_dists = np.arange(0, segment.length, dx)
                # NOTE(review): the first vertex of the whole observed line is appended to
                # every segment, so it appears once per segment -- confirm this is intended
                segment_points = [segment.interpolate(d) for d in sample_dists] + [first_point]
                # filter points outside the aoi
                points_regrid += [p for p in segment_points if p.within(aoi.geometry[0])]

            # -----Calculate distance between each observed snowline point and the closest estimated snowline point
            distances = np.array([p.distance(nearest_points(sl_est_geom, p)[0]) for p in points_regrid],
                                 dtype=float)

            #-----Display results
            plt.figure(figsize=(8, 8))
            plt.plot([p.coords.xy[0][0] for p in points_regrid],
                     [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
            plt.plot(*sl_est_geom.coords.xy, '.m', label='estimated')
            plt.legend(loc='upper right')
            plt.grid()
            plt.title(date)
            plt.show()

            # compile results in df
            sl_est_elev_median = sl_est_date['snowline_elevs_median_m'].values[0]
            result_df = pd.DataFrame({'study_site': [site_name],
                                      'snowline_obs_date': [str(date)],
                                      'snowline_est_date': [sl_est_date['datetime'].values[0]],
                                      'snowline_obs': [points_regrid],
                                      'snowline_obs_elev_median': [sl_obs_elev_median],
                                      'snowline_est': [sl_est_geom],
                                      'snowline_est_elev_median': [sl_est_elev_median],
                                      'snowline_elev_median_differences': [sl_est_elev_median - sl_obs_elev_median],
                                      'snowline_distances': [distances],
                                      'snowline_distance_median': [np.nanmedian(distances)]})
            results_list.append(result_df)

    # concatenate all results once (an empty dataframe if nothing could be compared)
    results_df = pd.concat(results_list) if results_list else pd.DataFrame()

    # Save to file
    results_df.to_csv(results_fn, index=False)
    print('Performance metrics saved to file:', results_fn)

results_df

In [None]:
# Summarize the Sentinel-2 TOA snowline comparison held in results_df:
# box plots of the per-snowline medians, a one-row table of percentiles,
# a printed median/IQR summary, and a CSV export of the table.
fig, ax = plt.subplots(1, 2, figsize=(10,6))
# boxplot does not skip NaNs (a single NaN leaves the box empty), so drop them
# here to stay consistent with the NaN-aware percentiles computed below
ax[0].boxplot(results_df['snowline_distance_median'].dropna())
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(results_df['snowline_elev_median_differences'].dropna())
ax[1].set_title('Median snowline elevation differences')
plt.show()

# compile stats in dataframe
# (column order: dataset, ground distance P0-P100, elevation difference P0-P100, N)
percentiles = [0, 25, 50, 75, 100]
stats_row = {'dataset': ['Sentinel-2_TOA']}
for prefix, column in [('ground distance', 'snowline_distance_median'),
                       ('elevation difference', 'snowline_elev_median_differences')]:
    percentile_values = np.nanpercentile(results_df[column], percentiles)
    for q, value in zip(percentiles, percentile_values):
        stats_row[f'{prefix} P{q} [m]'] = value
# N counts every compared snowline, including rows whose metrics are NaN
stats_row['N'] = len(results_df)
results_stats_df = pd.DataFrame(stats_row)

# print results
print('Sentinel-2 TOA snowline performance')
print('----------')
for label, prefix in [('Ground distance', 'ground distance'),
                      ('Median elevation difference', 'elevation difference')]:
    p25, p50, p75 = (str(np.round(results_stats_df[f'{prefix} P{q} [m]'].iloc[0], 2))
                     for q in (25, 50, 75))
    print(f"{label}: median = {p50}, IQR = {p25}–{p75} m")

# save to file
results_stats_fn = os.path.join(base_path, 'inputs-outputs', 'snowline_performance_stats_Sentinel-2_TOA.csv')
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: ', results_stats_fn)

## Compile all stats tables into one CSV

In [None]:
# Merge the per-dataset stats tables (snowline_performance_stats_<dataset>.csv)
# into a single CSV, append a row of across-dataset averages, and then delete
# the per-dataset files.

# grab stats file names
fns = sorted(glob.glob(os.path.join(base_path, 'inputs-outputs', 'snowline_performance_stats_*.csv')))
if not fns:
    # e.g. this cell was already run and the per-dataset files were deleted;
    # fail with a clear message instead of a KeyError on the missing columns below
    raise FileNotFoundError('No snowline_performance_stats_*.csv files found in '
                            + os.path.join(base_path, 'inputs-outputs'))

# read every per-dataset table and stack them into one dataframe
results_full = pd.concat([pd.read_csv(fn) for fn in fns], ignore_index=True)

# add row of average metrics: NaN-aware mean of each percentile column across
# datasets, and the total number of compared snowlines
metric_cols = [f'{prefix} P{q} [m]'
               for prefix in ('ground distance', 'elevation difference')
               for q in (0, 25, 50, 75, 100)]
results = pd.DataFrame({'dataset': ['All datasets AVERAGE'],
                        **{col: [np.nanmean(results_full[col])] for col in metric_cols},
                        'N': [np.sum(results_full['N'])]})
results_full = pd.concat([results_full, results], ignore_index=True)

# save full dataframe to file
results_full_fn = 'snowline_performance_stats.csv'
results_full.to_csv(os.path.join(base_path, 'inputs-outputs', results_full_fn), index=False)
print('stats for all datasets compiled and saved: ', os.path.join(base_path, 'inputs-outputs', results_full_fn))

# delete individual files (the compiled file name does not match the glob
# pattern above, so it is never among the files removed here)
for fn in fns:
    os.remove(fn)
    print('file deleted: '+fn)

In [None]:
# bare expression at the end of the cell: shows the compiled stats table as the cell output
results_full