# Snowline performance assessment

Rainey Aberle

2022/2023

In [None]:
import ee
import geopandas as gpd
import glob
from joblib import dump, load
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import rioxarray as rxr
import rasterio as rio
from scipy import stats
from shapely import wkt
from shapely.geometry import Point, MultiLineString, LineString, shape, MultiPolygon, Polygon
from shapely.ops import split, unary_union, polygonize, nearest_points
import skimage.io
from skimage import feature
import sys
import wxee as wx
import xarray as xr

In [None]:
# -----Define paths and load shared inputs used by every cell below
# path to snow-cover-mapping/
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# path to study-sites/
study_sites_path = '/Users/raineyaberle/Google Drive/My Drive/Research/CryoGARS-Glaciology/Advising/student-research/Alexandra-Friel/snow_cover_mapping_application/study-sites/'
# path to snowline-package/
snowlines_obs_path = '/Users/raineyaberle/Google Drive/My Drive/Research/PhD/snow_cover_mapping/snowline-package/'

# names of study sites
site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry']
# path for output figures
figures_out_path = base_path+'figures/'

# add path to functions (must happen before importing pipeline_utils)
sys.path.insert(1, base_path+'functions/')
import pipeline_utils as f

# load dataset dictionary
# A context manager closes the file once it is parsed. The handle is deliberately
# not named `f`, because `f` is the pipeline_utils module alias imported above.
with open(base_path + 'inputs-outputs/datasets_characteristics.json') as dataset_dict_file:
    dataset_dict = json.load(dataset_dict_file)

## PlanetScope

In [None]:
# -----PlanetScope snowline performance
# For every manually digitized (observed) snowline at every site: classify the
# PlanetScope image of the same date, delineate the estimated snowline, and compare
# the two in terms of ground distance and median elevation. One row per
# observed/estimated pair is accumulated in results_df and saved to CSV.

# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/PlanetScope_classifier_all_sites.joblib'
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/PlanetScope_feature_columns.json'
# Close the JSON file once parsed. The handle is not named `f`, which is the
# alias of the pipeline_utils module used below.
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)
dataset = 'PlanetScope'

# -----Loop through sites
results_df = pd.DataFrame()
for i, site_name in enumerate(site_names):

    print(site_name)
    print('----------')

    # define path to raw images
    im_path = snowlines_obs_path + site_name + '/images/'

    # load observed snow line shapefile names
    sl_obs_fns = glob.glob(snowlines_obs_path + site_name + '/snowlines/*.shp')
    sl_obs_fns = sorted(sl_obs_fns) # sort chronologically

    # AOI
    AOI_fn = glob.glob(study_sites_path + site_name + '/AOIs/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)
    # CRS string shared by the DEM and the observed snowlines
    AOI_crs = 'EPSG:' + str(AOI.crs.to_epsg())

    # DEM
    DEM_fn = glob.glob(study_sites_path + site_name + '/DEMs/' + site_name + '*_clip.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})
    DEM = DEM.rio.reproject(AOI_crs)
    # remove unnecessary data (possible extra bands from ArcticDEM or other DEM)
    if len(np.shape(DEM.elevation.data))>2:
        DEM['elevation'] = DEM.elevation[0]

    # define output folders for classified images and snowline estimates
    im_classified_path = snowlines_obs_path + site_name + '/classified/'
    snowlines_est_path = snowlines_obs_path + site_name + '/snowlines_est/'

    # initialize observed snowline elevations
    # (kept for backward compatibility; not filled or read within this cell)
    sl_obs_elevs = np.zeros(len(sl_obs_fns))

    # loop through observed snow lines
    for sl_obs_fn in sl_obs_fns:

        # -----Load datasets
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop None geometry columns
        sl_obs = sl_obs.drop(columns=['id']).dropna().reset_index(drop=True)
        # reproject observed snow line to UTM
        # (the 'EPSG:' prefix was missing here; it is now the same CRS string
        # used for the DEM and in the other sensor cells)
        sl_obs_UTM = sl_obs.to_crs(AOI_crs)
        # extract date from filename (characters 0-7: YYYYMMDD, 9-10: hour)
        date = sl_obs_fn.split('/'+site_name+'_')[1][0:11]
        datetime = np.datetime64(date[0:4]+ '-' + date[4:6] + '-' + date[6:8] + ' ' + date[9:11] + ':00:00')
        print(date)

        ### Estimated
        # open raw image of the same date
        im_fn = glob.glob(im_path + date[0:8] + '*_adj.tif')[0] # define file name
        im = rxr.open_rasterio(im_fn) # open image as xarray.DataArray
        # create xarray.Dataset
        im_adj = xr.Dataset(
            data_vars=dict(
                Blue=(['y', 'x'], im.data[0]),
                Green=(['y', 'x'], im.data[1]),
                Red=(['y', 'x'], im.data[2]),
                NIR=(['y', 'x'], im.data[3])
            ),
            coords=im.coords,
            attrs=dict(
                no_data_values=np.nan,
                image_scalar=1
            )
        )
        # zeros are replaced with NaN; all other values are divided by 1e4
        im_adj = xr.where(im_adj != 0, im_adj/1e4, np.nan)
        im_adj = im_adj.rio.write_crs('EPSG:' + str(im.rio.crs.to_epsg()))
        # add NDSI band
        ndsi_band_1, ndsi_band_2 = dataset_dict[dataset]['NDSI_bands'][0], dataset_dict[dataset]['NDSI_bands'][1]
        im_adj['NDSI'] = ((im_adj[ndsi_band_1] - im_adj[ndsi_band_2])
                          / (im_adj[ndsi_band_1] + im_adj[ndsi_band_2]))
        # add time dimension
        im_adj = im_adj.expand_dims({'time': [datetime]})
        # classify image (re-use the classified image on disk when available)
        im_classified_fn = site_name + '_' + date + '_PlanetScope_classified.nc'
        if os.path.exists(im_classified_path + im_classified_fn):
            print('Classified image already exists in file, loading...')
            im_classified = xr.open_dataset(im_classified_path + im_classified_fn)
            # remove no data values
            im_classified = xr.where(im_classified==-9999, np.nan, im_classified)
        else:
            im_classified = f.classify_image(im_adj, clf, feature_cols, True, AOI, DEM, dataset_dict, dataset,
                                             im_classified_fn, im_classified_path)
        # delineate snowline
        snowline_fn = site_name + '_' + date + '_PlanetScope_snowline.csv'
        sl_est = f.delineate_image_snowline(im_adj, im_classified, site_name, AOI, dataset_dict, dataset, date,
                                            snowline_fn, snowlines_est_path, figures_out_path, plot_results=False)

        # skip this observation if no snowlines were found
        if not len(sl_est['snowlines_coords_X'][0]):
            continue

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp = line.coords.xy[0]
        ysamp = line.coords.xy[1]
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data for x, y in zip(xsamp, ysamp)]

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        # point indices where to split the line: start a new segment wherever two
        # consecutive vertices are more than max_dist apart
        isplit = [0]
        for i in range(1, len(points)):
            if points[i].distance(points[i-1]) > max_dist:
                isplit.append(i)
        isplit.append(len(points)) # add ending point to complete the last line
        # build the split lines; must have at least two points to make a line
        # (isplit always holds at least [0, len(points)], so no fallback branch is needed)
        line_split = [LineString(points[start:end])
                      for start, end in zip(isplit[:-1], isplit[1:])
                      if end - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for line in line_split:
            distances = np.arange(0, line.length, dx)
            # NOTE(review): first_point (first vertex of the whole observed line) is
            # appended once per segment, as in the original code - confirm whether the
            # segment end point was intended instead.
            line_points = [line.interpolate(distance) for distance in distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI.geometry[0])]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.zeros(len(points_regrid))
        for i, p in enumerate(points_regrid):
            # find nearest point
            nearest_point = nearest_points(sl_est['geometry'][0], p)[0]
            # calculate distance between points
            distances[i] = p.distance(nearest_point)

        # -----Display results
        # plt.figure(figsize=(8, 8))
        # plt.imshow(np.dstack([im_adj['Red'].data[0], im_adj['Green'].data[0], im_adj['Blue'].data[0]]),
        #            extent=(np.min(im_adj.x.data), np.max(im_adj.x.data), np.min(im_adj.y.data), np.max(im_adj.y.data)))
        # plt.plot([p.coords.xy[0][0] for p in points_regrid],
        #          [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        # plt.plot(sl_est['snowlines_coords_X'][0], sl_est['snowlines_coords_Y'][0], '.m', label='estimated')
        # plt.legend(loc='upper right')
        # plt.grid()
        # plt.title(datetime)
        # plt.show()

        # compile results in df
        result_df = pd.DataFrame({'study_site': site_name,
                                  'datetime': datetime,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': np.nanmedian(sl_obs_elev),
                                  'snowline_est': [sl_est['geometry'][0]],
                                  'snowline_est_elev_median': sl_est['snowlines_elevs_median_m'],
                                  'snowline_elev_median_differences': sl_est['snowlines_elevs_median_m'] - np.nanmedian(sl_obs_elev),
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})

        # concatenate to results_df
        results_df = pd.concat([results_df, result_df])

    print(' ')

# -----Save to file
results_fn = base_path + 'inputs-outputs/snowline_performance_PlanetScope.csv'
results_df.to_csv(results_fn, index=False)
print('Performance metrics saved to file: '+results_fn)

In [None]:
# -----Summarize PlanetScope snowline performance
# Boxplots and median +/- IQR of (a) the per-image median ground distance and
# (b) the per-image median elevation difference between observed and estimated snowlines.

# NaNs are dropped per column: matplotlib's boxplot cannot compute quartiles when NaNs are present
distance_medians = results_df['snowline_distance_median'].dropna()
elev_median_differences = results_df['snowline_elev_median_differences'].dropna()

fig, ax = plt.subplots(1, 2, figsize=(10,6))
ax[0].boxplot(distance_medians)
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(elev_median_differences)
ax[1].set_title('Median snowline elevation differences')
plt.show()

# compile stats in dataframe
# (nan_policy='omit' keeps the IQR consistent with the NaN-ignoring medians,
# as done for the other sensors; without it a single NaN makes the IQR NaN)
results_stats_df = pd.DataFrame({'dataset':['PlanetScope'],
                                 'ground distance median [m]': np.nanmedian(results_df['snowline_distance_median']),
                                 'ground distance IQR [m]': stats.iqr(results_df['snowline_distance_median'], nan_policy='omit'),
                                 'elevation difference median [m]': np.nanmedian(results_df['snowline_elev_median_differences']),
                                 'elevation difference IQR [m]': stats.iqr(results_df['snowline_elev_median_differences'], nan_policy='omit'),
                                 'N': len(results_df)
                                })

# print results
print('PlanetScope snowline performance')
print('----------')
print('Ground distance = ' + str(np.round(results_stats_df['ground distance median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['ground distance IQR [m]'][0],2)) + ' m')
print('Median elevation difference = ' + str(np.round(results_stats_df['elevation difference median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['elevation difference IQR [m]'][0],2)) + ' m')

# save to file
results_stats_fn = base_path + 'inputs-outputs/snowline_performance_stats_PlanetScope.csv'
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: '+results_stats_fn)

## Landsat

In [None]:
# -----Landsat snowline performance
# Compare each manually digitized (observed) snowline with the Landsat-derived
# snowline estimate of the listed date, in terms of ground distance and median elevation.

# -----Define estimated snowline dates for each site
# Entries are positional: the k-th date is paired with the k-th observed snowline
# shapefile (sorted by file name). 'N/A' marks observations that are skipped.
# NOTE(review): the Gulkana list holds 19 entries here but 18 in the Sentinel-2 TOA
# cell for the same observation files - confirm the pairing is not shifted by one.
sl_est_dates_by_site = {
    'Wolverine': ['N/A', 'N/A', 'N/A', '20170919',
                  'N/A', '20180828', 'N/A', '20180929',
                  '20190628', 'N/A', '20190808', 'N/A',
                  '20200614', 'N/A', '20200817', '20200911',
                  'N/A', '20210719', '20210813', 'N/A'],
    'Gulkana': ['N/A', 'N/A', 'N/A', 'N/A','N/A',
                '20180704', 'N/A', '20180913',
                'N/A', '20190707', '20190808', 'N/A',
                'N/A', 'N/A', '20200819', 'N/A',
                'N/A', 'N/A', '20210712'],
    'LemonCreek': ['N/A',
                   '20180603', '20180619', '20180705', 'N/A', '20180721',
                   '20190521', '20190606', 'N/A', 'N/A', 'N/A',
                   '20200905',
                   '20210729', 'N/A', '20210823'],
    'SouthCascade': ['N/A', '20160913',
                     'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
                     '20180726', 'N/A', 'N/A', '20180928',
                     '20190720', 'N/A', 'N/A',
                     'N/A', '20200816', '20200908', '20201003',
                     '20210810', 'N/A'],
    'Sperry': ['N/A', 'N/A',
               '20170727', 'N/A', '20170828', 'N/A',
               '20180730', '20180815', 'N/A',
               'N/A', 'N/A', 'N/A', 'N/A',
               'N/A', 'N/A', 'N/A', '20201007',
               'N/A', '20210722'],
}

# -----Loop through sites
results_df = pd.DataFrame()
for i, site_name in enumerate(site_names):

    print(site_name)
    print('----------')

    # load digitized snow lines file names
    sl_obs_path = snowlines_obs_path + site_name + '/snowlines/'
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp'))

    # estimated snowline dates for this site
    sl_est_dates = sl_est_dates_by_site[site_name]
    # zip() below stops at the shorter sequence, so a length mismatch would silently
    # drop (or misalign) pairs; make that visible instead of failing quietly
    if len(sl_est_dates) != len(sl_obs_fns):
        print('WARNING: ' + str(len(sl_obs_fns)) + ' observed snowline files but '
              + str(len(sl_est_dates)) + ' estimated snowline dates for ' + site_name
              + '; unpaired entries are skipped and the pairing may be misaligned')

    # load AOI
    AOI_fn = glob.glob(study_sites_path + site_name + '/AOIs/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)

    # load DEM
    # NOTE(review): unlike the PlanetScope cell, the DEM is not reprojected to the AOI
    # CRS here; the nearest-neighbour sampling below assumes its x/y are already in
    # that CRS - confirm.
    DEM_fn = glob.glob(study_sites_path + site_name + '/DEMs/' + site_name + '*USGS_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # -----Loop through snowline pairs
    for sl_obs_fn, sl_est_date in zip(sl_obs_fns, sl_est_dates):

        print(sl_obs_fn.split('/')[-1], sl_est_date)

        # skip if no estimated snowline date exists for comparison
        if sl_est_date == 'N/A':
            continue

        # -----Load snowlines
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop None geometry columns
        sl_obs = sl_obs.drop(columns=['id']).dropna().reset_index(drop=True)
        # reproject observed snow line to UTM
        sl_obs_UTM = sl_obs.to_crs('EPSG:'+str(AOI.crs.to_epsg()))
        ### Estimated
        sl_est_fn = glob.glob(study_sites_path + site_name + '/imagery/snowlines/*'+sl_est_date+'*Landsat*.csv')[0]
        sl_est = pd.read_csv(sl_est_fn)
        # continue only if there is a snowline estimate ('[]' marks an empty geometry)
        if sl_est['geometry'][0] == '[]':
            continue

        sl_est['geometry'] = gpd.GeoSeries.from_wkt(sl_est['geometry'])

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp = line.coords.xy[0]
        ysamp = line.coords.xy[1]
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        # point indices where to split the line: start a new segment wherever two
        # consecutive vertices are more than max_dist apart
        isplit = [0]
        for i in range(1, len(points)):
            if points[i].distance(points[i-1]) > max_dist:
                isplit.append(i)
        isplit.append(len(points)) # add ending point to complete the last line
        # build the split lines; must have at least two points to make a line
        # (isplit always holds at least [0, len(points)], so no fallback branch is needed)
        line_split = [LineString(points[start:end])
                      for start, end in zip(isplit[:-1], isplit[1:])
                      if end - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for line in line_split:
            distances = np.arange(0, line.length, dx)
            # NOTE(review): first_point (first vertex of the whole observed line) is
            # appended once per segment, as in the original code - confirm whether the
            # segment end point was intended instead.
            line_points = [line.interpolate(distance) for distance in distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI.geometry[0])]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.zeros(len(points_regrid))
        for i, p in enumerate(points_regrid):
            # find nearest point
            nearest_point = nearest_points(sl_est['geometry'][0], p)[0]
            # calculate distance between points
            distances[i] = p.distance(nearest_point)

        # -----Display results
        # plt.figure(figsize=(8, 8))
        # plt.plot([p.coords.xy[0][0] for p in points_regrid],
        #          [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        # plt.plot(*sl_est['geometry'][0].coords.xy, '.m', label='estimated')
        # plt.legend(loc='upper right')
        # plt.grid()
        # plt.title(sl_est_date)
        # plt.show()

        # compile results in df
        result_df = pd.DataFrame({'study_site': site_name,
                                  'snowline_obs_date': sl_obs_fn.split('/')[-1].split(site_name+'_')[-1][0:8],
                                  'snowline_est_date': sl_est_date,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': np.nanmedian(sl_obs_elev),
                                  'snowline_est': [sl_est['geometry']],
                                  'snowline_est_elev_median': sl_est['snowline_elevs_median_m'],
                                  'snowline_elev_median_differences': sl_est['snowline_elevs_median_m'] - np.nanmedian(sl_obs_elev),
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        # concatenate to results_df
        results_df = pd.concat([results_df, result_df])

    print(' ')

In [None]:
# -----Summarize Landsat snowline performance
# Boxplots and median +/- IQR of (a) the per-image median ground distance and
# (b) the per-image median elevation difference between observed and estimated snowlines.

# NaNs are dropped per column, so a NaN in one metric does not remove the other
# metric's value from its plot (matplotlib's boxplot cannot handle NaNs)
distance_medians = results_df['snowline_distance_median'].dropna()
elev_median_differences = results_df['snowline_elev_median_differences'].dropna()

fig, ax = plt.subplots(1, 2, figsize=(10,6))
ax[0].boxplot(distance_medians)
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(elev_median_differences)
ax[1].set_title('snowline_elev_median_differences')
plt.show()

# compile stats in dataframe (NaNs are ignored in both the medians and the IQRs;
# N counts every compared pair, including any with NaN metrics)
results_stats_df = pd.DataFrame({'dataset':['Landsat'],
                                 'ground distance median [m]': np.nanmedian(results_df['snowline_distance_median']),
                                 'ground distance IQR [m]': stats.iqr(results_df['snowline_distance_median'], nan_policy='omit'),
                                 'elevation difference median [m]': np.nanmedian(results_df['snowline_elev_median_differences']),
                                 'elevation difference IQR [m]': stats.iqr(results_df['snowline_elev_median_differences'], nan_policy='omit'),
                                 'N': len(results_df)
                                })

# print results
print('Landsat snowline performance')
print('----------')
print('Ground distance = ' + str(np.round(results_stats_df['ground distance median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['ground distance IQR [m]'][0],2)) + ' m')
print('Median elevation difference = ' + str(np.round(results_stats_df['elevation difference median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['elevation difference IQR [m]'][0],2)) + ' m')

# save to file
results_stats_fn = base_path + 'inputs-outputs/snowline_performance_stats_Landsat.csv'
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: '+results_stats_fn)

## Sentinel-2 SR

In [None]:
# -----Sentinel-2 SR snowline performance
# Compare each manually digitized (observed) snowline from 2019 onward with the
# Sentinel-2 SR-derived snowline estimate of the listed date, in terms of ground
# distance and median elevation.

# -----Define estimated snowline dates for each site
# Entries are positional: the k-th date is paired with the k-th observed snowline
# shapefile (sorted by file name, pre-2019 files removed). 'N/A' marks observations
# that are skipped.
sl_est_dates_by_site = {
    'Wolverine': ['20190626', '20190704', '20190808', '20190818',
                  '20200613', '20200723', '20200817', '20200911',
                  '20210613', '20210715', 'N/A', '20210911'],
    'Gulkana': ['20190621', '20190709', '20190808', '20190830',
                '20200703', '20200804', '20200819', '20200906',
                '20210620', '20210705', '20210713'],
    'LemonCreek': ['20190527', '20190606', '20190628', '20190706', '20190723',
                   '20200905',
                   '20210730', '20210730', '20210821'],
    'SouthCascade': ['N/A', 'N/A', 'N/A',
                     '20200731', '20200818', '20200909', '20201004',
                     '20210813', '20210828'],
    'Sperry': ['20190723', '20190731', '20190802', '20190807',
               '20200727', '20200821', '20200905', '20201008',
               '20210712', '20210725'],
}

# -----Loop through sites
results_df = pd.DataFrame()
for i, site_name in enumerate(site_names):

    print(site_name)
    print('----------')

    # load digitized snow lines file names
    sl_obs_path = snowlines_obs_path + site_name + '/snowlines/'
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp'))
    # remove pre-2019 observations (matched as substrings of the full file path)
    sl_obs_fns = [fn for fn in sl_obs_fns if not any(year in fn for year in ('2016', '2017', '2018'))]

    # estimated snowline dates for this site
    sl_est_dates = sl_est_dates_by_site[site_name]
    # zip() below stops at the shorter sequence, so a length mismatch would silently
    # drop (or misalign) pairs; make that visible instead of failing quietly
    if len(sl_est_dates) != len(sl_obs_fns):
        print('WARNING: ' + str(len(sl_obs_fns)) + ' observed snowline files but '
              + str(len(sl_est_dates)) + ' estimated snowline dates for ' + site_name
              + '; unpaired entries are skipped and the pairing may be misaligned')

    # load AOI
    AOI_fn = glob.glob(study_sites_path + site_name + '/AOIs/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)

    # load DEM
    # NOTE(review): unlike the PlanetScope cell, the DEM is not reprojected to the AOI
    # CRS here; the nearest-neighbour sampling below assumes its x/y are already in
    # that CRS - confirm.
    DEM_fn = glob.glob(study_sites_path + site_name + '/DEMs/' + site_name + '*USGS_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # -----Loop through snowline pairs
    for sl_obs_fn, sl_est_date in zip(sl_obs_fns, sl_est_dates):

        # skip if no estimated snowline date exists for comparison
        if sl_est_date == 'N/A':
            continue

        print(sl_obs_fn.split('/')[-1], sl_est_date)

        # -----Load snowlines
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop None geometry columns
        sl_obs = sl_obs.drop(columns=['id']).dropna().reset_index(drop=True)
        # reproject observed snow line to UTM
        sl_obs_UTM = sl_obs.to_crs('EPSG:'+str(AOI.crs.to_epsg()))
        ### Estimated
        sl_est_fn = glob.glob(study_sites_path + site_name + '/imagery/snowlines/*'+sl_est_date+'*Sentinel-2_SR*.csv')[0]
        sl_est = pd.read_csv(sl_est_fn)
        # continue only if there is a snowline estimate ('[]' marks an empty geometry)
        if sl_est['geometry'][0] == '[]':
            continue

        sl_est['geometry'] = gpd.GeoSeries.from_wkt(sl_est['geometry'])

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp = line.coords.xy[0]
        ysamp = line.coords.xy[1]
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        # point indices where to split the line: start a new segment wherever two
        # consecutive vertices are more than max_dist apart
        isplit = [0]
        for i in range(1, len(points)):
            if points[i].distance(points[i-1]) > max_dist:
                isplit.append(i)
        isplit.append(len(points)) # add ending point to complete the last line
        # build the split lines; must have at least two points to make a line
        # (isplit always holds at least [0, len(points)], so no fallback branch is needed)
        line_split = [LineString(points[start:end])
                      for start, end in zip(isplit[:-1], isplit[1:])
                      if end - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for line in line_split:
            distances = np.arange(0, line.length, dx)
            # NOTE(review): first_point (first vertex of the whole observed line) is
            # appended once per segment, as in the original code - confirm whether the
            # segment end point was intended instead.
            line_points = [line.interpolate(distance) for distance in distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI.geometry[0])]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.zeros(len(points_regrid))
        for i, p in enumerate(points_regrid):
            # find nearest point
            nearest_point = nearest_points(sl_est['geometry'][0], p)[0]
            # calculate distance between points
            distances[i] = p.distance(nearest_point)

        # -----Display results
        # plt.figure(figsize=(8, 8))
        # plt.plot([p.coords.xy[0][0] for p in points_regrid],
        #          [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        # plt.plot(*sl_est['geometry'][0].coords.xy, '.m', label='estimated')
        # plt.legend(loc='upper right')
        # plt.grid()
        # plt.title(sl_est_date)
        # plt.show()

        # compile results in df
        result_df = pd.DataFrame({'study_site': site_name,
                                  'snowline_obs_date': sl_obs_fn.split('/')[-1].split(site_name+'_')[-1][0:8],
                                  'snowline_est_date': sl_est_date,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': np.nanmedian(sl_obs_elev),
                                  'snowline_est': [sl_est['geometry']],
                                  'snowline_est_elev_median': sl_est['snowline_elevs_median_m'],
                                  'snowline_elev_median_differences': sl_est['snowline_elevs_median_m'] - np.nanmedian(sl_obs_elev),
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        # concatenate to results_df
        results_df = pd.concat([results_df, result_df])

    print(' ')

In [None]:
# -----Summarize Sentinel-2 SR snowline performance
# Boxplots and median +/- IQR of (a) the per-image median ground distance and
# (b) the per-image median elevation difference between observed and estimated snowlines.

# NaNs are dropped per column, so a NaN in one metric does not remove the other
# metric's value from its plot (matplotlib's boxplot cannot handle NaNs)
distance_medians = results_df['snowline_distance_median'].dropna()
elev_median_differences = results_df['snowline_elev_median_differences'].dropna()

fig, ax = plt.subplots(1, 2, figsize=(10,6))
ax[0].boxplot(distance_medians)
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(elev_median_differences)
ax[1].set_title('snowline_elev_median_differences')
plt.show()

# compile stats in dataframe (NaNs are ignored in both the medians and the IQRs;
# N counts every compared pair, including any with NaN metrics)
results_stats_df = pd.DataFrame({'dataset':['Sentinel-2_SR'],
                                 'ground distance median [m]': np.nanmedian(results_df['snowline_distance_median']),
                                 'ground distance IQR [m]': stats.iqr(results_df['snowline_distance_median'], nan_policy='omit'),
                                 'elevation difference median [m]': np.nanmedian(results_df['snowline_elev_median_differences']),
                                 'elevation difference IQR [m]': stats.iqr(results_df['snowline_elev_median_differences'], nan_policy='omit'),
                                 'N': len(results_df)
                                })

# print results
print('Sentinel-2 SR snowline performance')
print('----------')
print('Ground distance = ' + str(np.round(results_stats_df['ground distance median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['ground distance IQR [m]'][0],2)) + ' m')
print('Median elevation difference = ' + str(np.round(results_stats_df['elevation difference median [m]'][0],2))
      + ' +/- ' + str(np.round(results_stats_df['elevation difference IQR [m]'][0],2)) + ' m')

# save to file
results_stats_fn = base_path + 'inputs-outputs/snowline_performance_stats_Sentinel-2_SR.csv'
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: '+results_stats_fn)

## Sentinel-2 TOA

In [None]:
# -----Loop through sites
# For each site, pair every manually digitized (observed) snowline shapefile with
# the Sentinel-2 TOA estimated snowline of the listed date, then compare them by
# (1) ground distance from regridded observed points to the estimated snowline and
# (2) median elevation difference.
# Per-snowline result tables are collected in a list and concatenated once at the
# end, rather than repeatedly concatenating onto an initially empty DataFrame.
result_dfs = []
for site_name in site_names:

    print(site_name)
    print('----------')

    # load digitized snow lines file names
    sl_obs_path = snowlines_obs_path + site_name + '/snowlines/'
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp'))

    # define estimated snowlines dates
    # (one entry per observed snowline file, in sorted file order; 'N/A' = no estimate to compare)
    if site_name=='Wolverine':
        sl_est_dates = ['N/A', 'N/A', 'N/A', 'N/A',
                        '20180731', '20180828', '20180902', '20180929',
                        '20190626', '20190704', '20190805', '20190818',
                        '20200613', '20200723', '20200817', 'N/A',
                        '20210613', '20210715', 'N/A', '20210911']
    elif site_name=='Gulkana':
        sl_est_dates = ['20170621', 'N/A', '20170805', 'N/A',
                        '20180704', '20180721', '20180909',
                        '20190621', '20190709', '20190808', '20190830',
                        '20200703', '20200804', '20200819', '20200906',
                        '20210620', '20210705', '20210713']
    elif site_name=='LemonCreek':
        sl_est_dates = ['20170810', 
                        '20180603', '20180618', '20180703', '20180713', '20180723',
                        '20190527', '20190606', '20190628', '20190706', '20190723',
                        '20200905',
                        '20210730', '20210730', '20210821']
    elif site_name=='SouthCascade':
        sl_est_dates = ['20160816', 'N/A',
                        'N/A', '20170811', '20170826', '20170915', '20171005',
                        '20180730', '20180809', '20180905', '20180928',
                        '20190722', '20190730', '20190819', 
                        '20200731', '20200818', '20200909', '20201004',
                        '20210813', '20210828']
    elif site_name=='Sperry':
        sl_est_dates = ['20160817', '20160830',
                        '20170731', '20170807', '20170825', '20170901',
                        '20180731', '20180815', 'N/A',
                        '20190723', '20190731', '20190802', '20190807']

    # zip() below stops at the shorter input, so flag a mismatch instead of
    # silently dropping (or mis-pairing) snowlines
    if len(sl_obs_fns) != len(sl_est_dates):
        print('WARNING: ' + str(len(sl_obs_fns)) + ' observed snowline files but '
              + str(len(sl_est_dates)) + ' estimated snowline dates; unmatched entries are skipped')

    # load AOI
    AOI_fn = glob.glob(study_sites_path + site_name + '/AOIs/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)
    aoi_geom = AOI.geometry[0]

    # load DEM
    DEM_fn = glob.glob(study_sites_path + site_name + '/DEMs/' + site_name + '*USGS_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # -----Loop through snowline pairs
    for sl_obs_fn, sl_est_date in zip(sl_obs_fns, sl_est_dates):

        sl_obs_name = os.path.basename(sl_obs_fn)
        print(sl_obs_name, sl_est_date)
        # skip observed snowlines that have no estimated snowline date for comparison
        if sl_est_date == 'N/A':
            continue

        # -----Load snowlines
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop the 'id' column, then any rows with missing values
        sl_obs = sl_obs.drop(columns=['id']).dropna().reset_index(drop=True)
        # reproject observed snow line to the CRS of the AOI
        sl_obs_UTM = sl_obs.to_crs('EPSG:'+str(AOI.crs.to_epsg()))
        ### Estimated
        sl_est_fn = glob.glob(study_sites_path + site_name + '/imagery/snowlines/*'+sl_est_date+'*Sentinel-2_TOA*.csv')[0]
        sl_est = pd.read_csv(sl_est_fn)
        # continue only if there is a snowline estimate ('[]' marks an empty estimate)
        if sl_est['geometry'][0] == '[]':
            continue
        sl_est['geometry'] = gpd.GeoSeries.from_wkt(sl_est['geometry'])
        est_geom = sl_est['geometry'][0]

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp, ysamp = line.coords.xy
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]
        sl_obs_elev_median = np.nanmedian(sl_obs_elev)

        # -----Split line depending on distance between points
        max_dist = 100 # m
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        first_point = points[0]
        # indices where a new segment starts: the first vertex, plus every vertex
        # farther than max_dist from its predecessor
        isplit = [0] + [j for j in range(1, len(points))
                        if points[j].distance(points[j-1]) > max_dist]
        isplit.append(len(points)) # add ending point to complete the last line
        # must have at least two points to make a line
        line_split = [LineString(points[start:stop])
                      for start, stop in zip(isplit[:-1], isplit[1:])
                      if stop - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for segment in line_split:
            # NOTE(review): the first vertex of the whole line is appended to every
            # segment's points, so it can appear more than once in points_regrid --
            # kept as in the original analysis; confirm this is intended.
            line_points = [segment.interpolate(d) for d in np.arange(0, segment.length, dx)] + [first_point]
            # filter points outside the AOI
            points_regrid += [p for p in line_points if p.within(aoi_geom)]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.array([p.distance(nearest_points(est_geom, p)[0]) for p in points_regrid],
                             dtype=float)

        # -----Display results
        # plt.figure(figsize=(8, 8))
        # plt.plot([p.coords.xy[0][0] for p in points_regrid], 
        #          [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        # plt.plot(*sl_est['geometry'][0].coords.xy, '.m', label='estimated')
        # plt.legend(loc='upper right')
        # plt.grid()
        # plt.title(sl_est_date)
        # plt.show()

        # compile results in df
        result_df = pd.DataFrame({'study_site': site_name, 
                                  'snowline_obs_date': sl_obs_name.split(site_name+'_')[-1][0:8], 
                                  'snowline_est_date': sl_est_date,
                                  'snowline_obs': [points_regrid], 
                                  'snowline_obs_elev_median': sl_obs_elev_median,
                                  'snowline_est': [sl_est['geometry']], 
                                  'snowline_est_elev_median': sl_est['snowline_elevs_median_m'],
                                  'snowline_elev_median_differences': sl_est['snowline_elevs_median_m'] - sl_obs_elev_median,
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        result_dfs.append(result_df)

    print(' ')

# concatenate all per-snowline results (empty DataFrame if nothing was compared)
results_df = pd.concat(result_dfs) if result_dfs else pd.DataFrame()

In [None]:
# -----Plot the distribution of per-snowline performance metrics
# NaNs are dropped per column before plotting: boxplot does not skip NaNs, and
# dropping rows across the whole table would discard values because of NaNs in
# unrelated columns, which would not match the NaN-aware statistics below.
distance_medians = results_df['snowline_distance_median'].dropna()
elev_median_differences = results_df['snowline_elev_median_differences'].dropna()
fig, ax = plt.subplots(1, 2, figsize=(10,6))
ax[0].boxplot(distance_medians)
ax[0].set_title('snowline_distance_median')
ax[1].boxplot(elev_median_differences)
ax[1].set_title('snowline_elev_median_differences')
plt.show()

# compile stats in dataframe (one row; medians and IQRs ignore NaNs)
results_stats_df = pd.DataFrame({'dataset':['Sentinel-2_TOA'],
                                 'ground distance median [m]': np.nanmedian(results_df['snowline_distance_median']),
                                 'ground distance IQR [m]': stats.iqr(results_df['snowline_distance_median'], nan_policy='omit'),
                                 'elevation difference median [m]': np.nanmedian(results_df['snowline_elev_median_differences']),
                                 'elevation difference IQR [m]': stats.iqr(results_df['snowline_elev_median_differences'], nan_policy='omit'),
                                 'N': len(results_df)
                                })

# print results (median +/- IQR, rounded to 2 decimals)
dist_median = np.round(results_stats_df['ground distance median [m]'][0], 2)
dist_iqr = np.round(results_stats_df['ground distance IQR [m]'][0], 2)
elev_median = np.round(results_stats_df['elevation difference median [m]'][0], 2)
elev_iqr = np.round(results_stats_df['elevation difference IQR [m]'][0], 2)
print('Sentinel-2 TOA snowline performance')
print('----------')
print('Ground distance = ' + str(dist_median) + ' +/- ' + str(dist_iqr) + ' m')
print('Median elevation difference = ' + str(elev_median) + ' +/- ' + str(elev_iqr) + ' m')

# save to file
results_stats_fn = base_path + 'inputs-outputs/snowline_performance_stats_Sentinel-2_TOA.csv'
results_stats_df.to_csv(results_stats_fn, index=False)
print('Performance metrics saved to file: '+results_stats_fn)

## Compile all stats tables into one CSV

In [None]:
# grab stats file names
# (the compiled output file name has no trailing '_' so it never matches this pattern)
stats_fns_pattern = base_path + 'inputs-outputs/snowline_performance_stats_*.csv'
fns = sorted(glob.glob(stats_fns_pattern))
# fail with a clear message if there is nothing to compile, e.g. when this cell
# is re-run after the individual files were already deleted below
if not fns:
    raise FileNotFoundError('No per-dataset stats files found matching: ' + stats_fns_pattern)

# read every per-dataset stats table and stack them into one dataframe
results_full = pd.concat([pd.read_csv(fn) for fn in fns], ignore_index=True)

# add a row of metrics averaged across all datasets (N is the total count)
metric_cols = ['ground distance median [m]',
               'ground distance IQR [m]',
               'elevation difference median [m]',
               'elevation difference IQR [m]']
results = pd.DataFrame({'dataset': ['All datasets AVERAGE'],
                        **{col: [np.nanmean(results_full[col])] for col in metric_cols},
                        'N': [np.sum(results_full['N'])]
                       })
results_full = pd.concat([results_full, results], ignore_index=True)

# save full dataframe to file
results_full_fn = 'snowline_performance_stats.csv'
results_full.to_csv(base_path + 'inputs-outputs/' + results_full_fn, index=False)
print('stats for all datasets compiled and saved: ' + base_path + 'inputs-outputs/' + results_full_fn)

# delete individual files (their contents now live in the compiled file)
for fn in fns:
    os.remove(fn)
    print('file deleted: '+fn)