# Conduct snowline accuracy assessment

Rainey Aberle

October 2022

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import rioxarray as rxr
import rasterio as rio
import xarray as xr
import ee
import wxee as wx
import glob
import geopandas as gpd
import sys
from scipy import stats
import skimage.io
from skimage import feature
from shapely.geometry import Point, LineString, shape, MultiPolygon, Polygon
from shapely.ops import split, unary_union, polygonize, nearest_points
import pandas as pd

In [None]:
# -----User settings
# root directory of the snow-cover-mapping repository (note the trailing slash)
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# study sites to include in the accuracy assessment
site_names = [
    'Wolverine',
    'LemonCreek',
    'SouthCascade',
    'Sperry',
    'Gulkana',
]
# directory where output figures are saved
figures_out_path = f'{base_path}figures/'

# make the repository's functions/ directory importable, then load the utilities
sys.path.insert(1, f'{base_path}functions/')
import pipeline_utils as f

## PlanetScope

In [None]:
# -----Loop through sites
# For every study site, compare each manually digitized ("observed") snowline with the
# PlanetScope-derived ("estimated") snowline that has the same datetime, and store one row
# of accuracy metrics per compared snowline in results_df.
result_dfs = [] # single-row DataFrames, concatenated into results_df after the loops
for site_name in site_names:

    print(site_name)
    print('----------')

    # define path to classified snow images
    im_path = base_path + '../study-sites/' + site_name + '/imagery/PlanetScope/'

    # define path to digitized snow lines
    sl_obs_path = base_path + '../snowline-package/' + site_name + '/snowlines/'

    # load observed snow line shapefile names
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp')) # sort chronologically

    # load estimated snowlines
    sl_est_fn = glob.glob(im_path + 'snowlines/*' + site_name + '*snowlines.pkl')[0]
    sl_ests = pd.read_pickle(sl_est_fn)
    # format datetimes once per site, BEFORE they are compared to the observed datetimes
    # (an explicit unit is used because pandas >= 2 rejects the unit-less np.datetime64 cast)
    sl_ests['datetime'] = sl_ests['datetime'].astype('datetime64[ns]')

    # AOI
    AOI_fn = glob.glob(base_path + '../study-sites/' + site_name + '/glacier_outlines/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)
    AOI_polygon = AOI.geometry[0] # only the first AOI feature is used for filtering

    # DEM
    DEM_fn = glob.glob(base_path + '../study-sites/' + site_name + '/DEMs/' + site_name + '*_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # loop through observed snow lines
    for sl_obs_fn in sl_obs_fns:

        # -----Load datasets
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop the 'id' column (if present) and any rows containing None/NaN, e.g. None geometries
        sl_obs = sl_obs.drop(columns=['id'], errors='ignore').dropna().reset_index(drop=True)
        if sl_obs.empty:
            print('No valid observed snowline geometry, continuing...')
            continue
        # reproject observed snow line to the AOI CRS
        # (the CRS object is passed directly so this also works when no EPSG code can be identified)
        sl_obs_UTM = sl_obs.to_crs(AOI.crs)
        # extract date from filename: characters 0-7 are YYYYMMDD, characters 9-10 are the hour
        date = sl_obs_fn.split('/'+site_name+'_')[1][0:11]
        datetime = np.datetime64(date[0:4]+ '-' + date[4:6] + '-' + date[6:8] + ' ' + date[9:11] + ':00:00')
        print(date)

        ### PlanetScope
        sl_est = sl_ests.loc[sl_ests.datetime==datetime]
        # drop n/a first and reset the index afterwards, so label 0 is always the first remaining row
        sl_est = sl_est[sl_est['snowlines_elevs_median'].notna()].reset_index(drop=True)
        # check if any snowlines exist
        if len(sl_est)==0:
            print('No data for this date, continuing...')
            continue
        # use the first matching row
        sl_est_coords = sl_est['snowlines_coords'][0]
        sl_est_elev_median = sl_est['snowlines_elevs_median'][0]
        # check if snowline is None
        if sl_est_coords is None:
            print('No snowline detected, continuing...')
            continue
        # open adjusted image of the same date; load it into memory so the file can be closed right away
        im_adj_fn = glob.glob(im_path + 'adjusted/*' + date[0:8] + '*.nc')[0] # define file name
        with xr.open_dataset(im_adj_fn) as im_ds:
            im_adj = im_ds.isel(time=0).load()

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp, ysamp = line.coords.xy
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]
        sl_obs_elev_median = np.nanmedian(sl_obs_elev)

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        isplit = [0] # point indices where to split the line
        for j in range(1, len(points)):
            if points[j].distance(points[j-1]) > max_dist:
                isplit.append(j)
        isplit.append(len(points)) # add ending point to complete the last line
        # build one LineString per run of points; must have at least two points to make a line
        line_split = [LineString(points[start:stop])
                      for start, stop in zip(isplit[:-1], isplit[1:])
                      if stop - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for segment in line_split:
            segment_distances = np.arange(0, segment.length, dx)
            # NOTE(review): the first vertex of the whole line is appended to every segment,
            # which duplicates it in points_regrid -- confirm whether the segment end point was intended
            line_points = [segment.interpolate(distance) for distance in segment_distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI_polygon)]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.array([p.distance(nearest_points(sl_est_coords, p)[0]) for p in points_regrid],
                             dtype=float)

        # -----Display results
        plt.figure(figsize=(8, 8))
        plt.imshow(np.dstack([im_adj['red'].data, im_adj['green'].data, im_adj['blue'].data]),
                   extent=(np.min(im_adj.x.data), np.max(im_adj.x.data), np.min(im_adj.y.data), np.max(im_adj.y.data)))
        plt.plot([p.coords.xy[0][0] for p in points_regrid],
                 [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        plt.plot(*sl_est_coords.coords.xy, '.m', label='estimated')
        plt.legend(loc='upper right')
        plt.grid()
        plt.title(datetime)
        plt.show()

        # compile results in df (one row per compared snowline)
        result_df = pd.DataFrame({'study_site': site_name,
                                  'datetime': datetime,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': sl_obs_elev_median,
                                  'snowline_est': [sl_est_coords],
                                  'snowline_est_elev_median': sl_est_elev_median,
                                  'snowline_elev_median_difference': sl_est_elev_median - sl_obs_elev_median,
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        # always keep the row: collecting into a list avoids depending on any loop index
        result_dfs.append(result_df)

# concatenate all rows; fall back to an empty DataFrame if nothing could be compared
results_df = pd.concat(result_dfs, ignore_index=True) if result_dfs else pd.DataFrame()
            


In [None]:
# -----Summarize PlanetScope snowline accuracy
# columns of results_df to summarize, paired with the labels used in the printed report
summary_columns = ['snowline_distance_median', 'snowline_elev_median_difference']
summary_labels = ['Points distance', 'Snowline median elevation']

# one box plot per metric, titled with the column name
fig, ax = plt.subplots(1, 2, figsize=(10,6))
for axis, column in zip(ax, summary_columns):
    axis.boxplot(results_df[column])
    axis.set_title(column)
plt.show()

# report the median +/- standard deviation of each metric (NaNs ignored), rounded to 2 decimals
print('PlanetScope snowline accuracy')
print('----------')
for label, column in zip(summary_labels, summary_columns):
    center = np.round(np.nanmedian(results_df[column]), 2)
    spread = np.round(np.nanstd(results_df[column]), 2)
    print(f'{label}: {center!s} +/- {spread!s} m')

## Landsat

In [None]:
# -----Loop through sites
# For every study site, compare each manually digitized ("observed") snowline with the
# Landsat-derived ("estimated") snowline closest in time (at most 3 days apart), and store
# one row of accuracy metrics per compared snowline in results_df.
result_dfs = [] # single-row DataFrames, concatenated into results_df after the loops
for site_name in site_names:

    print(site_name)
    print('----------')

    # define path to classified snow images
    im_path = base_path + '../study-sites/' + site_name + '/imagery/Landsat/masked/'

    # define path to digitized snow lines
    sl_obs_path = base_path + '../snowline-package/' + site_name + '/snowlines/'

    # load observed snow line shapefile names
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp')) # sort chronologically

    # load estimated snowlines
    sl_est_fn = glob.glob(im_path + '../snowlines/*' + site_name + '*snowlines.pkl')[0]
    sl_ests = pd.read_pickle(sl_est_fn)
    # an explicit unit is used because pandas >= 2 rejects the unit-less np.datetime64 cast
    sl_ests['datetime'] = sl_ests['datetime'].astype('datetime64[ns]')
    # rows with any missing value (including a missing snowline) are removed here
    sl_ests = sl_ests.dropna().reset_index(drop=True)

    # AOI
    AOI_fn = glob.glob(base_path + '../study-sites/' + site_name + '/glacier_outlines/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)
    AOI_polygon = AOI.geometry[0] # only the first AOI feature is used for filtering

    # DEM
    DEM_fn = glob.glob(base_path + '../study-sites/' + site_name + '/DEMs/' + site_name + '*_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # loop through observed snow lines
    for sl_obs_fn in sl_obs_fns:

        # -----Load datasets
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop the 'id' column (if present) and any rows containing None/NaN, e.g. None geometries
        sl_obs = sl_obs.drop(columns=['id'], errors='ignore').dropna().reset_index(drop=True)
        if sl_obs.empty:
            print('No valid observed snowline geometry, continuing...')
            continue
        # reproject observed snow line to the AOI CRS
        # (the CRS object is passed directly so this also works when no EPSG code can be identified)
        sl_obs_UTM = sl_obs.to_crs(AOI.crs)
        # extract date from filename: characters 0-7 are YYYYMMDD, characters 9-10 are the hour
        date = sl_obs_fn.split('/'+site_name+'_')[1][0:11]
        datetime = np.datetime64(date[0:4]+ '-' + date[4:6] + '-' + date[6:8] + ' ' + date[9:11] + ':00:00')
        print(date)

        ### Estimated
        # absolute time difference between every estimated snowline and the observed snowline
        date_diffs = (sl_ests['datetime'] - pd.Timestamp(datetime)).abs()
        # skip when there are no estimates at all, or the closest one is more than 3 days away
        if sl_ests.empty or date_diffs.min() > pd.Timedelta(days=3):
            print('No data found for this date, continuing...')
            continue
        sl_est = sl_ests.iloc[date_diffs.argmin()] # row (Series) of the closest estimate
        sl_est_coords = sl_est['snowlines_coords']
        sl_est_elev_median = sl_est['snowlines_elevs_median']
        # open the masked image stack and select the time step nearest the estimated snowline;
        # load it into memory so the file can be closed right away
        im_adj_fn = glob.glob(im_path + '*masked.nc')[0] # define file name
        with xr.open_dataset(im_adj_fn) as im_ds:
            im_adj = im_ds.sel(time=sl_est['datetime'], method='nearest').load()

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp, ysamp = line.coords.xy
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]
        sl_obs_elev_median = np.nanmedian(sl_obs_elev)

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        isplit = [0] # point indices where to split the line
        for j in range(1, len(points)):
            if points[j].distance(points[j-1]) > max_dist:
                isplit.append(j)
        isplit.append(len(points)) # add ending point to complete the last line
        # build one LineString per run of points; must have at least two points to make a line
        line_split = [LineString(points[start:stop])
                      for start, stop in zip(isplit[:-1], isplit[1:])
                      if stop - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for segment in line_split:
            segment_distances = np.arange(0, segment.length, dx)
            # NOTE(review): the first vertex of the whole line is appended to every segment,
            # which duplicates it in points_regrid -- confirm whether the segment end point was intended
            line_points = [segment.interpolate(distance) for distance in segment_distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI_polygon)]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.array([p.distance(nearest_points(sl_est_coords, p)[0]) for p in points_regrid],
                             dtype=float)

        # -----Display results
        plt.figure(figsize=(8, 8))
        plt.imshow(np.dstack([im_adj['SR_B4'].data, im_adj['SR_B3'].data, im_adj['SR_B2'].data]),
                   extent=(np.min(im_adj.x.data), np.max(im_adj.x.data), np.min(im_adj.y.data), np.max(im_adj.y.data)))
        plt.plot([p.coords.xy[0][0] for p in points_regrid],
                 [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        plt.plot(*sl_est_coords.coords.xy, '.m', label='estimated')
        plt.legend(loc='upper right')
        plt.grid()
        plt.title(datetime)
        plt.show()

        # compile results in df (one row per compared snowline)
        result_df = pd.DataFrame({'study_site': site_name,
                                  'datetime': datetime,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': sl_obs_elev_median,
                                  'snowline_est': [sl_est_coords],
                                  'snowline_est_elev_median': sl_est_elev_median,
                                  'snowline_elev_median_difference': sl_est_elev_median - sl_obs_elev_median,
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        # always keep the row: collecting into a list avoids depending on any loop index
        result_dfs.append(result_df)

        print(' ')

# concatenate all rows; fall back to an empty DataFrame if nothing could be compared
results_df = pd.concat(result_dfs, ignore_index=True) if result_dfs else pd.DataFrame()
results_df

In [None]:
# -----Summarize Landsat snowline accuracy
# columns of results_df to summarize, paired with the labels used in the printed report
summary_columns = ['snowline_distance_median', 'snowline_elev_median_difference']
summary_labels = ['Points distance', 'Snowline median elevation']

# one box plot per metric, titled with the column name
fig, ax = plt.subplots(1, 2, figsize=(10,6))
for axis, column in zip(ax, summary_columns):
    axis.boxplot(results_df[column])
    axis.set_title(column)
plt.show()

# report the median +/- standard deviation of each metric (NaNs ignored), rounded to 2 decimals
print('Landsat snowline accuracy')
print('----------')
for label, column in zip(summary_labels, summary_columns):
    center = np.round(np.nanmedian(results_df[column]), 2)
    spread = np.round(np.nanstd(results_df[column]), 2)
    print(f'{label}: {center!s} +/- {spread!s} m')

## Sentinel-2

In [None]:
# -----Loop through sites
# For every study site, compare each manually digitized ("observed") snowline with the
# Sentinel-2-derived ("estimated") snowline closest in time (at most 3 days apart), and store
# one row of accuracy metrics per compared snowline in results_df.
result_dfs = [] # single-row DataFrames, concatenated into results_df after the loops
for site_name in site_names:

    print(site_name)
    print('----------')

    # define path to classified snow images
    im_path = base_path + '../study-sites/' + site_name + '/imagery/Sentinel-2/masked/'

    # define path to digitized snow lines
    sl_obs_path = base_path + '../snowline-package/' + site_name + '/snowlines/'

    # load observed snow line shapefile names
    sl_obs_fns = sorted(glob.glob(sl_obs_path + '*.shp')) # sort chronologically

    # load estimated snowlines
    sl_est_fn = glob.glob(im_path + '../snowlines/*' + site_name + '*snowlines.pkl')[0]
    sl_ests = pd.read_pickle(sl_est_fn)
    # an explicit unit is used because pandas >= 2 rejects the unit-less np.datetime64 cast
    sl_ests['datetime'] = sl_ests['datetime'].astype('datetime64[ns]')
    # rows with any missing value (including a missing snowline) are removed here
    sl_ests = sl_ests.dropna().reset_index(drop=True)

    # AOI
    AOI_fn = glob.glob(base_path + '../study-sites/' + site_name + '/glacier_outlines/' + site_name + '_USGS_*.shp')[0]
    AOI = gpd.read_file(AOI_fn)
    AOI_polygon = AOI.geometry[0] # only the first AOI feature is used for filtering

    # DEM
    DEM_fn = glob.glob(base_path + '../study-sites/' + site_name + '/DEMs/' + site_name + '*_DEM*.tif')[0]
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rename({'band_data': 'elevation'})

    # loop through observed snow lines
    for sl_obs_fn in sl_obs_fns:

        # -----Load datasets
        ### Observed
        sl_obs = gpd.read_file(sl_obs_fn)
        # drop the 'id' column (if present) and any rows containing None/NaN, e.g. None geometries
        sl_obs = sl_obs.drop(columns=['id'], errors='ignore').dropna().reset_index(drop=True)
        if sl_obs.empty:
            print('No valid observed snowline geometry, continuing...')
            continue
        # reproject observed snow line to the AOI CRS
        # (the CRS object is passed directly so this also works when no EPSG code can be identified)
        sl_obs_UTM = sl_obs.to_crs(AOI.crs)
        # extract date from filename: characters 0-7 are YYYYMMDD, characters 9-10 are the hour
        date = sl_obs_fn.split('/'+site_name+'_')[1][0:11]
        datetime = np.datetime64(date[0:4]+ '-' + date[4:6] + '-' + date[6:8] + ' ' + date[9:11] + ':00:00')
        print(date)

        ### Estimated
        # skip when there are no estimates at all (there is nothing to take a minimum of)
        if sl_ests.empty:
            print('No data found for this date, continuing...')
            continue
        # absolute time difference between every estimated snowline and the observed snowline
        date_diffs = (sl_ests['datetime'] - pd.Timestamp(datetime)).abs()
        print(date_diffs.min())
        # skip when the closest estimate is more than 3 days away
        if date_diffs.min() > pd.Timedelta(days=3):
            print('No data found for this date, continuing...')
            continue
        sl_est = sl_ests.iloc[date_diffs.argmin()] # row (Series) of the closest estimate
        sl_est_coords = sl_est['snowlines_coords']
        sl_est_elev_median = sl_est['snowlines_elevs_median']
        # open the masked image whose file name contains the estimate's datetime (YYYYMMDDTHHMMSS);
        # load it into memory so the file can be closed right away
        im_datetime_str = str(sl_est['datetime']).replace('-','').replace(' ','T').replace(':','')
        im_adj_fn = glob.glob(im_path + '*' + im_datetime_str + '*masked.nc')[0] # define file name
        with xr.open_dataset(im_adj_fn) as im_ds:
            im_adj = im_ds.isel(time=0).load()

        # -----Sample elevations at observed snowline points
        line = sl_obs_UTM.geometry[0]
        xsamp, ysamp = line.coords.xy
        sl_obs_elev = [DEM.sel(x=x, y=y, method='nearest')['elevation'].data[0] for x, y in zip(xsamp, ysamp)]
        sl_obs_elev_median = np.nanmedian(sl_obs_elev)

        # -----Split line depending on distance between points
        max_dist = 100 # m
        first_point = Point(xsamp[0], ysamp[0])
        points = [Point(x, y) for x, y in zip(xsamp, ysamp)]
        isplit = [0] # point indices where to split the line
        for j in range(1, len(points)):
            if points[j].distance(points[j-1]) > max_dist:
                isplit.append(j)
        isplit.append(len(points)) # add ending point to complete the last line
        # build one LineString per run of points; must have at least two points to make a line
        line_split = [LineString(points[start:stop])
                      for start, stop in zip(isplit[:-1], isplit[1:])
                      if stop - start > 1]

        # -----Regrid the observed snowlines to equal spacing
        dx = 30 # point spacing
        points_regrid = []
        for segment in line_split:
            segment_distances = np.arange(0, segment.length, dx)
            # NOTE(review): the first vertex of the whole line is appended to every segment,
            # which duplicates it in points_regrid -- confirm whether the segment end point was intended
            line_points = [segment.interpolate(distance) for distance in segment_distances] + [first_point]
            # filter points outside the AOI
            points_regrid = points_regrid + [p for p in line_points if p.within(AOI_polygon)]

        # -----Calculate distance between each observed snowline point and the closest estimated snowline point
        distances = np.array([p.distance(nearest_points(sl_est_coords, p)[0]) for p in points_regrid],
                             dtype=float)

        # -----Display results
        plt.figure(figsize=(8, 8))
        plt.imshow(np.dstack([im_adj['B4'].data, im_adj['B3'].data, im_adj['B2'].data]),
                   extent=(np.min(im_adj.x.data), np.max(im_adj.x.data), np.min(im_adj.y.data), np.max(im_adj.y.data)))
        plt.plot([p.coords.xy[0][0] for p in points_regrid],
                 [p.coords.xy[1][0] for p in points_regrid], '.c', label='observed')
        plt.plot(*sl_est_coords.coords.xy, '.m', label='estimated')
        plt.legend(loc='upper right')
        plt.grid()
        plt.title(sl_est['datetime'])
        plt.show()

        # compile results in df (one row per compared snowline)
        result_df = pd.DataFrame({'study_site': site_name,
                                  'datetime': datetime,
                                  'snowline_obs': [points_regrid],
                                  'snowline_obs_elev_median': sl_obs_elev_median,
                                  'snowline_est': [sl_est_coords],
                                  'snowline_est_elev_median': sl_est_elev_median,
                                  'snowline_elev_median_difference': sl_est_elev_median - sl_obs_elev_median,
                                  'snowline_distances': [distances],
                                  'snowline_distance_median': np.nanmedian(distances)})
        # always keep the row: collecting into a list avoids depending on any loop index
        result_dfs.append(result_df)

        print(' ')

# concatenate all rows; fall back to an empty DataFrame if nothing could be compared
results_df = pd.concat(result_dfs, ignore_index=True) if result_dfs else pd.DataFrame()
results_df

In [None]:
# -----Summarize Sentinel-2 snowline accuracy
# columns of results_df to summarize, paired with the labels used in the printed report
summary_columns = ['snowline_distance_median', 'snowline_elev_median_difference']
summary_labels = ['Points distance', 'Snowline median elevation']

# one box plot per metric, titled with the column name
fig, ax = plt.subplots(1, 2, figsize=(10,6))
for axis, column in zip(ax, summary_columns):
    axis.boxplot(results_df[column])
    axis.set_title(column)
plt.show()

# report the median +/- standard deviation of each metric (NaNs ignored), rounded to 2 decimals
print('Sentinel-2 snowline accuracy')
print('----------')
for label, column in zip(summary_labels, summary_columns):
    center = np.round(np.nanmedian(results_df[column]), 2)
    spread = np.round(np.nanstd(results_df[column]), 2)
    print(f'{label}: {center!s} +/- {spread!s} m')