# Transform elevations from the ellipsoid to the EGM96 geoid vertical reference

In [None]:
import xarray as xr
import pyproj
import os
import glob
import rioxarray as rxr
import sys
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from pyproj.crs import CompoundCRS
from tqdm.auto import tqdm
import pandas as pd
import geopandas as gpd
from shapely import wkt
from ast import literal_eval

In [None]:
# -----Set up directory paths and make the project's helper functions importable
# NOTE: both paths below are absolute, machine-specific locations.
# folder holding one sub-folder per study site
study_sites_path = '/Users/raineyaberle/Google Drive/My Drive/Research/CryoGARS-Glaciology/Advising/student-research/Alexandra-Friel/snow_cover_mapping_application/study-sites/'
# root of the snow-cover-mapping code base
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# put <base_path>functions/ at position 1 of the module search path, then
# import the pipeline helpers from it
sys.path.insert(1, os.path.join(base_path, 'functions/'))
import pipeline_utils as f

In [None]:
# -----Build the list of study sites to process
# A folder in study_sites_path is kept when its name is not hidden (no leading
# '.'), it holds at least one snowline CSV, and its clipped ArcticDEM geoid
# raster exists.
site_names = [
    name for name in sorted(os.listdir(study_sites_path))
    if not name.startswith('.')
    and len(glob.glob(study_sites_path + name + '/imagery/snowlines/*.csv')) > 0
    and os.path.exists(study_sites_path + name + '/DEMs/' + name + '_ArcticDEM_clip_geoid.tif')
]
print(str(len(site_names)) + ' study sites:')
# display the list as the cell output
site_names

## Transform elevations for snowlines that used ArcticDEM Mosaic or USGS DEMs

In [None]:
# -----Load EGM96 geoid heights
# the raster variable 'band_data' is renamed so that lookups below read as geoid heights
egm96_fn = os.path.join(base_path, 'inputs-outputs', 'us_nga_egm96_15.tif')
egm96 = xr.open_dataset(egm96_fn)
egm96 = egm96.rename({'band_data': 'geoid_height'})

# -----Iterate over sites
bgotus_site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry'] # BGOTUS

# column order of the adjusted files
cols_adj = ['site_name', 'datetime', 'dataset', 'snowlines_coords_X', 'snowlines_coords_Y',
            'snowline_elevs_m', 'snowline_elevs_median_m', 'SCA_m2', 'AAR', 'ELA_from_AAR_m',
            'HorizontalCRS', 'VerticalCRS', 'geometry']

for site_name in tqdm(bgotus_site_names):

    print(site_name)

    # grab the original (not yet adjusted) snowline file names
    snowline_fns = sorted(glob.glob(study_sites_path + site_name + '/imagery/snowlines/*.csv'))
    snowline_fns = [x for x in snowline_fns if '_adj' not in x]

    # iterate over snowlines
    for snowline_fn in tqdm(snowline_fns):

        # define adjusted file name; skip files that were already adjusted
        snowline_adj_fn = snowline_fn[0:-4] + '_adj.csv'
        if os.path.exists(snowline_adj_fn):
            continue

        # load snowline, reporting (rather than hiding) the reason for a failed read
        try:
            snowline = pd.read_csv(snowline_fn)
        except Exception as err:
            print('error opening ' + os.path.basename(snowline_fn) + ', skipping...')
            print(err)
            continue
        if snowline.empty:
            print('no rows in ' + os.path.basename(snowline_fn) + ', skipping...')
            continue

        # NOTE(review): only row 0 is inspected and adjusted below, i.e. each file
        # is assumed to hold a single snowline - confirm.
        if snowline['geometry'][0] == '[]':
            # no snowline geometry: nothing to adjust, only the columns are updated below
            snowline_adj = snowline
        else:
            # convert to geopandas GeoDataFrame
            snowline['geometry'] = snowline['geometry'].apply(wkt.loads)
            snowline_gdf = gpd.GeoDataFrame(snowline, geometry=snowline['geometry'], crs=snowline['CRS'][0])
            # coordinate columns are parsed exactly as before
            snowline_gdf[['snowlines_coords_X', 'snowlines_coords_Y']] = snowline_gdf[['snowlines_coords_X',
                                                                                       'snowlines_coords_Y']].apply(pd.eval)

            # reproject geometry to WGS84 horizontal coordinates
            # NOTE(review): the written geometry is in EPSG:4326 while the CRS column
            # (renamed HorizontalCRS below) and snowlines_coords_X/Y are left as read
            # from the input file - confirm this is intended.
            snowline_adj = snowline_gdf.to_crs('EPSG:4326')

            # parse the stringified elevation list without eval(); 'nan' entries
            # are read as None, which numpy converts to NaN
            elevs_text = snowline_adj.at[0, 'snowline_elevs_m']
            elevs = np.array(literal_eval(elevs_text.replace('nan', 'None')), dtype=float)

            # nearest-neighbour geoid height at every snowline vertex, in one
            # pointwise lookup (indexers share the 'points' dimension)
            xs, ys = snowline_adj.geometry[0].coords.xy
            geoid_heights = egm96['geoid_height'].sel(x=xr.DataArray(np.asarray(xs), dims='points'),
                                                      y=xr.DataArray(np.asarray(ys), dims='points'),
                                                      method='nearest').data[0]

            # subtract geoid heights from snowline elevations
            # (.at writes into the frame itself; chained df[col][0] = ... may not.
            #  A plain list is stored so the CSV cell stays a comma-separated list.)
            elevs_adj = np.round(elevs - geoid_heights, decimals=2)
            snowline_adj.at[0, 'snowline_elevs_m'] = elevs_adj.tolist()

            # calculate median snowline elevation
            snowline_adj.at[0, 'snowline_elevs_median_m'] = np.nanmedian(elevs_adj)

            # adjust ELA from AAR using the mean geoid height along the snowline
            snowline_adj.at[0, 'ELA_from_AAR_m'] = np.round(snowline_adj.at[0, 'ELA_from_AAR_m'] - np.nanmean(geoid_heights),
                                                            decimals=2)

        # rename and reorder columns
        snowline_adj = snowline_adj.rename(columns={'CRS': 'HorizontalCRS'})
        snowline_adj['VerticalCRS'] = 'EPSG:5773'
        snowline_adj = snowline_adj[cols_adj]

        # save to file
        snowline_adj.to_csv(snowline_adj_fn, index=False)

    print(' ')

### Plot adjusted snowlines to check they make sense

In [None]:
# Plot the median snowline elevation over time for each site, using the
# adjusted ('_adj') snowline files, as a visual sanity check.
for site_name in bgotus_site_names:

    snowline_adj_fns = sorted(glob.glob(study_sites_path + site_name + '/imagery/snowlines/*_adj.csv'))
    if not snowline_adj_fns:
        # nothing to plot (and no 'datetime' column to parse) for this site
        print('no adjusted snowline files found for ' + site_name + ', skipping...')
        continue

    # read every file, then concatenate once instead of growing a frame in a loop
    snowlines = pd.concat([pd.read_csv(fn) for fn in snowline_adj_fns], ignore_index=True)
    snowlines['datetime'] = pd.to_datetime(snowlines['datetime'], format='mixed')

    plt.figure(figsize=(10,6))
    plt.plot(snowlines['datetime'], snowlines['snowline_elevs_median_m'], '.')
    plt.grid()
    plt.title(site_name)
    plt.show()

In [None]:
# Print the name of every site whose number of original snowline files differs
# from its number of adjusted ('_adj') snowline files.

for site_name in site_names:

    snowlines_dir = study_sites_path + site_name + '/imagery/snowlines/'
    snowline_adj_fns = sorted(glob.glob(snowlines_dir + '*_adj.csv'))
    # original files are all CSVs whose path does not contain '_adj'
    snowline_fns = [x for x in sorted(glob.glob(snowlines_dir + '*.csv')) if '_adj' not in x]

    if len(snowline_fns) == len(snowline_adj_fns):
        continue
    print(site_name)

### Delete old snowlines, rename adjusted files

In [None]:
# for site_name in tqdm(bgotus_site_names):
    
#     snowline_fns = sorted(glob.glob(study_sites_path + site_name + '/imagery/snowlines/*.csv'))
#     snowline_fns = [x for x in snowline_fns if '_adj' not in x]
#     snowline_adj_fns = sorted(glob.glob(study_sites_path + site_name + '/imagery/snowlines/*_adj.csv'))
        
#     for snowline_fn in snowline_fns:
#         os.remove(snowline_fn)
        
#     for snowline_adj_fn in snowline_adj_fns:
#         snowline_adj_fn_new = snowline_adj_fn.replace('_adj', '')
#         os.rename(snowline_adj_fn, snowline_adj_fn_new)
        

## Repeat for ELAs

In [None]:
# -----Build the list of study sites that have ELA files
# A folder in study_sites_path is kept when its name is not hidden (no leading
# '.'), it holds at least one ELA CSV, and its clipped ArcticDEM geoid raster
# exists.
site_names_elas = [
    name for name in sorted(os.listdir(study_sites_path))
    if not name.startswith('.')
    and len(glob.glob(study_sites_path + name + '/ELAs/*.csv')) > 0
    and os.path.exists(study_sites_path + name + '/DEMs/' + name + '_ArcticDEM_clip_geoid.tif')
]
print(str(len(site_names_elas)) + ' study sites:')
# display the list as the cell output
site_names_elas

In [None]:
# -----Load EGM96 geoid heights
# the raster variable 'band_data' is renamed so that lookups below read as geoid heights
egm96_fn = os.path.join(base_path, 'inputs-outputs', 'us_nga_egm96_15.tif')
egm96 = xr.open_dataset(egm96_fn)
egm96 = egm96.rename({'band_data': 'geoid_height'})

# -----Iterate over sites
bgotus_site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry'] # BGOTUS

# column order of the adjusted files
cols_adj = ['site_name', 'datetime', 'dataset', 'snowlines_coords_X', 'snowlines_coords_Y',
            'snowline_elevs_m', 'snowline_elevs_median_m', 'SCA_m2', 'AAR', 'ELA_from_AAR_m',
            'HorizontalCRS', 'VerticalCRS', 'geometry']

# NOTE(review): only the first two ELA sites are processed ([0:2]) and the save
# at the bottom of the loop is commented out - this looks like a debugging
# state; confirm before running on all sites.
for site_name in tqdm(site_names_elas[0:2]):

    print(site_name)

    # grab the original (not yet adjusted) ELA file names
    ela_fns = sorted(glob.glob(study_sites_path + site_name + '/ELAs/*.csv'))
    ela_fns = [x for x in ela_fns if '_adj' not in x]

    # iterate over ELA files
    for ela_fn in tqdm(ela_fns):

        # define adjusted file name; skip files that were already adjusted
        ela_adj_fn = ela_fn[0:-4] + '_adj.csv'
        if os.path.exists(ela_adj_fn):
            continue

        # load ELAs, reporting (rather than hiding) the reason for a failed read
        try:
            ela = pd.read_csv(ela_fn)
        except Exception as err:
            print('error opening ' + os.path.basename(ela_fn) + ', skipping...')
            print(err)
            continue
        if ela.empty:
            print('no rows in ' + os.path.basename(ela_fn) + ', skipping...')
            continue

        # parse the WKT geometries BEFORE building the GeoDataFrame; rows stored
        # as '[]' (or left blank) have no geometry and become None
        has_geometry = [isinstance(g, str) and g != '[]' for g in ela['geometry']]
        ela['geometry'] = [wkt.loads(g) if ok else None
                           for g, ok in zip(ela['geometry'], has_geometry)]
        ela_gdf = gpd.GeoDataFrame(ela, geometry='geometry', crs=ela.loc[0, 'CRS'])
        # coordinate columns are parsed exactly as before
        ela_gdf[['snowlines_coords_X', 'snowlines_coords_Y']] = ela_gdf[['snowlines_coords_X',
                                                                         'snowlines_coords_Y']].apply(pd.eval)

        # reproject geometry to WGS84 horizontal coordinates ONCE per file, so the
        # per-row adjustments below accumulate in ela_adj instead of being
        # discarded by a fresh reprojection on every row
        # NOTE(review): the geometry ends up in EPSG:4326 while the CRS column
        # (renamed HorizontalCRS below) and snowlines_coords_X/Y are left as read
        # from the input file - confirm this is intended.
        ela_adj = ela_gdf.to_crs('EPSG:4326')

        for i in range(len(ela)):

            # rows without geometry are left unadjusted
            if not has_geometry[i]:
                continue

            # parse the stringified elevation list; 'nan' entries are read as
            # None, which numpy converts to NaN
            elevs_text = ela_adj.at[i, 'snowline_elevs_m']
            elevs = np.array(literal_eval(elevs_text.replace('nan', 'None')), dtype=float)

            # nearest-neighbour geoid height at every snowline vertex, in one
            # pointwise lookup (indexers share the 'points' dimension)
            xs, ys = ela_adj.geometry[i].coords.xy
            geoid_heights = egm96['geoid_height'].sel(x=xr.DataArray(np.asarray(xs), dims='points'),
                                                      y=xr.DataArray(np.asarray(ys), dims='points'),
                                                      method='nearest').data[0]

            # subtract geoid heights from snowline elevations
            # (.at can place a list in a single cell; a plain list keeps the CSV
            #  cell a comma-separated list)
            elevs_adj = np.round(elevs - geoid_heights, decimals=2)
            ela_adj.at[i, 'snowline_elevs_m'] = elevs_adj.tolist()

            # calculate median snowline elevation
            ela_adj.at[i, 'snowline_elevs_median_m'] = np.nanmedian(elevs_adj)

            # adjust ELA from AAR using the mean geoid height along the snowline
            ela_adj.at[i, 'ELA_from_AAR_m'] = np.round(ela_adj.at[i, 'ELA_from_AAR_m'] - np.nanmean(geoid_heights),
                                                       decimals=2)

        # rename and reorder columns
        ela_adj = ela_adj.rename(columns={'CRS': 'HorizontalCRS'})
        ela_adj['VerticalCRS'] = 'EPSG:5773'
        ela_adj = ela_adj[cols_adj]

        # save to file (left disabled, as found; see NOTE(review) above the site loop)
        # ela_adj.to_csv(ela_adj_fn, index=False)

    print(' ')

In [None]:
# Debug check: display the 'snowline_elevs_m' entry of row `i` of `ela_gdf`.
# Both names are leftovers from the ELA loop in the cell above, so that cell
# must have been run first.
ela_gdf.loc[i,'snowline_elevs_m']

## Reference USGS DEM elevations to the EGM96 geoid

In [None]:
# -----Load EGM96 geoid heights
# the raster variable 'band_data' is renamed so that lookups below read as geoid heights
egm96_fn = os.path.join(base_path, 'inputs-outputs', 'us_nga_egm96_15.tif')
egm96 = xr.open_dataset(egm96_fn)
egm96 = egm96.rename({'band_data': 'geoid_height'})

# -----Iterate over sites
bgotus_site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry'] # BGOTUS

for site_name in bgotus_site_names:

    print(site_name)

    # find the site's USGS DEM; '*_geoid.tif' files written by an earlier run
    # also match '*USGS*.tif', so they are excluded to avoid subtracting the
    # geoid a second time
    DEM_fns = sorted(fn for fn in glob.glob(study_sites_path + site_name + '/DEMs/*USGS*.tif')
                     if not fn.endswith('_geoid.tif'))
    if not DEM_fns:
        print('no USGS DEM found for ' + site_name + ', skipping...')
        continue
    DEM_fn = DEM_fns[0]

    # load the DEM and reproject it to WGS84 horizontal coordinates
    DEM = xr.open_dataset(DEM_fn)
    DEM = DEM.rio.reproject('EPSG:4326')
    # keep the first band as a 2-D array, then drop the 'band' dimension (and
    # with it the 'band_data' variable) from the dataset
    elevations = DEM.band_data.data[0]
    DEM = DEM.drop_dims('band')

    # nearest-neighbour geoid heights on the DEM grid
    geoid_heights = egm96.sel(x=DEM.x.data, y=DEM.y.data, method='nearest').geoid_height.data[0]

    # subtract geoid heights from elevations
    DEM['elevation'] = (('y', 'x'), elevations - geoid_heights)

    # plot
    plt.imshow(DEM.elevation.data, extent=(np.min(DEM.x.data), np.max(DEM.x.data),
                                           np.min(DEM.y.data), np.max(DEM.y.data)))
    plt.colorbar()
    plt.show()

    # save to file; only the file extension is replaced, never another part of the path
    DEM_geoid_fn = os.path.splitext(DEM_fn)[0] + '_geoid.tif'
    DEM.rio.to_raster(DEM_geoid_fn)
    print('DEM referenced to geoid and saved to file: ' + DEM_geoid_fn)

In [None]:
# Debug check: geoid-referenced elevations of the last DEM processed in the
# cell above. `DEM` no longer carries `band_data` once `drop_dims('band')` has
# run there, so the 2-D `elevations` array kept by that cell is used instead.
elevations_geoid = elevations - geoid_heights
elevations_geoid

## For sites that use the NASADEM, adjust column names and resave

In [None]:
# -----Build the list of study sites that use the NASADEM
# A folder in study_sites_path is kept when its name is not hidden (no leading
# '.'), it holds at least one snowline CSV, and its clipped NASADEM raster
# exists.
site_names = [
    name for name in sorted(os.listdir(study_sites_path))
    if not name.startswith('.')
    and len(glob.glob(study_sites_path + name + '/imagery/snowlines/*.csv')) > 0
    and os.path.exists(study_sites_path + name + '/DEMs/' + name + '_NASADEM_clip.tif')
]
print(str(len(site_names)) + ' study sites:')
# display the list as the cell output
site_names

In [None]:
# column order used when the snowline files are re-saved
cols = [
    'site_name', 'datetime',
    'snowlines_coords_X', 'snowlines_coords_Y',
    'HorizontalReference', 'VerticalReference',
    'snowline_elevs_m', 'snowline_elevs_median_m',
    'SCA_m2', 'AAR', 'ELA_from_AAR_m',
    'dataset', 'geometry',
]

# add horizontal/vertical reference columns to every snowline file of every
# site and overwrite the file in place
for site_name in site_names:

    print(site_name)

    # all snowline CSVs of this site
    snowlines_path = os.path.join(study_sites_path, site_name, 'imagery', 'snowlines')
    snowline_fns = glob.glob(snowlines_path + '/*.csv')

    for snowline_fn in tqdm(snowline_fns):
        snowline = pd.read_csv(snowline_fn)
        # files that already have the new column were handled earlier
        if 'HorizontalReference' in snowline.columns:
            continue

        if snowline['geometry'][0] == '[]':
            # no geometry in the first row: keep the table as read
            snowline_gdf = snowline
        else:
            # parse the WKT geometries and reproject them to WGS84 lat lon
            snowline['geometry'] = snowline['geometry'].apply(wkt.loads)
            snowline_gdf = gpd.GeoDataFrame(snowline, crs=snowline['CRS'][0]).to_crs('EPSG:4326')

        # NOTE(review): HorizontalReference copies the CRS column as read from the
        # file, even when the geometry was just reprojected to EPSG:4326 - confirm.
        snowline_gdf['HorizontalReference'] = snowline_gdf['CRS']
        snowline_gdf['VerticalReference'] = 'EGM96 geoid (EPSG:5773)'

        # keep only the columns listed in cols, in that order
        snowline_gdf = snowline_gdf[cols]

        # overwrite the original file
        snowline_gdf.to_csv(snowline_fn, index=False)

In [None]:
# -----Build the list of all study sites that have snowline files
# A folder in study_sites_path is kept when its name is not hidden (no leading
# '.') and it holds at least one snowline CSV.
site_names = [
    name for name in sorted(os.listdir(study_sites_path))
    if not name.startswith('.')
    and len(glob.glob(study_sites_path + name + '/imagery/snowlines/*.csv')) > 0
]
print(str(len(site_names)) + ' study sites:')
# display the list as the cell output
site_names

In [None]:
#define columns order
cols = ['site_name', 'datetime', 'snowlines_coords_X', 'snowlines_coords_Y',
        'HorizontalCRS', 'VerticalCRS', 'snowline_elevs_m',
        'snowline_elevs_median_m', 'SCA_m2', 'AAR', 'ELA_from_AAR_m', 'dataset',
        'geometry']

# old column name -> new column name
column_renames = {'HorizontalReference': 'HorizontalCRS',
                  'VerticalReference': 'VerticalCRS'}

# rename the *Reference columns to *CRS in every snowline file of every site,
# overwriting each file in place
for site_name in site_names:

    print(site_name)

    # grab snowline filenames
    snowlines_path = os.path.join(study_sites_path, site_name, 'imagery', 'snowlines')
    snowline_fns = glob.glob(snowlines_path + '/*.csv')

    # iterate over snowline file names
    for snowline_fn in tqdm(snowline_fns):
        snowline = pd.read_csv(snowline_fn)
        # already uses the new column names
        if 'HorizontalCRS' in snowline.columns:
            continue
        # nothing to rename: leave the file untouched
        if not any(old in snowline.columns for old in column_renames):
            continue

        # rename on the frame that was just read from THIS file; `columns=` is
        # required, otherwise rename() targets the row index and changes nothing
        snowline = snowline.rename(columns=column_renames)

        # reordering with cols is left disabled, as in the original cell
        # snowline = snowline[cols]

        # resave to file
        snowline.to_csv(snowline_fn, index=False)