# Adjust manual ELA picks and snow cover stats files for comparison and publishing

## Already run — no need to run again!

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import glob
from shapely.geometry import LineString, Point
from shapely import wkt
import ast
from tqdm.auto import tqdm

In [2]:
# Path to all study sites.
# The default is the original external-drive location; set the SNOW_COVER_DATA_PATH
# environment variable to point the notebook at a copy stored elsewhere.
data_path = os.environ.get(
    'SNOW_COVER_DATA_PATH',
    '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/',
)

In [3]:
def adjust_date(date):
    """Normalise a datetime string to the 'YYYY-MM-DD HH:MM:SS' layout.

    Three input layouts are distinguished:

    * contains both 'T' and '-' (e.g. '2020-06-01T12:30:45'): the 'T'
      separator is replaced by a space;
    * contains 'T' but no '-' (e.g. '20200601T123045'): the string is sliced
      by position into date and time fields and re-joined with '-' and ':';
    * contains no 'T': returned unchanged.

    Parameters
    ----------
    date : str
        Datetime string in one of the layouts above.

    Returns
    -------
    str
        The adjusted datetime string.
    """
    # No 'T' separator: nothing to adjust
    if 'T' not in date:
        return date
    # Dashed layout: only the separator needs replacing
    if '-' in date:
        return date.replace('T', ' ')
    # Compact layout: character 8 is the 'T', so the time fields start at index 9
    year, month, day = date[0:4], date[4:6], date[6:8]
    hour, minute, second = date[9:11], date[11:13], date[13:]
    return f'{year}-{month}-{day} {hour}:{minute}:{second}'
    
def get_crs(df):
    """Return the horizontal CRS stored in the first row of `df`.

    The columns 'CRS', 'HorizontalReference' and 'HorizontalCRS' are consulted
    in that order, and the first one whose first-row value is not missing
    (NaN / None) is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least one row and at least one of the three CRS columns.

    Returns
    -------
    object
        The first-row value of the selected column, exactly as stored in the
        table. If every available CRS column is missing in the first row, the
        (missing) value of the highest-priority available column is returned.

    Raises
    ------
    ValueError
        If `df` contains none of the three CRS columns.
    """
    crs_cols = [col for col in ('CRS', 'HorizontalReference', 'HorizontalCRS')
                if col in df.columns]
    if not crs_cols:
        raise ValueError(
            "No CRS column found; expected one of 'CRS', 'HorizontalReference', 'HorizontalCRS'"
        )
    first_vals = [df[col].values[0] for col in crs_cols]
    for val in first_vals:
        # pd.isna also recognises the numpy.float64 NaN produced by an all-empty
        # column, which a `type(val) == float` comparison does not
        if not pd.isna(val):
            return val
    # Nothing usable: hand back the highest-priority value unchanged
    return first_vals[0]
    
def adjust_geom(geom, crs):
    """Reproject a geometry to EPSG:4326 when its first x-coordinate is positive.

    Parameters
    ----------
    geom : geometry exposing ``.coords.xy``
        Geometry whose coordinates are inspected and, if needed, reprojected.
    crs : CRS accepted by geopandas
        CRS the coordinates of `geom` are currently expressed in.

    Returns
    -------
    The input `geom` unchanged when its first x-coordinate is not positive;
    otherwise a LineString rebuilt from the coordinates and reprojected to
    EPSG:4326.

    Notes
    -----
    NOTE(review): a positive first x is used as the sign that coordinates still
    need reprojecting -- presumably because the study sites have negative
    longitudes; confirm before reusing this for other regions.
    """
    xy = geom.coords.xy
    xs, ys = xy[0], xy[1]
    # First x not positive: coordinates are returned untouched
    if not (xs[0] > 0):
        return geom
    # Rebuild the line from its vertices and reproject it through a one-row GeoDataFrame
    line = LineString(list(zip(xs, ys)))
    line_gdf = gpd.GeoDataFrame(geometry=[line], crs=crs)
    reprojected = line_gdf.to_crs('EPSG:4326')
    return reprojected.values[0][0]

## Adjust and compile manual ELA picks for each site

In [4]:
# Grab site names with manual ELA picks:
# keep every entry of data_path that contains an 'ELAs' sub-path, sorted by name
rgi_ids = sorted(
    site for site in os.listdir(data_path)
    if os.path.exists(os.path.join(data_path, site, 'ELAs'))
)
print(f'Sites with manual ELA picks = {len(rgi_ids)}')
rgi_ids

Sites with manual ELA picks = 94


['RGI60-01.00037',
 'RGI60-01.00038',
 'RGI60-01.00312',
 'RGI60-01.00570',
 'RGI60-01.01104',
 'RGI60-01.01151',
 'RGI60-01.01524',
 'RGI60-01.01733',
 'RGI60-01.08246',
 'RGI60-01.08248',
 'RGI60-01.08288',
 'RGI60-01.08296',
 'RGI60-01.09162',
 'RGI60-01.10778',
 'RGI60-01.11616',
 'RGI60-01.11654',
 'RGI60-01.11788',
 'RGI60-01.12370',
 'RGI60-01.14391',
 'RGI60-01.14523',
 'RGI60-01.16262',
 'RGI60-01.17464',
 'RGI60-01.17761',
 'RGI60-01.17774',
 'RGI60-01.17803',
 'RGI60-01.17807',
 'RGI60-01.19460',
 'RGI60-01.19592',
 'RGI60-01.19599',
 'RGI60-01.19682',
 'RGI60-01.19725',
 'RGI60-01.19773',
 'RGI60-01.19814',
 'RGI60-01.19825',
 'RGI60-01.20180',
 'RGI60-01.20181',
 'RGI60-01.20186',
 'RGI60-01.20196',
 'RGI60-01.20272',
 'RGI60-01.20274',
 'RGI60-01.20279',
 'RGI60-01.20286',
 'RGI60-01.20302',
 'RGI60-01.20303',
 'RGI60-01.20309',
 'RGI60-01.20324',
 'RGI60-01.20796',
 'RGI60-01.21014',
 'RGI60-01.22699',
 'RGI60-01.23597',
 'RGI60-01.23635',
 'RGI60-01.23649',
 'RGI60-01.2

In [5]:
# Iterate over site names: compile all manual ELA pick files of a site into one CSV
for rgi_id in tqdm(rgi_ids):
    # Load manual ELA picks; sorting keeps the compiled row order reproducible
    ela_fns = sorted(glob.glob(os.path.join(data_path, rgi_id, 'ELAs', '*ELAs*.csv')))
    if not ela_fns:
        # Without any input file there is no 'datetime' column to adjust, so skip the site
        print('No manual ELA files found, skipping site:', rgi_id)
        continue
    # Read every file, then concatenate once instead of growing a frame inside the loop
    elas = pd.concat([pd.read_csv(fn) for fn in ela_fns], ignore_index=True)
    # Adjust datetimes
    elas['datetime'] = [adjust_date(d) for d in elas['datetime'].values]
    # Adjust geometries where a snowline observation exists.
    # read_csv returns the geometry column as text, so rows are selected by
    # "non-empty string" (a type test against LineString can never match a
    # CSV-loaded value) and the WKT is parsed on the compiled frame itself.
    if 'geometry' in elas.columns:
        raw_geoms = list(elas['geometry'].values)
        has_geom = [isinstance(g, str) and g.strip() not in ('', '[]') for g in raw_geoms]
        if any(has_geom):
            crs = get_crs(elas)
            geoms = [adjust_geom(wkt.loads(g), crs) if ok else g
                     for g, ok in zip(raw_geoms, has_geom)]
            elas['geometry'] = geoms
            # Rows without a snowline get empty coordinate lists
            elas['snowlines_coords_X'] = [list(g.coords.xy[0]) if ok else []
                                          for g, ok in zip(geoms, has_geom)]
            elas['snowlines_coords_Y'] = [list(g.coords.xy[1]) if ok else []
                                          for g, ok in zip(geoms, has_geom)]
    # Every row belongs to this site, so the ID is set directly; any
    # 'site_name' / 'study_site' column is dropped by the selection below
    elas['RGIId'] = rgi_id
    # Select columns and order
    cols = ['RGIId', 'datetime', 'dataset', 'snowlines_coords_X', 'snowlines_coords_Y', 
            'snowline_elevs_m', 'snowline_elevs_median_m', 'SCA_m2', 'AAR']
    elas = elas[cols]
    # Save to file
    out_fn = os.path.join(data_path, rgi_id, f'{rgi_id}_ELAs_manual_picks.csv')
    elas.to_csv(out_fn, index=False)
    print('Compiled ELAs saved to file:', out_fn)

  0%|          | 0/94 [00:00<?, ?it/s]

Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.00037/RGI60-01.00037_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.00038/RGI60-01.00038_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.00312/RGI60-01.00312_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.00570/RGI60-01.00570_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.01104/RGI60-01.01104_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/RGI60-01.01151/RGI60-01.01151_ELAs_manual_picks.csv
Compiled ELAs saved to file: /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_m

## Adjust and compile automated snow cover stats for each site

In [None]:
# Grab site names with automated snow cover stats:
# keep every entry of data_path that contains an 'imagery/snowlines' sub-path, sorted by name
rgi_ids = sorted(
    site for site in os.listdir(data_path)
    if os.path.exists(os.path.join(data_path, site, 'imagery', 'snowlines'))
)
print(f'Sites with automated snow cover stats = {len(rgi_ids)}')
rgi_ids

In [None]:
# Iterate over site names: compile all automated snow cover stats files of a site into one CSV.
# All sites in rgi_ids are processed (not a single hard-coded ID), so that every
# site gets the '<RGIId>_snow_cover_stats.csv' file that later cells read.
for rgi_id in tqdm(rgi_ids):
    # Load automated snow cover stats
    sc_fns = sorted(glob.glob(os.path.join(data_path, rgi_id, 'imagery', 'snowlines', '*snowline*.csv')))
    if not sc_fns:
        # Without any input file there is no 'datetime' column to adjust, so skip the site
        print('No snow cover stats files found, skipping site:', rgi_id)
        continue
    # Read every file, then concatenate once instead of growing a frame inside the loop
    scs = pd.concat([pd.read_csv(fn) for fn in sc_fns], ignore_index=True)
    # Adjust datetimes
    scs['datetime'] = [adjust_date(d) for d in scs['datetime'].values]
    # Adjust geometries. A row has a snowline only if its geometry is a non-empty
    # string other than '[]'; empty CSV cells load as NaN and must not reach wkt.loads.
    raw_geoms = list(scs['geometry'].values)
    has_geom = [isinstance(g, str) and g.strip() not in ('', '[]') for g in raw_geoms]
    # The CRS is only needed (and only looked up) when something has to be reprojected
    crs = get_crs(scs) if any(has_geom) else None
    geoms = []
    xs, ys = [], []
    for geom, ok in zip(raw_geoms, has_geom):
        if ok:
            geom = adjust_geom(wkt.loads(geom), crs)
            x, y = list(geom.coords.xy[0]), list(geom.coords.xy[1])
        else:
            x, y = [], []
        geoms.append(geom)
        xs.append(x)
        ys.append(y)
    scs['geometry'] = geoms
    scs['snowlines_coords_X'] = xs
    scs['snowlines_coords_Y'] = ys
    # Every row belongs to this site, so the ID is set directly and BEFORE the
    # column selection: the selection then cannot fail on a missing 'RGIId' and
    # no assignment is made on a slice of the frame. Any 'site_name' /
    # 'study_site' column is dropped by the selection below.
    scs['RGIId'] = rgi_id
    # Select columns and order
    cols = ['RGIId', 'datetime', 'dataset', 'snowlines_coords_X', 'snowlines_coords_Y', 
            'snowline_elevs_m', 'snowline_elevs_median_m', 'SCA_m2', 'AAR', 'ELA_from_AAR_m']
    scs = scs[cols]

    # Save to file
    out_fn = os.path.join(data_path, rgi_id, f'{rgi_id}_snow_cover_stats.csv')
    scs.to_csv(out_fn, index=False)
    print('Compiled snow cover stats saved to file:', out_fn)

In [None]:
# Check that snowlines and snow cover stats have the same length;
# sites where the two files differ in row count are printed and collected in rgi_ids_err
rgi_ids_err = []
for rgi_id in tqdm(rgi_ids):
    site_dir = os.path.join(data_path, rgi_id)
    snowlines_fn = os.path.join(site_dir, f'{rgi_id}_snowlines.csv')
    scs_fn = os.path.join(site_dir, f'{rgi_id}_snow_cover_stats.csv')
    # Load both per-site tables
    snowlines = pd.read_csv(snowlines_fn)
    scs = pd.read_csv(scs_fn)
    n_snowlines, n_scs = len(snowlines), len(scs)
    if n_snowlines == n_scs:
        continue
    # Mismatch: report the site with both row counts and remember it
    print(rgi_id, n_snowlines, n_scs)
    rgi_ids_err.append(rgi_id)

In [None]:
# For each site with a row-count mismatch, print the (datetime, dataset) records
# that are in the snowlines file but not in the snow cover stats file
for rgi_id in tqdm(rgi_ids_err):
    print(rgi_id)
    snowlines_fn = os.path.join(data_path, rgi_id, f'{rgi_id}_snowlines.csv')
    snowlines = pd.read_csv(snowlines_fn)
    snowlines['datetime'] = pd.to_datetime(snowlines['datetime'], format='mixed')
    snowlines_dt_ds = list(zip(snowlines['datetime'], snowlines['dataset']))
    scs_fn = os.path.join(data_path, rgi_id, f'{rgi_id}_snow_cover_stats.csv')
    scs = pd.read_csv(scs_fn)
    # Parse with the same 'mixed' format as the snowlines so both sides compare alike
    scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
    # Compare whole (datetime, dataset) pairs through a set of tuples. Testing
    # `x in <2-D array>` instead matches element-wise, so a record would count as
    # present whenever its datetime OR its dataset occurs anywhere in the table.
    scs_dt_ds = set(zip(scs['datetime'], scs['dataset']))
    for x in snowlines_dt_ds:
        if x not in scs_dt_ds:
            print(list(x))
    print(' ')

In [None]:
# -----Remove "snowlines" files
# NOTE: this permanently deletes '<RGIId>_snowlines.csv' for every site in rgi_ids
for rgi_id in tqdm(rgi_ids): 
    snowlines_fn = os.path.join(data_path, rgi_id, f'{rgi_id}_snowlines.csv')
    # Skip sites whose file is already gone so the cell can be re-run without raising
    if os.path.exists(snowlines_fn):
        os.remove(snowlines_fn)