# Compare ELA/AAR estimates if we were to use late September observations

In [86]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from ast import literal_eval
from shapely.geometry import LineString
import numpy as np
from pyproj import Transformer
from concurrent.futures import ThreadPoolExecutor
from functools import partial


In [50]:
# Path to study-sites
study_sites_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites'
# Get all site names
rgi_ids = sorted(glob.glob(os.path.join(study_sites_path, 'RGI*')))
rgi_ids = [os.path.basename(x) for x in rgi_ids]
print(f'{len(rgi_ids)} sites')
rgi_ids

200 sites


['RGI60-01.00032',
 'RGI60-01.00033',
 'RGI60-01.00037',
 'RGI60-01.00038',
 'RGI60-01.00046',
 'RGI60-01.00312',
 'RGI60-01.00566',
 'RGI60-01.00570',
 'RGI60-01.00576',
 'RGI60-01.00675',
 'RGI60-01.01104',
 'RGI60-01.01151',
 'RGI60-01.01390',
 'RGI60-01.01524',
 'RGI60-01.01733',
 'RGI60-01.03594',
 'RGI60-01.03622',
 'RGI60-01.03861',
 'RGI60-01.04375',
 'RGI60-01.04624',
 'RGI60-01.06268',
 'RGI60-01.06279',
 'RGI60-01.06722',
 'RGI60-01.08155',
 'RGI60-01.08174',
 'RGI60-01.08246',
 'RGI60-01.08248',
 'RGI60-01.08262',
 'RGI60-01.08288',
 'RGI60-01.08296',
 'RGI60-01.08302',
 'RGI60-01.08336',
 'RGI60-01.08353',
 'RGI60-01.08389',
 'RGI60-01.08395',
 'RGI60-01.08403',
 'RGI60-01.08412',
 'RGI60-01.08427',
 'RGI60-01.09148',
 'RGI60-01.09162',
 'RGI60-01.09216',
 'RGI60-01.09411',
 'RGI60-01.09639',
 'RGI60-01.10196',
 'RGI60-01.10555',
 'RGI60-01.10689',
 'RGI60-01.10778',
 'RGI60-01.10851',
 'RGI60-01.10857',
 'RGI60-01.11616',
 'RGI60-01.11654',
 'RGI60-01.11788',
 'RGI60-01.1

In [None]:
# Define columns to save
out_cols = ['RGIId', 'datetime', 'source', 'SCA_m2', 'AAR', 'ELA_from_AAR_m', 'snowline_elevs_m', 'snowline_elevs_median_m', 'snowline_geometry']

# Define function to convert lists of X and Y coordinates to a LineString and transform to WGS84
def create_linestring_wgs84(x_coords, y_coords, transformer):
    # Create list of (x, y) coordinate tuples
    points = list(zip(x_coords, y_coords))
    # Create LineString in UTM
    line = LineString(points)
    # Transform to WGS84
    line_wgs84 = LineString([transformer.transform(x, y) for x, y in line.coords])
    return line_wgs84

# Define function to merge and adjust snow cover stats files
def merge_adjust_scs(rgi_id, study_sites_path):
    # Define output file name
    scs_fn = os.path.join(study_sites_path, rgi_id, f'{rgi_id}_snow_cover_stats.csv')
    if not os.path.exists(scs_fn):
        # Initialize dataframe
        scs = pd.DataFrame()
        # Get snow cover stats file names
        sc_fns = sorted(glob.glob(os.path.join(study_sites_path, rgi_id, 'imagery', 'snowlines', '*_snowline.csv')))
        # Merge files
        for fn in sc_fns:
            if 'PlanetScope' not in fn:
                sc = pd.read_csv(fn)
                scs = pd.concat([scs, sc])
        # Merge redundant columns
        cols = list(scs.keys())
        if ('site_name' in cols) & ('RGIId' in cols):
            scs['RGIId'] = scs['RGIId'].fillna(scs['site_name'])
        elif 'site_name' in cols:
            scs.rename(columns={'site_name': 'RGIId'}, inplace=True)
        if ('dataset' in cols) & ('source' in cols):
            scs['source'] = scs['source'].fillna(scs['dataset'])
        elif 'dataset' in cols:
            scs.rename(columns={'dataset': 'source'}, inplace=True)
        scs.reset_index(drop=True, inplace=True)
        # Ensure coordinate lists are correctly formatted
        scs['snowlines_coords_X'] = scs['snowlines_coords_X'].apply(lambda x: literal_eval(x) if x != "[]" else [])
        scs['snowlines_coords_Y'] = scs['snowlines_coords_Y'].apply(lambda x: literal_eval(x) if x != "[]" else [])
        # Get the UTM CRS for transformation
        crs_utm = scs['HorizontalCRS'].drop_duplicates().dropna().values[0]
        transformer = Transformer.from_crs(crs_utm, "EPSG:4326", always_xy=True)
        # Create and transform snowline geometries
        scs['snowline_geometry'] = scs.apply(lambda row: create_linestring_wgs84(row['snowlines_coords_X'], row['snowlines_coords_Y'], transformer), axis=1)
        # Select the relevant columns
        scs = scs[out_cols]
        # Save merged, adjusted dataframe
        scs.to_csv(scs_fn, index=False)

# Run the process in parallel
with ThreadPoolExecutor() as executor:
    results = list(tqdm(executor.map(partial(merge_adjust_scs, study_sites_path=study_sites_path), rgi_ids), total=len(rgi_ids)))



  0%|          | 0/200 [00:00<?, ?it/s]

## Estimate AARs and ELAs annually from 2016 on

In [87]:
# Function to grab the row closest to October 1
def closest_to_october_1(group):
    # Define October 1 of that year
    target_date = pd.Timestamp(f"{group['year'].iloc[0]}-10-01")
    # Calculate the absolute difference in days between each date and October 1
    group['days_diff'] = group['datetime'] - target_date
    # Select the row with the minimum difference
    closest_row = group.loc[group['days_diff'].idxmax()]
    return closest_row

# Define the function to process each rgi_id
def process_rgi_id(rgi_id, study_sites_path):
    # Define the file path for the current rgi_id
    scs_fn = os.path.join(study_sites_path, rgi_id, f'{rgi_id}_snow_cover_stats.csv')
    scs = pd.read_csv(scs_fn)
    scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
    scs['year'] = scs['datetime'].dt.year
    # Calculate annual minimum AAR observation
    Iobs = scs.groupby(scs['datetime'].dt.year)['AAR'].idxmin()
    min_obs = scs.iloc[Iobs]
    # Grab the closest in time to October 1 each year (before October 1)
    min_oct = scs.groupby(scs['year']).apply(closest_to_october_1).reset_index(drop=True)
    # Merge observations and October 1 closest
    cols = ['datetime', 'source', 'AAR', 'ELA_from_AAR_m', 'year']
    min_merged = min_obs[cols].merge(min_oct[cols], on='year', how='outer', suffixes=['_obs', '_Oct'])
    # Subset to post-2016 and add RGIId
    min_merged = min_merged.loc[min_merged['year'] >= 2016]
    min_merged['RGIId'] = rgi_id
    return min_merged

# Run the processing in parallel
with ThreadPoolExecutor() as executor:
    results = list(tqdm(executor.map(partial(process_rgi_id, study_sites_path=study_sites_path), rgi_ids), total=len(rgi_ids)))

# Concatenate all results into a single DataFrame
aars_elas_df = pd.concat(results).reset_index(drop=True)
aars_elas_df


  0%|          | 0/200 [00:00<?, ?it/s]

Unnamed: 0,datetime_obs,source_obs,AAR_obs,ELA_from_AAR_m_obs,year,datetime_Oct,source_Oct,AAR_Oct,ELA_from_AAR_m_Oct,RGIId
0,2016-08-31 15:35:29,Sentinel-2_TOA,0.353411,2297.331300,2016,2016-10-25 21:07:35,Landsat,0.995128,1408.870400,RGI60-01.00032
1,2017-08-06 15:35:30,Sentinel-2_TOA,0.087793,2770.615200,2017,2017-10-20 21:35:15,Sentinel-2_TOA,0.994999,1408.981100,RGI60-01.00032
2,2018-07-31 15:16:44,Sentinel-2_TOA,0.176663,2559.151900,2018,2018-10-30 21:36:21,Sentinel-2_TOA,0.995994,1408.219400,RGI60-01.00032
3,2019-07-22 15:37:48,Sentinel-2_TOA,0.051490,2856.405300,2019,2019-10-22 15:27:45,Sentinel-2_TOA,0.869389,1608.387500,RGI60-01.00032
4,2020-08-05 15:37:42,Sentinel-2_TOA,0.295257,2375.035200,2020,2020-10-29 21:37:38,Sentinel-2_SR,0.995918,1408.261400,RGI60-01.00032
...,...,...,...,...,...,...,...,...,...,...
1593,2019-08-19 18:00:00,PlanetScope,0.039822,2109.720000,2019,2019-10-30 19:11:11,Sentinel-2_TOA,0.871716,1804.710000,RGI60-02.18778
1594,2020-10-17 13:21:10,Sentinel-2_SR,0.001433,2178.168889,2020,2020-10-29 19:11:11,Sentinel-2_SR,0.917690,1776.799146,RGI60-02.18778
1595,2021-06-16 18:00:00,PlanetScope,0.009312,2150.500000,2021,2021-10-30 18:00:00,PlanetScope,0.769097,1827.580000,RGI60-02.18778
1596,2022-10-12 19:21:03,Sentinel-2_TOA,0.095295,2077.210000,2022,2022-10-29 19:11:07,Sentinel-2_SR,0.996450,1665.960506,RGI60-02.18778


## Estimate AARs by stacking years, using weekly medians