# Estimate ELAs and fit linear trendlines to $\Sigma PDDs + \Sigma Snowfall = h_{sl}$ to assess ELA sensitivities

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
from tqdm.auto import tqdm
from scipy.stats import median_abs_deviation as MAD
import sys
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

In [None]:
# Define path to data
# Root folder of the snow cover mapping project; every input and output path
# in this notebook is built by joining sub-folders onto it.
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
# Alternate root (presumably a local copy of the same project tree -- confirm before switching):
# scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'

## Load glacier boundaries

In [None]:
# -----Load glacier boundaries
# Read the AOI shapefile stored in the project's 'analysis' folder
aois_fn = os.path.join(scm_path, 'analysis', 'all_aois.shp')
aois = gpd.read_file(aois_fn)
# Cast both RGI region code columns to integers, one column at a time
for region_col in ('O1Region', 'O2Region'):
    aois[region_col] = aois[region_col].astype(int)
print('All AOIs loaded from file.')

## Estimate and save ELAs

In [None]:
# -----Monthly mean snowline altitudes
# The result is cached on disk: load it when the CSV exists, otherwise build it
# from the per-site snowline and ERA5 files and save it.
obs_elas_monthly_fn = os.path.join(scm_path, 'analysis', 'observed_monthly_elas.csv')
if os.path.exists(obs_elas_monthly_fn):
    obs_elas_monthly = pd.read_csv(obs_elas_monthly_fn)
    obs_elas_monthly['Date'] = pd.to_datetime(obs_elas_monthly['Date'])
    print('Remotely-sensed monthly ELAs loaded from file.')
else:
    era_cols = ['positive_degree_days_annual_sum',
                'mean_total_precipitation_sum_annual_sum']
    # Collect one dataframe per site and concatenate once at the end
    site_frames = []

    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load and compile snow cover stats
        sc_fns = sorted(glob.glob(os.path.join(scm_path, 'study-sites', rgi_id, 'imagery', 'snowlines', '*.csv')))
        if not sc_fns:
            # No files means no 'datetime' column to parse below, so skip the site
            print(f'No snowline files found for {rgi_id}, skipping.')
            continue
        scs = pd.concat([pd.read_csv(fn) for fn in sc_fns], axis=0, ignore_index=True)
        scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
        # Remove wonky ELA values
        scs.loc[scs['ELA_from_AAR_m'] > 1e10, 'ELA_from_AAR_m'] = np.nan
        # Add Year and Month columns
        scs['Year'] = scs['datetime'].dt.year
        scs['Month'] = scs['datetime'].dt.month

        # Remove pre-2016 values
        scs = scs.loc[scs['Year'] >= 2016]

        # Calculate the mean monthly snowline altitudes at each site
        site_elas_monthly = scs.groupby(by=['Year', 'Month'])['ELA_from_AAR_m'].mean().reset_index()

        # Add mean monthly PDDs and snowfall to dataframe
        era_fn = os.path.join(scm_path, 'study-sites', rgi_id, 'ERA', f'{rgi_id}_ERA5_daily_means.csv')
        era = pd.read_csv(era_fn)
        era['Date'] = pd.to_datetime(era['Date'])
        era['Year'] = era['Date'].dt.year
        era['Month'] = era['Date'].dt.month
        era_monthly = era.groupby(by=['Year', 'Month'])[era_cols].mean().reset_index()
        # Default (inner) merge: only months present in both tables are kept
        site_elas_monthly = site_elas_monthly.merge(era_monthly, on=['Year', 'Month'])
        # Add RGI ID
        site_elas_monthly['RGIId'] = rgi_id

        site_frames.append(site_elas_monthly)

    # Combine all sites. ignore_index gives every row a unique label, which the
    # label-based idxmax / .loc lookup used for the annual ELAs relies on.
    obs_elas_monthly = pd.concat(site_frames, axis=0, ignore_index=True)

    # Add Date column (first day of each month)
    obs_elas_monthly['Date'] = pd.to_datetime(
        obs_elas_monthly[['Year', 'Month']].rename(columns=str.lower).assign(day=1)
    )

    # Reorder columns
    obs_elas_monthly = obs_elas_monthly[['RGIId', 'Date', 'Year', 'Month', 'ELA_from_AAR_m'] + era_cols]
    # Save to file
    obs_elas_monthly.to_csv(obs_elas_monthly_fn, index=False)
    print('Remotely-sensed monthly ELAs saved to file:', obs_elas_monthly_fn)

obs_elas_monthly

In [None]:
# -----Annual ELAs
# The annual ELA of a site is its highest monthly mean snowline altitude in a year.
# Check if ELAs already exist in file
obs_elas_annual_fn = os.path.join(scm_path, 'analysis', 'observed_annual_elas.csv')
if os.path.exists(obs_elas_annual_fn):
    obs_elas_annual = pd.read_csv(obs_elas_annual_fn)
    obs_elas_annual['Date'] = pd.to_datetime(obs_elas_annual['Date'])
    print('Remotely-sensed annual ELAs loaded from file.')
else:
    # idxmax returns index *labels*, so the lookup below is only correct when
    # labels are unique. Drop rows without an ELA (so no group is all-NaN) and
    # rebuild a unique index before grouping.
    monthly_valid = obs_elas_monthly.dropna(subset=['ELA_from_AAR_m']).reset_index(drop=True)
    # Identify indices of maximum annual ELA
    Imax = monthly_valid.groupby(by=['RGIId', 'Year'])['ELA_from_AAR_m'].idxmax().values
    obs_elas_annual = monthly_valid.loc[Imax, ['RGIId', 'Date', 'Year', 'ELA_from_AAR_m',
                                               'positive_degree_days_annual_sum',
                                               'mean_total_precipitation_sum_annual_sum']]
    obs_elas_annual = obs_elas_annual.reset_index(drop=True)
    # Save to file
    obs_elas_annual.to_csv(obs_elas_annual_fn, index=False)
    print('Remotely-sensed annual ELAs saved to file:', obs_elas_annual_fn)

obs_elas_annual

## Fit linear models to monthly snowline altitudes

In [None]:
def scale_Xy(df, X_cols, y_cols, scaler_type=None):
    """Scale the predictor and target columns of a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is not modified.
    X_cols, y_cols : list of str
        Names of the predictor and target columns to scale.
    scaler_type : object with ``fit`` and ``transform`` methods, optional
        Scaler instance to fit on the selected columns. Defaults to a new
        ``StandardScaler`` created on each call, so separate calls never
        share (and overwrite) one fitted scaler.

    Returns
    -------
    df_scaled : pandas.DataFrame
        Copy of ``df`` with the selected columns replaced by scaled values.
    scaler : object
        The fitted scaler.
    """
    if scaler_type is None:
        from sklearn.preprocessing import StandardScaler
        scaler_type = StandardScaler()
    cols = list(X_cols) + list(y_cols)
    df_scaled = df.copy()
    scaler = scaler_type.fit(df_scaled[cols])
    df_scaled[cols] = scaler.transform(df_scaled[cols])

    # Return the scaled copy (not the untouched input) along with the scaler
    return df_scaled, scaler

def subset_Xy_data(X, y, p=0.9):
    """Keep the middle fraction ``p`` of the samples, ordered by the first X column.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of predictors; rows are sorted by increasing values of
        column 0 (the PDD column in this notebook).
    y : numpy.ndarray
        Targets with one entry (or row) per row of ``X``.
    p : float, optional
        Fraction of samples to keep, between 0 and 1. The remaining
        ``1 - p`` is removed in equal parts from both ends of the sorted data.

    Returns
    -------
    X_sorted_subset, y_sorted_subset : numpy.ndarray
        The sorted and trimmed predictors and targets.

    Raises
    ------
    ValueError
        If ``p`` is outside the range [0, 1].
    """
    if not 0 <= p <= 1:
        raise ValueError(f'p must be between 0 and 1, got {p}')
    # sort the X data by increasing PDDs
    Iargsort = X[:, 0].argsort()
    X_sorted, y_sorted = X[Iargsort, :], y[Iargsort]
    # number of points in the excluded (1-p) share of the data; the small
    # epsilon keeps floating-point error in (1-p) from truncating e.g. 9.999... to 9
    n_samples = len(X)
    n_excluded = int(n_samples * (1 - p) + 1e-9)
    n_trim = n_excluded // 2
    # Use an explicit stop index: a stop of "-0" would select nothing when n_trim is 0
    stop = n_samples - n_trim
    X_sorted_subset = X_sorted[n_trim:stop, :]
    y_sorted_subset = y_sorted[n_trim:stop]

    return X_sorted_subset, y_sorted_subset
    
def linear_fit(X, y):
    """Fit an ordinary least-squares model of y on X.

    Returns the flattened coefficient array and the score of the fitted
    model evaluated on the same X and y it was trained on.
    """
    regressor = LinearRegression()
    regressor.fit(X, y)
    flat_coefs = np.ravel(regressor.coef_)
    fit_score = regressor.score(X, y)
    return flat_coefs, fit_score
    
# Define function for K-folds cross-validation model fitting
def kfolds_linear_fit(X, y, n_folds=5):
    """Fit a linear model on each K-fold training split and summarise the fits.

    Parameters
    ----------
    X : numpy.ndarray
        2-D predictor array; column 0 is treated as the PDD term and
        column 1 as the snowfall term.
    y : numpy.ndarray
        Targets, one entry (or row) per row of ``X``.
    n_folds : int, optional
        Number of folds. KFold requires at least this many samples.

    Returns
    -------
    pandas.DataFrame
        One row holding the mean, standard deviation, median and MAD of each
        coefficient across folds, plus the mean and median score.

    Notes
    -----
    The score is the one returned by ``linear_fit``, i.e. evaluated on the
    training split of each fold; the held-out split is not used.
    """
    # Define K-folds
    kf = KFold(n_splits=n_folds)
    # Initialize parameters
    coefs_PDD, coefs_snowfall, scores = [], [], []
    # Iterate over fold indices (only the training indices are needed)
    for train_index, _ in kf.split(X):
        # Fit model to the training split
        coefs, score = linear_fit(X[train_index], y[train_index])
        coefs_PDD.append(coefs[0])
        coefs_snowfall.append(coefs[1])
        scores.append(score)
    # Calculate stats, compile in dataframe.
    # MAD omits NaNs so it agrees with the NaN-aware mean/std/median.
    df = pd.DataFrame({'coef_PDD_mean': [np.nanmean(coefs_PDD)],
                       'coef_PDD_std': [np.nanstd(coefs_PDD)],
                       'coef_PDD_median': [np.nanmedian(coefs_PDD)],
                       'coef_PDD_MAD': [MAD(coefs_PDD, nan_policy='omit')],
                       'coef_snowfall_mean': [np.nanmean(coefs_snowfall)],
                       'coef_snowfall_std': [np.nanstd(coefs_snowfall)],
                       'coef_snowfall_median': [np.nanmedian(coefs_snowfall)],
                       'coef_snowfall_MAD': [MAD(coefs_snowfall, nan_policy='omit')],
                       'score_mean': [np.nanmean(scores)],
                       'score_median': [np.nanmedian(scores)]
                      })
    return df

In [None]:
# Fit ELA = f(PDD, snowfall) at every site with K-folds linear regression
X_cols = ['positive_degree_days_annual_sum', 'mean_total_precipitation_sum_annual_sum']
y_cols = ['ELA_from_AAR_m']
# KFold raises a ValueError when a site has fewer samples than folds,
# so the fold count is also the minimum number of samples per site
n_folds = 5

# Collect one fit per site and concatenate once after the loop
site_fits = []
for rgi_id in tqdm(obs_elas_monthly['RGIId'].drop_duplicates().values):
    # subset the site's rows and drop incomplete ones
    # (non-inplace dropna avoids modifying a slice of obs_elas_monthly)
    site_df = obs_elas_monthly.loc[obs_elas_monthly['RGIId'] == rgi_id].dropna()
    # only include dates before October
    site_df = site_df.loc[site_df['Date'].dt.month < 10]
    # remove dates where PDD==0
    site_df = site_df.loc[site_df['positive_degree_days_annual_sum'] > 0]
    if len(site_df) < n_folds:
        continue
    # prep the X and y data
    X = site_df[X_cols].values
    y = site_df[y_cols].values
    # optional: subset to 80% to mitigate the impact of snowfall
    # X_sub, y_sub = subset_Xy_data(X, y, p=0.8)
    # fit linear trendline
    fit_df = kfolds_linear_fit(X, y, n_folds=n_folds)
    fit_df['RGIId'] = rgi_id
    # add RGI regions and subregion to df
    for col in ['O1Region', 'O2Region', 'Subregion']:
        fit_df[col] = [aois.loc[aois['RGIId'] == rgi_id, col].values[0]]
    site_fits.append(fit_df)

# Combine all sites; fall back to an empty dataframe if no site qualified
fits_obs_monthly_df = pd.concat(site_fits, ignore_index=True) if site_fits else pd.DataFrame()

# Save to file
fits_obs_monthly_fn = os.path.join(scm_path, 'analysis', 'linear_fit_observed_monthly_ela_pdd_snowfall.csv')
fits_obs_monthly_df.to_csv(fits_obs_monthly_fn, index=False)
print('Linear fits saved to file:', fits_obs_monthly_fn)
fits_obs_monthly_df

In [None]:
# Histogram of the per-site median PDD coefficients
pdd_coef_medians = fits_obs_monthly_df['coef_PDD_median']
plt.hist(pdd_coef_medians, bins=50)
plt.show()

In [None]:
# Print stats
# Summarise the per-site median PDD coefficients across all sites, ignoring NaNs
pdd_coefs = fits_obs_monthly_df["coef_PDD_median"]
pdd_median = np.nanmedian(pdd_coefs)
pdd_mad = MAD(pdd_coefs, nan_policy="omit")
pdd_mean = np.nanmean(pdd_coefs)
pdd_std = np.nanstd(pdd_coefs)

print('Overall PDD coef. stats:')
print(f'\tMedian = {pdd_median} \tMAD = {pdd_mad}')
print(f'\tMean = {pdd_mean} \tstd. = {pdd_std}\n')

# Median of the per-site medians within each subregion
subregion_medians = fits_obs_monthly_df.groupby(by='Subregion')['coef_PDD_median'].median()
print(subregion_medians)