# Estimate ELAs and melt factors of snow

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
from tqdm.auto import tqdm
from scipy.stats import median_abs_deviation as MAD
import sys
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

In [None]:
# Define path to data
# Root folder of the snow cover mapping project; the input and output paths
# used in the cells below are all built from it with os.path.join.
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
# Alternative root path (uncomment to use it instead of the one above):
# scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'

## Load glacier boundaries

In [None]:
# -----Load glacier boundaries
# Read the shapefile holding all areas of interest (AOIs) and store the
# RGI first- and second-order region codes as integers.
aois_fn = os.path.join(scm_path, 'analysis', 'all_aois.shp')
aois = gpd.read_file(aois_fn).astype({'O1Region': int, 'O2Region': int})
print('All AOIs loaded from file.')

## Estimate and save ELAs

In [None]:
# -----Monthly mean snowline altitudes
# The result is cached on disk: load it if the file exists, otherwise build it
# from the per-site snow cover and ERA5 files and save it.
obs_slas_monthly_fn = os.path.join(scm_path, 'analysis', 'monthly_SLAs_observed.csv')
if os.path.exists(obs_slas_monthly_fn):
    obs_slas_monthly = pd.read_csv(obs_slas_monthly_fn)
    obs_slas_monthly['Date'] = pd.to_datetime(obs_slas_monthly['Date'])
    print('Remotely-sensed monthly SLAs loaded from file.')
else:
    # Collect one dataframe per site and concatenate once after the loop
    # (concatenating inside the loop re-copies the growing table every iteration).
    site_frames = []

    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load compiled snow cover stats
        scs_fn = os.path.join(scm_path, 'study-sites', rgi_id, f'{rgi_id}_snow_cover_stats.csv')
        scs = pd.read_csv(scs_fn)
        scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
        # Remove wonky ELA values
        scs.loc[scs['ELA_from_AAR_m'] > 1e10, 'ELA_from_AAR_m'] = np.nan
        # Add Year and Month columns
        scs['Year'] = pd.DatetimeIndex(scs['datetime']).year
        scs['Month'] = pd.DatetimeIndex(scs['datetime']).month

        # Remove pre-2016 values
        scs = scs.loc[scs['Year'] >= 2016]

        # Calculate the mean monthly snowline altitudes at each site
        site_slas_monthly = scs.groupby(by=['Year', 'Month'])['ELA_from_AAR_m'].mean().reset_index()

        # Add the monthly means of the PDD and snowfall sum columns to the dataframe
        era_fn = os.path.join(scm_path, 'study-sites', rgi_id, 'ERA', f'{rgi_id}_ERA5_daily_means.csv')
        era = pd.read_csv(era_fn)
        era['Date'] = pd.to_datetime(era['Date'])
        era['Year'] = pd.DatetimeIndex(era['Date']).year
        era['Month'] = pd.DatetimeIndex(era['Date']).month
        era_monthly = era.groupby(by=['Year', 'Month'])[['positive_degree_days_annual_sum',
                                                         'mean_snowfall_sum_annual_sum']].mean().reset_index()
        # Inner merge: only months present in both the snow cover and ERA5 tables are kept
        site_slas_monthly = site_slas_monthly.merge(era_monthly, on=['Year', 'Month'])
        # Add RGI ID
        site_slas_monthly['RGIId'] = rgi_id

        site_frames.append(site_slas_monthly)

    # Build the full dataframe. ignore_index=True gives every row a unique label;
    # without it each site's rows restart at 0, and label-based selection
    # (e.g. idxmax() followed by .loc) would pick up rows from other sites.
    obs_slas_monthly = pd.concat(site_frames, axis=0, ignore_index=True)

    # Add Date column (first day of each month)
    obs_slas_monthly['Date'] = pd.to_datetime({'year': obs_slas_monthly['Year'],
                                               'month': obs_slas_monthly['Month'],
                                               'day': 1})

    # Reorder columns
    obs_slas_monthly = obs_slas_monthly[['RGIId', 'Date', 'Year', 'Month', 'ELA_from_AAR_m',
                                         'positive_degree_days_annual_sum',
                                         'mean_snowfall_sum_annual_sum']]

    # Save to file
    obs_slas_monthly.to_csv(obs_slas_monthly_fn, index=False)
    print('Remotely-sensed monthly SLAs saved to file:', obs_slas_monthly_fn)

obs_slas_monthly

In [None]:
# -----Annual ELAs
# The annual ELA at each site is taken as the highest monthly snowline altitude of the year.
# Check if ELAs already exist in file
obs_elas_annual_fn = os.path.join(scm_path, 'analysis', 'annual_ELAs_observed.csv')
if os.path.exists(obs_elas_annual_fn):
    obs_elas_annual = pd.read_csv(obs_elas_annual_fn)
    obs_elas_annual['Date'] = pd.to_datetime(obs_elas_annual['Date'])
    print('Remotely-sensed ELAs loaded from file.')
else:
    # idxmax() returns index *labels* and .loc returns every row carrying a label,
    # so the labels must be unique. Work on a view of the monthly table with a fresh
    # 0..n-1 index in case it was assembled from per-site frames that each start at 0.
    monthly_slas = obs_slas_monthly.reset_index(drop=True)
    # Identify indices of maximum annual ELA (groups with no valid ELA are dropped)
    Imax = monthly_slas.groupby(by=['RGIId', 'Year'])['ELA_from_AAR_m'].idxmax().dropna().values.astype(int)
    obs_elas_annual = monthly_slas.loc[Imax, ['RGIId', 'Date', 'Year', 'ELA_from_AAR_m',
                                              'positive_degree_days_annual_sum',
                                              'mean_snowfall_sum_annual_sum']]
    # Match the layout obtained when the table is loaded from file
    obs_elas_annual = obs_elas_annual.reset_index(drop=True)
    # Save to file
    obs_elas_annual.to_csv(obs_elas_annual_fn, index=False)
    print('Remotely-sensed ELAs saved to file:', obs_elas_annual_fn)

obs_elas_annual

## Fit linear models to monthly snowline altitudes to estimate melt factors of snow, $f_{snow}$:

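$f_{snow} \cdot \sum PDD + \sum Snowfall = h_{sl}$

where $h_{sl}$ is the snowline altitude. Moving the snowfall term to the right-hand side leaves the PDD sum as the only predictor, so $f_{snow}$ is the slope of the linear fit:

$f_{snow} \cdot \sum PDD = h_{sl} - \sum Snowfall$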


In [None]:
def scale_Xy(df, X_cols, y_cols, scaler_type=None):
    """Scale the predictor and target columns of a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; it is copied, not modified.
    X_cols, y_cols : list of str
        Names of the predictor and target columns. The two lists are
        concatenated and scaled together with a single scaler.
    scaler_type : object with ``fit`` and ``transform`` methods, optional
        Scaler instance to fit. It is fitted in place. When omitted, a new
        ``StandardScaler`` is created for each call, so scalers returned by
        earlier calls are never refitted behind the caller's back.

    Returns
    -------
    df_scaled : pandas.DataFrame
        Copy of ``df`` with the selected columns replaced by their scaled values.
    scaler : object
        The fitted scaler (whatever ``scaler_type.fit`` returns).
    """
    if scaler_type is None:
        # A default instance in the signature would be created once at definition
        # time and shared (and refitted) by every call.
        scaler_type = StandardScaler()
    cols = X_cols + y_cols
    df_scaled = df.copy()
    scaler = scaler_type.fit(df_scaled[cols])
    df_scaled[cols] = scaler.transform(df_scaled[cols])
    return df_scaled, scaler

def linear_fit(X_train, X_test, y_train, y_test):
    """Fit a linear regression on the training split and score it on the test split.

    Returns
    -------
    coefs : numpy.ndarray
        Flattened coefficients of the fitted model.
    score : float
        Value of ``LinearRegression.score`` on the test split (the coefficient
        of determination, per the scikit-learn documentation).
    """
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    test_score = regressor.score(X_test, y_test)
    return np.ravel(regressor.coef_), test_score
    
# Define function for K-folds cross-validation model fitting
def kfolds_linear_fit(X, y, n_folds=5):
    """Fit a one-predictor linear model with K-folds cross-validation.

    For each fold, the model is fitted on the training indices and scored on
    the held-out indices; the first coefficient of each fit is kept as that
    fold's f_snow estimate.

    Parameters
    ----------
    X : numpy.ndarray
        Predictor array, indexed along its first axis by the fold indices.
    y : numpy.ndarray
        Target array, indexed the same way as ``X``.
    n_folds : int, optional
        Number of folds passed to ``KFold`` (default 5).

    Returns
    -------
    pandas.DataFrame
        Single-row table with the mean, standard deviation, median, and median
        absolute deviation of f_snow across folds, plus the mean and median of
        the per-fold test scores.
    """
    # Define K-folds
    kf = KFold(n_splits=n_folds)
    # Initialize parameters
    fsnows, scores = [], []
    # Iterate over fold indices
    for train_index, test_index in kf.split(X):
        # Split X and y into training and testing
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Fit model to the training split, score it on the testing split
        fsnow, score = linear_fit(X_train, X_test, y_train, y_test)
        fsnows.append(fsnow[0])
        scores.append(score)
    # Calculate stats over ALL folds (fsnows), not just the last fold's
    # coefficient array (fsnow), and compile in dataframe
    df = pd.DataFrame({'fsnow_mean': [np.nanmean(fsnows)],
                       'fsnow_std': [np.nanstd(fsnows)],
                       'fsnow_median': [np.nanmedian(fsnows)],
                       'fsnow_MAD': [MAD(fsnows, nan_policy='omit')],
                       'score_mean': [np.nanmean(scores)],
                       'score_median': [np.nanmedian(scores)]
                      })
    return df

In [None]:
# -----Fit f_snow at each site from the monthly snowline altitudes, PDDs, and snowfall
# Number of cross-validation folds. KFold raises a ValueError when a site has fewer
# samples than folds, so this is also the minimum number of samples required for a fit.
n_folds = 5
site_fits = []
for rgi_id in tqdm(obs_slas_monthly['RGIId'].drop_duplicates().values):
    # subset to this site and drop incomplete rows (reassigning avoids the
    # SettingWithCopyWarning raised by an in-place dropna on a slice)
    site_df = obs_slas_monthly.loc[obs_slas_monthly['RGIId']==rgi_id].dropna()
    # only include dates before October
    site_df = site_df.loc[site_df['Date'].dt.month < 10]
    # remove dates where PDD==0
    site_df = site_df.loc[site_df['positive_degree_days_annual_sum'] > 0]
    # skip sites with too few samples to build the folds
    if len(site_df) < n_folds:
        continue
    X = site_df['positive_degree_days_annual_sum'].values.reshape(-1,1)
    y = (site_df['ELA_from_AAR_m'] - site_df['mean_snowfall_sum_annual_sum']).values
    # fit linear trendlines using K-folds cross-validation
    fit_df = kfolds_linear_fit(X, y, n_folds=n_folds)
    fit_df['RGIId'] = rgi_id
    # add RGI regions and subregion to df
    for col in ['O1Region', 'O2Region', 'Subregion']:
        fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
    site_fits.append(fit_df)

# concatenate once into the full dataframe (empty if no site had enough samples)
if site_fits:
    fits_obs_monthly_df = pd.concat(site_fits, ignore_index=True)
else:
    fits_obs_monthly_df = pd.DataFrame()

# Save to file
fits_obs_monthly_fn = os.path.join(scm_path, 'analysis', 'linear_fit_observed_monthly_ela_pdd_snowfall.csv')
fits_obs_monthly_df.to_csv(fits_obs_monthly_fn, index=False)
print('Linear fits saved to file:', fits_obs_monthly_fn)
fits_obs_monthly_df

In [None]:
# Report the overall median of the per-site median melt factors and plot their distribution
fsnow_site_medians = fits_obs_monthly_df['fsnow_median']
print("Median f_snow = ", np.nanmedian(fsnow_site_medians))
plt.hist(fsnow_site_medians, bins=50)
plt.show()

In [None]:
# Print stats
# Summary statistics are computed over the per-site median f_snow values, ignoring NaNs.
site_fsnow = fits_obs_monthly_df["fsnow_median"]
overall_median = np.nanmedian(site_fsnow)
overall_mad = MAD(site_fsnow, nan_policy="omit")
overall_mean = np.nanmean(site_fsnow)
overall_std = np.nanstd(site_fsnow)
print('Overall f_snow stats:')
print(f'\tMedian = {overall_median} \tMAD = {overall_mad}')
print(f'\tMean = {overall_mean} \tstd. = {overall_std}\n')

# Median f_snow within each subregion
print(fits_obs_monthly_df.groupby(by='Subregion')['fsnow_median'].median())