# Estimate and compare ELAs from observations and modeled conditions

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
from tqdm.auto import tqdm
from scipy.stats import median_abs_deviation as MAD
from scipy.interpolate import CubicSpline
import sys
import seaborn as sns
# Suppress future warning from pandas
import warnings
warnings.filterwarnings("ignore")
import matplotlib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import ruptures as rpt
from scipy import optimize

In [None]:
# Base folder whose 'functions' subfolder holds the project helper module (machine-specific path)
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/'
# The folder must be on sys.path before the import on the next line can succeed
sys.path.append(os.path.join(base_path, 'functions'))
import model_analyze_utils as f

# Root folder used below for both the compiled inputs and the 'results' outputs.
# The commented-out line is an alternate location for the same data.
# scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'

## Load glacier boundaries, ERA5-Land data, and compiled snow cover stats

In [None]:
# -----Glacier boundaries (AOIs) with climate clusters
aois_fn = os.path.join(scm_path, 'compiled_data', 'all_aois_climate_cluster.shp')
aois = gpd.read_file(aois_fn)
# region codes are stored as integers
aois[['O1Region', 'O2Region']] = aois[['O1Region', 'O2Region']].astype(int)
print('All AOIs with climate clusters loaded from file.')

# -----ERA data
eras_fn = os.path.join(scm_path, 'compiled_data', 'all_era_data.csv')
eras = pd.read_csv(eras_fn)
# parse dates, then use "RGIId" as the site identifier column name
eras['Date'] = pd.to_datetime(eras['Date'])
eras = eras.rename(columns={'site_name': 'RGIId'})
# calendar year and month of each record
era_dates = pd.DatetimeIndex(eras['Date'])
eras['Year'] = era_dates.year
eras['Month'] = era_dates.month
print('All ERA data loaded from file.')

# -----Compiled snow cover stats
scs_fn = os.path.join(scm_path, 'compiled_data', 'all_snow_cover_stats.csv')
scs = pd.read_csv(scs_fn)
scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
# blank out implausibly large ELA values (> 1e10)
scs['ELA_from_AAR_m'] = scs['ELA_from_AAR_m'].mask(scs['ELA_from_AAR_m'] > 1e10)
# calendar year and month of each observation
scs_dates = pd.DatetimeIndex(scs['datetime'])
scs['Year'] = scs_dates.year
scs['Month'] = scs_dates.month
print('All snow cover stats loaded from file.')

## Estimate and save ELAs

### Modeled

In [None]:
# Functions for linearly extrapolating the ELA when modeled SMB < 0 everywhere
def linear_fit(x, m, b):
    """Evaluate the straight line with slope ``m`` and intercept ``b`` at ``x``."""
    slope_term = m*x
    return slope_term + b
    
def extrapolate_ela_linear(X,y, Iend=8):
    """Fit a straight line to the first ``Iend + 1`` points and evaluate it at ``X = 0``.

    In this notebook the function is called with ``X`` = cumulative SMB per
    elevation bin and ``y`` = bin elevation, so the returned value is the
    elevation at which the fitted line reaches SMB = 0.

    Parameters
    ----------
    X, y : 1-D array-like of equal length
        Predictor and response values; only ``X[0:Iend+1]`` / ``y[0:Iend+1]``
        are used for the fit.
    Iend : int, default 8
        Index of the last point included in the fit.

    Returns
    -------
    float
        Value of the fitted line at ``X = 0`` (i.e. its intercept).
    """
    # Use a private line model rather than the module-level ``linear_fit``:
    # that name is re-bound later in this notebook to a function with a
    # different signature, which would break this fit if cells are re-run.
    def _line(x, m, b):
        return m*x + b

    # optimize the linear fit (the covariance estimate is not needed)
    p, _ = optimize.curve_fit(_line, X[0:Iend+1], y[0:Iend+1])
    # extrapolate to where X = 0
    ela = _line(0, *p)
    return ela

# def extrapolate_ela_piecewise_linear(X,y):
#     # identify breakpoints
#     algo = rpt.Pelt(model="rbf").fit(signal)
#     result = algo.predict(pen=10)
                                 
# def extrapolate_ela_cubic_spline(X,y):
#     # check that X is increasing
#     if X[1] < X[0]:
#         spline = CubicSpline(np.flip(X), np.flip(y), bc_type='natural')
#     else:
#         spline = CubicSpline(X, y, bc_type='natural')
#     ela = spline(0)
    
#     return ela

In [None]:
# -----Monthly ELAs
# Load the modeled monthly ELAs if they were already saved; otherwise derive them
# from the binned model output, attach ERA data for each date, and save to file.
mod_elas_monthly_fn = os.path.join(scm_path, 'results', 'modeled_monthly_elas.csv')
if os.path.exists(mod_elas_monthly_fn):
    mod_elas_monthly = pd.read_csv(mod_elas_monthly_fn)
    mod_elas_monthly['Date'] = pd.DatetimeIndex(mod_elas_monthly['Date'])
    print('Modeled monthly ELAs loaded from file.')
else:

    # load binned model data
    bin_fns = sorted(glob.glob(os.path.join(scm_path, 'Rounce_et_al_2023', 'binned', '*.nc')))

    # remove binned file names for sites without snow cover observations:
    # the first 7 characters of each file name are matched against the RGI IDs
    # with their first 7 characters stripped (a set gives O(1) membership tests)
    aoi_ids = {x[7:] for x in aois['RGIId'].drop_duplicates().values}
    bin_fns = [x for x in bin_fns if os.path.basename(x)[0:7] in aoi_ids]
    if not bin_fns:
        # fail with a clear message instead of a KeyError on an empty dataframe below
        raise FileNotFoundError('No binned model files found for the sites in aois.')

    era_cols = ['Date', 'Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']

    # per-site results are collected here and concatenated once at the end
    site_dfs = []

    # iterate over binned file names
    for bin_fn in tqdm(bin_fns):
        # open binned data; the context manager closes the file once the arrays are extracted
        with xr.open_dataset(bin_fn) as ds:
            rgi_id = ds.RGIId.data[0] # grab RGI ID

            # grab data variables
            h = ds.bin_surface_h_initial.data[0] # surface elevation [m]
            b_sum = np.zeros((len(ds.time.data), len(h))) # cumulative SMB
            times = [np.datetime64(x) for x in ds.time.data] # datetimes
            months = list(pd.DatetimeIndex(times).month) # months of each datetime
            elas = np.zeros(len(times)) # initialize transient ELAs

            # iterate over each time period
            for j in range(len(times)):
                # grab the SMB for this time step
                b_sum[j,:] = ds.isel(time=j).bin_massbalclim_monthly.data[0]
                # add the previous SMB (restart the count in October);
                # the first time step has no predecessor, so nothing is added
                if months[j] != 10 and j > 0:
                    b_sum[j,:] += b_sum[j-1,:]
                smb = b_sum[j,:]
                # If all SMB > 0, ELA = minimum elevation
                if np.all(smb > 0):
                    elas[j] = np.min(h)
                # If SMB is > 0 and < 0 in some places, linearly interpolate ELA
                elif np.any(smb < 0) and np.any(smb > 0):
                    # NOTE(review): np.interp needs increasing x; the flip assumes SMB
                    # decreases along the bin axis — confirm against the model output
                    elas[j] = np.interp(0, np.flip(smb), np.flip(h))
                # If SMB < 0 everywhere, fit a line to the first bins and extrapolate to SMB=0
                elif np.all(smb < 0):
                    elas[j] = extrapolate_ela_linear(smb, h, Iend=5)
                # NOTE(review): if none of the cases above applies (e.g. exact zeros
                # without a sign change), elas[j] keeps its initial value of 0

        # compile in dataframe
        df = pd.DataFrame({'Date': times,
                           'ELA_m': elas})

        # Because each SMB value represents the total SMB for each month, add 1 month to the dates
        df['Date'] = df['Date'] + pd.DateOffset(months=1)
        df['RGIId'] = rgi_id

        # Add ERA5 data for each date (inner join: dates without ERA data are dropped)
        eras_site = eras.loc[eras['RGIId']==rgi_id, era_cols]
        df = df.merge(eras_site, on='Date')
        site_dfs.append(df)

    # Combine all sites and rearrange columns
    mod_elas_monthly = pd.concat(site_dfs)
    mod_elas_monthly = mod_elas_monthly[['RGIId', 'Date', 'ELA_m', 
                                         'Cumulative_Positive_Degree_Days', 
                                         'Cumulative_Snowfall_mwe']]
    # save to file
    mod_elas_monthly.to_csv(mod_elas_monthly_fn, index=False)
    print('Modeled transient ELAs saved to file:', mod_elas_monthly_fn)

mod_elas_monthly.reset_index(drop=True, inplace=True)
mod_elas_monthly

In [None]:
# -----Annual ELAs
# Load the modeled annual ELAs if already saved; otherwise take, for each site and
# calendar year, the row of the monthly table with the highest ELA and save the result.
mod_elas_annual_fn = os.path.join(scm_path, 'results', 'modeled_annual_elas.csv')
if os.path.exists(mod_elas_annual_fn):
    mod_elas_annual = pd.read_csv(mod_elas_annual_fn)
    mod_elas_annual['Date'] = pd.to_datetime(mod_elas_annual['Date'])
    print('Modeled annual ELAs loaded from file.')
else:
    # Add Year column
    mod_elas_monthly['Year'] = pd.DatetimeIndex(mod_elas_monthly['Date']).year
    # Identify the row of maximum ELA for each site and each year.
    # idxmax returns index *labels*, so rows are selected with .loc (not .iloc);
    # groups without any valid ELA yield NaN and are dropped first.
    Imax = mod_elas_monthly.groupby(by=['RGIId', 'Year'])['ELA_m'].idxmax().dropna()
    Imax = Imax.values.astype(mod_elas_monthly.index.dtype)
    mod_elas_annual = mod_elas_monthly.loc[Imax].reset_index(drop=True)
    # Reorder columns of the annual table that is saved below
    # (the monthly table is reordered as well, as before)
    ordered_cols = ['RGIId', 'Date', 'Year', 'ELA_m', 
                    'Cumulative_Positive_Degree_Days',
                    'Cumulative_Snowfall_mwe']
    mod_elas_annual = mod_elas_annual[ordered_cols]
    mod_elas_monthly = mod_elas_monthly[ordered_cols]
    # Save to file
    mod_elas_annual.to_csv(mod_elas_annual_fn, index=False)
    print('Modeled annual ELAs saved to file:', mod_elas_annual_fn)
    
mod_elas_annual

### Remotely-sensed

In [None]:
# -----Monthly mean ELAs
# Compute the observed monthly ELAs unless a saved copy already exists
obs_elas_monthly_fn = os.path.join(scm_path, 'results', 'observed_monthly_elas.csv')
if not os.path.exists(obs_elas_monthly_fn):
    month_keys = ['RGIId', 'Year', 'Month']
    climate_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']

    # Year and Month of each snow cover observation
    scs_dates = pd.DatetimeIndex(scs['datetime'])
    scs['Year'] = scs_dates.year
    scs['Month'] = scs_dates.month

    # Mean ELA per site, year and month
    obs_elas_monthly = (scs.groupby(by=month_keys)['ELA_from_AAR_m']
                           .mean()
                           .reset_index())

    # Mean PDDs and snowfall per site, year and month
    era_dates = pd.DatetimeIndex(eras['Date'])
    eras['Year'] = era_dates.year
    eras['Month'] = era_dates.month
    eras_monthly = eras.groupby(by=month_keys)[climate_cols].mean().reset_index()
    obs_elas_monthly = obs_elas_monthly.merge(eras_monthly, on=month_keys)

    # Date column: first day of each month (month number zero-padded to two digits)
    year_month_pairs = obs_elas_monthly[['Year', 'Month']].values
    obs_elas_monthly['Date'] = [np.datetime64(f'{year}-{str(month).zfill(2)}-01')
                                for year, month in year_month_pairs]

    # Column order for the saved table
    obs_elas_monthly = obs_elas_monthly[['RGIId', 'Date', 'Year', 'Month', 'ELA_from_AAR_m'] + climate_cols]
    # Save to file
    obs_elas_monthly.to_csv(obs_elas_monthly_fn, index=False)
    print('Remotely-sensed monthly ELAs saved to file:', obs_elas_monthly_fn)
else:
    obs_elas_monthly = pd.read_csv(obs_elas_monthly_fn)
    obs_elas_monthly['Date'] = pd.to_datetime(obs_elas_monthly['Date'])
    print('Remotely-sensed monthly ELAs loaded from file.')

obs_elas_monthly

In [None]:
# -----Annual ELAs
# Load the observed annual ELAs if already saved; otherwise take, for each site and
# year, the snow cover observation with the highest ELA, attach ERA data and save.
obs_elas_annual_fn = os.path.join(scm_path, 'results', 'observed_annual_elas.csv')
if os.path.exists(obs_elas_annual_fn):
    obs_elas_annual = pd.read_csv(obs_elas_annual_fn)
    obs_elas_annual['Date'] = pd.to_datetime(obs_elas_annual['Date'])
    print('Remotely-sensed annual ELAs loaded from file.')
else:
    # Identify indices of maximum annual ELA (groups with no valid ELA give NaN and are dropped).
    # idxmax returns index *labels*, so the rows are selected with .loc rather than .iloc.
    Imax = scs.groupby(by=['RGIId', 'Year'])['ELA_from_AAR_m'].idxmax().dropna().values.astype(int)
    obs_elas_annual = scs.loc[Imax, ['RGIId', 'datetime', 'Year', 'ELA_from_AAR_m']]
    obs_elas_annual = obs_elas_annual.rename(columns={'datetime': 'Date'})
    # truncate the timestamps to whole days so they can be matched to the ERA dates
    obs_elas_annual['Date'] = obs_elas_annual['Date'].values.astype('datetime64[D]')
    # Grab the ERA5 data for each date (inner join: rows without a matching ERA record are dropped)
    obs_elas_annual = obs_elas_annual.merge(eras[['RGIId', 'Date', 
                                                  'Cumulative_Positive_Degree_Days', 
                                                  'Cumulative_Snowfall_mwe']], on=['RGIId', 'Date'])
    # Save to file
    obs_elas_annual.to_csv(obs_elas_annual_fn, index=False)
    print('Remotely-sensed annual ELAs saved to file:', obs_elas_annual_fn)

obs_elas_annual

## Compare modeled to observed ELAs

### Monthly ELAs

In [None]:
# Merge modeled and remotely-sensed monthly ELAs on site and date
elas_monthly_merged = mod_elas_monthly[['RGIId', 'Date', 'ELA_m']].merge(obs_elas_monthly[['RGIId', 'Date', 'ELA_from_AAR_m']], 
                                                                         on=['RGIId', 'Date'])
# Per-site correlation coefficients are collected here and turned into a dataframe once
corr_rows = []
# Iterate over RGI IDs
for rgi_id in elas_monthly_merged['RGIId'].drop_duplicates().values:
    site_mask = elas_monthly_merged['RGIId']==rgi_id
    # Subtract the minimum ELA for each site to avoid datum issues, s.t. ELAs are with respect to zero meters.
    # The minima come from the full modeled / observed records, not only the merged dates.
    elas_monthly_merged.loc[site_mask, 'ELA_m'] -= mod_elas_monthly.loc[mod_elas_monthly['RGIId']==rgi_id, 'ELA_m'].min()
    elas_monthly_merged.loc[site_mask, 'ELA_from_AAR_m'] -= scs.loc[scs['RGIId']==rgi_id, 'ELA_from_AAR_m'].min()
    # Calculate correlation coefficient between observed and modeled ELAs at this site
    elas_monthly_merged_site = elas_monthly_merged.loc[site_mask]
    corr_coef_site = elas_monthly_merged_site[['ELA_from_AAR_m', 'ELA_m']].corr().values[0][1]
    corr_rows.append({'RGIId': rgi_id, 'Correlation Coefficient': corr_coef_site})

# Explicit columns keep the frame usable below even when no site was merged
corr_coefs_df = pd.DataFrame(corr_rows, columns=['RGIId', 'Correlation Coefficient'])

# Rename columns
elas_monthly_merged.rename(columns={'ELA_m': 'ELA_mod_m', 'ELA_from_AAR_m': 'ELA_obs_m'}, inplace=True)

# Observed minus modeled ELA, computed once and reused for the plot and the stats
ela_diff = elas_monthly_merged['ELA_obs_m'] - elas_monthly_merged['ELA_mod_m']
ela_diff_vals = ela_diff.values
corr_coefs = corr_coefs_df['Correlation Coefficient']

# Plot
fig, ax = plt.subplots(1, 2, figsize=(12,5))
ax[0].hist(ela_diff, bins=50)
ax[0].set_xlabel('ELA$_{obs}$ - ELA$_{mod}$ [m]')
ax[0].set_ylabel('Counts')
ax[1].hist(corr_coefs, bins=50)
ax[1].set_xlabel('Correlation Coefficients')
plt.show()

# Save results
elas_monthly_merged_fn = os.path.join(scm_path, 'results', 'ELAs_monthly_mod_obs_merged.csv')
elas_monthly_merged.to_csv(elas_monthly_merged_fn, index=False)
print('Merged monthly ELAs saved to file:', elas_monthly_merged_fn)
corr_coefs_fn = os.path.join(scm_path, 'results', 'ELAs_monthly_correlation_coefficients.csv')
corr_coefs_df.to_csv(corr_coefs_fn, index=False)
print('Correlation coefficients saved to file:', corr_coefs_fn)

print('\nDifference stats:')
print(f'Mean diff = {np.nanmean(ela_diff_vals)} m')
print(f'Std. diff = {np.nanstd(ela_diff_vals)} m')
print(f'Median diff = {np.nanmedian(ela_diff_vals)} m')
print(f'MAD diff = {MAD(ela_diff_vals, nan_policy="omit")} m')

print('\n Corr. Coef. stats:')
print('Mean = ', np.nanmean(corr_coefs))
print('Std. = ', np.nanstd(corr_coefs))
print('Median = ', np.nanmedian(corr_coefs))
print('MAD = ', MAD(corr_coefs, nan_policy="omit"))

### Annual ELAs

In [None]:
# Merge modeled and remotely-sensed annual ELAs on site and year
elas_annual_merged = obs_elas_annual[['RGIId', 'Year', 'ELA_from_AAR_m']].merge(mod_elas_annual[['RGIId', 'Year', 'ELA_m']],
                                                                                on=['RGIId', 'Year'])
# Per-site correlation coefficients are collected here and turned into a dataframe once
corr_rows = []
# Iterate over RGI IDs
for rgi_id in elas_annual_merged['RGIId'].drop_duplicates().values:
    site_mask = elas_annual_merged['RGIId']==rgi_id
    # Subtract the minimum ELA for each site to avoid datum issues, s.t. ELAs are with respect to zero meters.
    # The minima come from the full monthly modeled record and the full observed record.
    elas_annual_merged.loc[site_mask, 'ELA_m'] -= mod_elas_monthly.loc[mod_elas_monthly['RGIId']==rgi_id, 'ELA_m'].min()
    elas_annual_merged.loc[site_mask, 'ELA_from_AAR_m'] -= scs.loc[scs['RGIId']==rgi_id, 'ELA_from_AAR_m'].min()
    # Calculate correlation coefficient between observed and modeled ELAs at this site
    elas_annual_merged_site = elas_annual_merged.loc[site_mask]
    corr_coef_site = elas_annual_merged_site[['ELA_from_AAR_m', 'ELA_m']].corr().values[0][1]
    corr_rows.append({'RGIId': rgi_id, 'Correlation Coefficient': corr_coef_site})

# Explicit columns keep the frame usable below even when no site was merged
corr_coefs_df = pd.DataFrame(corr_rows, columns=['RGIId', 'Correlation Coefficient'])

# Rename columns
elas_annual_merged.rename(columns={'ELA_m': 'ELA_mod_m', 'ELA_from_AAR_m': 'ELA_obs_m'}, inplace=True)

# Observed minus modeled ELA, computed once and reused for the plot and the stats
ela_diff = elas_annual_merged['ELA_obs_m'] - elas_annual_merged['ELA_mod_m']
ela_diff_vals = ela_diff.values
corr_coefs = corr_coefs_df['Correlation Coefficient']

# Plot
fig, ax = plt.subplots(1, 2, figsize=(12,5))
ax[0].hist(ela_diff, bins=50)
ax[0].set_xlabel('ELA$_{obs}$ - ELA$_{mod}$ [m]')
ax[0].set_ylabel('Counts')
ax[1].hist(corr_coefs, bins=50)
ax[1].set_xlabel('Correlation Coefficients')
plt.show()

# Save results
elas_annual_merged_fn = os.path.join(scm_path, 'results', 'ELAs_annual_mod_obs_merged.csv')
elas_annual_merged.to_csv(elas_annual_merged_fn, index=False)
# the message now names the annual table that is actually written here
print('Merged annual ELAs saved to file:', elas_annual_merged_fn)
corr_coefs_fn = os.path.join(scm_path, 'results', 'ELAs_annual_correlation_coefficients.csv')
corr_coefs_df.to_csv(corr_coefs_fn, index=False)
print('Correlation coefficients saved to file:', corr_coefs_fn)

print('\nDifference stats:')
print(f'Mean diff = {np.nanmean(ela_diff_vals)} m')
print(f'Std. diff = {np.nanstd(ela_diff_vals)} m')
print(f'Median diff = {np.nanmedian(ela_diff_vals)} m')
print(f'MAD diff = {MAD(ela_diff_vals, nan_policy="omit")} m')

print('\nCorr. Coef. stats:')
print('Mean = ', np.nanmean(corr_coefs))
print('Std. = ', np.nanstd(corr_coefs))
print('Median = ', np.nanmedian(corr_coefs))
print('MAD = ', MAD(corr_coefs, nan_policy="omit"))

## Fit linear trendlines: ELA as a function of PDD sum and snowfall sum

In [None]:
def scale_Xy(df, X_cols, y_cols, scaler_type=None):
    """Return a copy of ``df`` with the X and y columns scaled, plus the fitted scaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; it is not modified.
    X_cols, y_cols : list of str
        Column names to scale; all of them are fitted and transformed together.
    scaler_type : object with ``fit`` / ``transform``, optional
        Scaler instance to fit. When omitted, a new ``StandardScaler()`` is
        created for this call, so scalers returned by earlier calls are not
        re-fitted by later ones.

    Returns
    -------
    df_scaled : pandas.DataFrame
        Copy of ``df`` in which ``X_cols + y_cols`` hold the transformed values.
    scaler : object
        The fitted scaler (as returned by ``scaler_type.fit``).
    """
    if scaler_type is None:
        scaler_type = StandardScaler()
    cols = list(X_cols) + list(y_cols)
    df_scaled = df.copy()
    scaler = scaler_type.fit(df_scaled[cols])
    df_scaled[cols] = scaler.transform(df_scaled[cols])

    # return the scaled copy (previously the untouched input was returned)
    return df_scaled, scaler

def subset_Xy_data(X, y, p=0.9):
    """Sort the samples by the first column of ``X`` and keep the middle fraction ``p``.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Predictor array; rows are ordered by increasing ``X[:, 0]``.
    y : numpy.ndarray
        Response values, indexed along the first axis like ``X``.
    p : float, default 0.9
        Fraction of the samples to keep; ``int(len(X) * (1 - p))`` samples are
        removed in total, split equally between the low and the high end.

    Returns
    -------
    X_sorted_subset, y_sorted_subset : numpy.ndarray
        The retained samples in sorted order. When too few samples exist for
        anything to be trimmed, all samples are returned.
    """
    # sort the X data by increasing values of its first column
    Iargsort = X[:, 0].argsort()
    X_sorted, y_sorted = X[Iargsort,:], y[Iargsort]
    # number of points to drop in total, and from each end
    n_drop = int(len(X)*(1-p))
    n_half = int(n_drop/2)
    # use an explicit end index: a slice ending at -0 would be empty when n_half == 0
    i_end = len(X_sorted) - n_half
    X_sorted_subset = X_sorted[n_half:i_end, :]
    y_sorted_subset = y_sorted[n_half:i_end]

    return X_sorted_subset, y_sorted_subset
    
def linear_fit(X, y):
    """Fit an ordinary linear regression of ``y`` on ``X``.

    Returns the flattened regression coefficients and the model's score
    evaluated on the same ``X`` and ``y`` that were used for fitting.

    NOTE: this re-binds the name of the earlier ``linear_fit(x, m, b)`` helper
    defined in this notebook; after this cell runs, ``linear_fit`` refers to
    this function.
    """
    regressor = LinearRegression()
    regressor.fit(X, y)
    fit_score = regressor.score(X, y)
    return np.ravel(regressor.coef_), fit_score
    
# Define function for K-folds cross-validation model fitting
def kfolds_linear_fit(X, y, n_folds=5):
    """Fit a linear regression on each K-fold training split and summarize the fits.

    Parameters
    ----------
    X : numpy.ndarray, 2-D with at least two columns
        Predictors. The first two coefficients are reported as "PDD" and
        "snowfall" — assumes the columns are ordered [PDD, snowfall]; confirm
        against the caller.
    y : numpy.ndarray
        Response values, indexed along the first axis like ``X``.
    n_folds : int, default 5
        Requested number of folds. It is capped at the number of samples,
        because ``KFold`` rejects more splits than samples.

    Returns
    -------
    pandas.DataFrame
        One row with the mean, std, median and MAD of each coefficient across
        folds, and the mean and median score.

    Raises
    ------
    ValueError
        If fewer than two samples are provided.
    """
    n_samples = len(X)
    if n_samples < 2:
        raise ValueError(f'kfolds_linear_fit needs at least 2 samples, got {n_samples}.')
    # Define K-folds (never more folds than samples)
    kf = KFold(n_splits=min(n_folds, n_samples))
    # Initialize parameters
    coefs_PDD, coefs_snowfall, scores = [], [], []
    # Iterate over fold indices; only the training part of each split is used
    for train_index, _ in kf.split(X):
        # Fit model to the training split.
        # NOTE(review): the score returned by linear_fit is evaluated on these same
        # training samples; the held-out fold is not used for scoring.
        coefs, score = linear_fit(X[train_index], y[train_index])
        coefs_PDD.append(coefs[0])
        coefs_snowfall.append(coefs[1])
        scores.append(score)
    # Calculate stats, compile in dataframe
    df = pd.DataFrame({'coef_PDD_mean': [np.nanmean(coefs_PDD)],
                       'coef_PDD_std': [np.nanstd(coefs_PDD)],
                       'coef_PDD_median': [np.nanmedian(coefs_PDD)],
                       'coef_PDD_MAD': [MAD(coefs_PDD)],
                       'coef_snowfall_mean': [np.nanmean(coefs_snowfall)],
                       'coef_snowfall_std': [np.nanstd(coefs_snowfall)],
                       'coef_snowfall_median': [np.nanmedian(coefs_snowfall)],
                       'coef_snowfall_MAD': [MAD(coefs_snowfall)],
                       'score_mean': [np.nanmean(scores)],
                       'score_median': [np.nanmedian(scores)]
                      })
    return df

### Modeled monthly ELAs

In [None]:
# Columns used as predictors (X) and response (y) for the modeled monthly fits
X_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']
y_cols = ['ELA_m']
# mod_elas_monthly_scaled, scaler = scale_Xy(mod_elas_monthly, X_cols, y_cols)

# Fit linear trendlines: ELA as a function of cumulative PDDs and cumulative snowfall
fit_dfs = []
for rgi_id in tqdm(mod_elas_monthly['RGIId'].drop_duplicates().values):
    # subset data to the site (the unscaled table is used; the scaling step above is disabled)
    site_df = mod_elas_monthly.loc[mod_elas_monthly['RGIId']==rgi_id]
    # drop rows with missing predictors or ELA, which the regression cannot handle
    site_df = site_df.dropna(subset=X_cols + y_cols)
    # only include dates early in the year
    # NOTE(review): this keeps months 1-8, while the observed monthly fits use
    # month < 10 ("before October") — confirm which cutoff is intended
    site_df = site_df.loc[site_df['Date'].dt.month < 9]
    # remove dates where PDDs==0 (the filtered frame must be assigned back to take effect)
    site_df = site_df.loc[site_df['Cumulative_Positive_Degree_Days'] > 0].reset_index(drop=True)
    # skip sites without enough samples for a fit, as done for the observed ELAs
    if len(site_df) < 2:
        continue
    # prep the X and y data
    X = site_df[X_cols].values
    y = site_df[y_cols].values
    # subset to 80% to mitigate the impact of snowfall
    # X_sub, y_sub = subset_Xy_data(X, y, p=0.8)
    # fit linear trendline; never request more folds than there are samples
    fit_df = kfolds_linear_fit(X, y, n_folds=min(5, len(X)))
    fit_df['RGIId'] = rgi_id
    # add RGI regions and climate cluster to df
    for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
        fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
    fit_dfs.append(fit_df)

# concatenate to full dataframe
fits_mod_monthly_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_mod_monthly_df.reset_index(drop=True, inplace=True)

# Save to file
fits_mod_monthly_fn = os.path.join(scm_path, 'results', 'linear_fit_modeled_monthly_ela_pdd_snowfall.csv')
fits_mod_monthly_df.to_csv(fits_mod_monthly_fn, index=False)
print('Linear fits saved to file:', fits_mod_monthly_fn)
fits_mod_monthly_df

In [None]:
# Histogram of the per-site median PDD coefficients from the modeled monthly fits
fig, ax = plt.subplots()
ax.hist(fits_mod_monthly_df['coef_PDD_median'], bins=100)
ax.set_title('Modeled')
plt.show()

In [None]:
# Summary statistics of the PDD coefficients from the modeled monthly fits
print('Modeled:\n')

pdd_coefs = fits_mod_monthly_df["coef_PDD_median"]
print('Overall PDD coef. stats:')
print(f'\tMedian = {np.nanmedian(pdd_coefs)} \tMAD = {MAD(pdd_coefs, nan_policy="omit")}')
print(f'\tMean = {np.nanmean(pdd_coefs)} \tstd. = {np.nanstd(pdd_coefs)}\n')

# Median coefficient by subregion, by climate cluster, and by both
groupings = ['Subregion', 'clustName', ['Subregion', 'clustName']]
for k, keys in enumerate(groupings):
    if k > 0:
        print('\n')
    print(fits_mod_monthly_df.groupby(by=keys)['coef_PDD_median'].median())

### Observed monthly ELAs

In [None]:
# -----Monthly ELAs
# Fit, for each site, observed monthly ELA as a linear function of cumulative PDDs and snowfall
X_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']
y_cols = ['ELA_from_AAR_m']

fit_dfs = []
for rgi_id in tqdm(obs_elas_monthly['RGIId'].drop_duplicates().values):
    # subset data to the site and drop rows with any missing value
    site_df = obs_elas_monthly.loc[obs_elas_monthly['RGIId']==rgi_id].dropna()
    # only include dates before October
    site_df = site_df.loc[site_df['Date'].dt.month < 10]
    # remove dates where PDD==0
    site_df = site_df.loc[site_df['Cumulative_Positive_Degree_Days'] > 0]
    if len(site_df) >= 2:
        # prep the X and y data
        X = site_df[X_cols].values
        y = site_df[y_cols].values
        # subset to 80% to mitigate the impact of snowfall
        # X_sub, y_sub = subset_Xy_data(X, y, p=0.8)
        # fit linear trendline; KFold rejects more folds than samples, so sites
        # with 2-4 samples get as many folds as they have samples
        fit_df = kfolds_linear_fit(X, y, n_folds=min(5, len(X)))
        fit_df['RGIId'] = rgi_id
        # add RGI regions and climate cluster to df
        for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
            fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
        fit_dfs.append(fit_df)

# concatenate to full dataframe
fits_obs_monthly_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_obs_monthly_df.reset_index(drop=True, inplace=True)

# Save to file
fits_obs_monthly_fn = os.path.join(scm_path, 'results', 'linear_fit_observed_monthly_ela_pdd_snowfall.csv')
fits_obs_monthly_df.to_csv(fits_obs_monthly_fn, index=False)
print('Linear fits saved to file:', fits_obs_monthly_fn)
fits_obs_monthly_df

In [None]:
# Histogram of the per-site median PDD coefficients from the observed monthly fits
plt.gca().hist(fits_obs_monthly_df['coef_PDD_median'], bins=50)
plt.show()

In [None]:
# Summary statistics of the PDD coefficients from the observed monthly fits
print('Observed:\n')

pdd_coefs = fits_obs_monthly_df["coef_PDD_median"]
print('Overall PDD coef. stats:')
print(f'\tMedian = {np.nanmedian(pdd_coefs)} \tMAD = {MAD(pdd_coefs, nan_policy="omit")}')
print(f'\tMean = {np.nanmean(pdd_coefs)} \tstd. = {np.nanstd(pdd_coefs)}\n')

# Median coefficient by subregion, by climate cluster, and by both
groupings = ['Subregion', 'clustName', ['Subregion', 'clustName']]
for k, keys in enumerate(groupings):
    if k > 0:
        print('\n')
    print(fits_obs_monthly_df.groupby(by=keys)['coef_PDD_median'].median())

In [None]:
# -----Daily ELAs
# Fit, for each site, the individual observed ELAs as a linear function of the
# cumulative PDDs and snowfall on the same day
fit_dfs = []
for rgi_id in tqdm(scs['RGIId'].drop_duplicates().values):
    # observed ELAs at the site, with timestamps truncated to whole days so they
    # can be matched to the ERA dates (work on a new frame, not on a slice of scs)
    obs_elas_site = scs.loc[scs['RGIId']==rgi_id, ['datetime', 'ELA_from_AAR_m']]
    obs_elas_site = obs_elas_site.rename(columns={'datetime': 'Date'})
    obs_elas_site['Date'] = obs_elas_site['Date'].values.astype('datetime64[D]')
    eras_site = eras.loc[eras['RGIId']==rgi_id]
    # subset and merge data
    site_df = obs_elas_site.merge(eras_site[['Date', 'Cumulative_Positive_Degree_Days', 
                                             'Cumulative_Snowfall_mwe']], on='Date')
    site_df = site_df.dropna()
    # only include dates early in the year
    # NOTE(review): this keeps months 1-8, while the observed monthly fits use
    # month < 10 ("before October") — confirm which cutoff is intended
    site_df = site_df.loc[site_df['Date'].dt.month < 9]
    # remove dates where PDD==0
    site_df = site_df.loc[site_df['Cumulative_Positive_Degree_Days'] > 0]
    if len(site_df) >= 2:
        # fit linear trendline
        X = site_df[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].values
        y = site_df['ELA_from_AAR_m'].values
        # save in dataframe; KFold rejects more folds than samples, so sites with
        # 2-4 samples get as many folds as they have samples
        fit_df = kfolds_linear_fit(X, y, n_folds=min(5, len(X)))
        fit_df['RGIId'] = rgi_id
        # add RGI regions and climate cluster to df
        for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
            fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
        fit_dfs.append(fit_df)

# concatenate to full dataframe
fits_obs_daily_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_obs_daily_df.reset_index(drop=True, inplace=True)

# Save
fits_obs_daily_fn = os.path.join(scm_path, 'results', 'linear_fit_observed_daily_ela_pdd_snowfall_daily.csv')
fits_obs_daily_df.to_csv(fits_obs_daily_fn, index=False)
print('Linear fits saved to file:', fits_obs_daily_fn)
fits_obs_daily_df

In [None]:
# Histogram of the per-site median PDD coefficients from the observed daily fits
plt.gca().hist(fits_obs_daily_df['coef_PDD_median'], bins=50)
plt.show()

In [None]:
# ----Plot a few example linear fits for the modeled and remotely-sensed ELAs
# Plot the trans-continental sites in the St. Elias Mtns.
rgi_ids = aois.loc[(aois['Subregion']=='St. Elias Mtns.') 
                   & (aois['clustName']=='Transitional-Continental'), 'RGIId'].values
feature_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']
# one panel per data source: (panel name, table, ELA column)
panels = [('Modeled', mod_elas_monthly, 'ELA_m'),
          ('Observed', obs_elas_monthly, 'ELA_from_AAR_m')]
for rgi_id in tqdm(rgi_ids):
    fig, ax = plt.subplots(1, 2, figsize=(12,6), sharey=True, sharex=True)
    for i, (run, df, ela_col) in enumerate(panels):
        # subset data to the site
        site_df = df.loc[df['RGIId']==rgi_id]
        # drop rows with missing predictors or ELA, which the regression cannot handle
        site_df = site_df.dropna(subset=feature_cols + [ela_col])
        # only include dates early in the year
        # NOTE(review): this keeps months 1-8, while the observed monthly fits use
        # month < 10 ("before October") — confirm which cutoff is intended
        site_df = site_df.loc[site_df['Date'].dt.month < 9]
        # remove dates where PDD==0
        site_df = site_df.loc[site_df['Cumulative_Positive_Degree_Days'] > 0]
        # prep the X and y data
        X = site_df[feature_cols].values
        y = site_df[ela_col].values
        # fit on the middle 60% of the data (sorted by PDDs) to mitigate the impact of snowfall
        X_sub, y_sub = subset_Xy_data(X, y, p=0.6)
        if len(X_sub) < 2:
            # not enough samples for a meaningful line; leave the panel empty
            ax[i].set_title(f'{run}\nNot enough data for a linear fit')
            continue
        # fit linear regression model (scored on all samples, not only the fitted subset)
        model = LinearRegression().fit(X_sub, y_sub)
        score = model.score(X, y)
        coefs = np.ravel(model.coef_)
        # plot; the data series is labelled with the panel's own source
        ax[i].plot(X[:,0], model.predict(X), 'ok', label='Linear fit')
        ax[i].plot(X[:,0], y, '.', label=run)
        # raw string so that "\S" is not treated as an (invalid) escape sequence
        ax[i].set_xlabel(r'$\Sigma$PDD')
        ax[i].set_ylabel('ELA [m]')
        ax[i].legend(loc='upper left')
        ax[i].set_title(f'{run}\nELA coef = {np.round(coefs[0],3)}, Score = {np.round(score, 3)}')
    fig.suptitle(rgi_id)
    plt.show()

### Modeled annual ELAs

In [None]:
# Fit, for each site, modeled annual ELA as a linear function of cumulative PDDs
# and snowfall, using one fold per year
fit_dfs = []
for rgi_id in tqdm(mod_elas_annual['RGIId'].drop_duplicates().values):
    # Subset annual ELAs to site
    mod_elas_annual_site = mod_elas_annual.loc[mod_elas_annual['RGIId']==rgi_id]
    # Split data into X and y
    X = mod_elas_annual_site[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].values
    y = mod_elas_annual_site['ELA_m'].values
    # Fit linear regression w/ K-folds (one fold per sample)
    n_folds = len(X)
    # sites with a single year cannot be split into folds; skip them,
    # as done for the observed annual ELAs
    if n_folds < 2:
        continue
    fit_df = kfolds_linear_fit(X, y, n_folds=n_folds)
    fit_df['RGIId'] = rgi_id
    # add RGI regions and climate cluster to df
    for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
        fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
    fit_dfs.append(fit_df)
# concatenate to full dataframe
fits_mod_annual_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_mod_annual_df.reset_index(drop=True, inplace=True)

# Save to file
fits_mod_annual_fn = os.path.join(scm_path, 'results', 'linear_fit_modeled_annual_ela_pdd_snowfall.csv')
fits_mod_annual_df.to_csv(fits_mod_annual_fn, index=False)
print('Linear fits saved to file:', fits_mod_annual_fn)
fits_mod_annual_df


In [None]:
# Histogram of the per-site median PDD coefficients from the modeled annual fits
plt.gca().hist(fits_mod_annual_df['coef_PDD_median'], bins=50)
plt.show()

### Observed annual ELAs

In [None]:
# Fit, for each site, observed annual ELA as a linear function of cumulative PDDs
# and snowfall, using one fold per year; sites with a single year are skipped
annual_fit_frames = []
for rgi_id in tqdm(obs_elas_annual['RGIId'].drop_duplicates().values):
    # rows of the annual table belonging to this site
    site_rows = obs_elas_annual.loc[obs_elas_annual['RGIId']==rgi_id]
    # predictors and response
    X = site_rows[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].values
    y = site_rows['ELA_from_AAR_m'].values
    # one fold per sample; at least two samples are needed
    n_folds = len(X)
    if n_folds <= 1:
        continue
    fit_df = kfolds_linear_fit(X, y, n_folds=n_folds)
    fit_df['RGIId'] = rgi_id
    # attach the site's RGI regions and climate cluster
    site_aoi = aois.loc[aois['RGIId']==rgi_id]
    for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
        fit_df[col] = [site_aoi[col].values[0]]
    annual_fit_frames.append(fit_df)
# combine the per-site fits into the full dataframe
if annual_fit_frames:
    fits_obs_annual_df = pd.concat(annual_fit_frames)
else:
    fits_obs_annual_df = pd.DataFrame()
fits_obs_annual_df.reset_index(drop=True, inplace=True)

# Save to file
fits_obs_annual_fn = os.path.join(scm_path, 'results', 'linear_fit_observed_annual_ela_pdd_snowfall.csv')
fits_obs_annual_df.to_csv(fits_obs_annual_fn, index=False)
print('Linear fits saved to file:', fits_obs_annual_fn)
fits_obs_annual_df


In [None]:
# Scatter of y against the first X column; X and y are whatever the most
# recently executed cell left in those variables
plt.gca().plot(X[:,0], y, '.')
plt.show()

In [None]:
# Histogram of the per-site median PDD coefficients from the observed annual fits
plt.gca().hist(fits_obs_annual_df['coef_PDD_median'], bins=50)
plt.show()