# Assess ELAs from observations and modeled conditions

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
from tqdm.auto import tqdm
from scipy.stats import median_abs_deviation as MAD
from scipy.interpolate import CubicSpline
import sys
import seaborn as sns
# Suppress future warning from pandas
import warnings
warnings.filterwarnings("ignore")
import matplotlib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import ruptures as rpt
from scipy import optimize

In [None]:
# Location of the snow-cover-mapping-application code; its "functions" folder is
# added to the module search path so that model_analyze_utils can be imported
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/'
functions_path = os.path.join(base_path, 'functions')
sys.path.append(functions_path)
import model_analyze_utils as f

# Root folder for the compiled data, model output, and results used below
# (alternative location kept for reference)
# scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'

## Load glacier boundaries, ERA5-Land data, and compiled snow cover stats

In [None]:
# -----Load glacier boundaries with climate clusters
aois_fn = os.path.join(scm_path, 'compiled_data', 'all_aois_climate_cluster.shp')
aois = gpd.read_file(aois_fn)
# store the RGI region codes as integers
region_cols = ['O1Region', 'O2Region']
aois[region_cols] = aois[region_cols].astype(int)
print('All AOIs with climate clusters loaded from file.')

# -----Load ERA data
eras_fn = os.path.join(scm_path, 'compiled_data', 'all_era_data.csv')
# the "site_name" column is renamed to "RGIId" on load
eras = pd.read_csv(eras_fn).rename(columns={'site_name': 'RGIId'})
# format dates as datetimes
eras['Date'] = pd.to_datetime(eras['Date'])
print('All ERA data loaded from file.')

# -----Load compiled snow cover stats
scs_fn = os.path.join(scm_path, 'compiled_data', 'all_snow_cover_stats.csv')
scs = pd.read_csv(scs_fn)
# the datetime strings are parsed with format='mixed'
scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
print('All snow cover stats loaded from file.')
scs

## Estimate and save ELAs

### Modeled

In [None]:
# Straight-line model used when extrapolating the ELA for SMB < 0 everywhere
def linear_fit(x, m, b):
    """Evaluate the line with slope ``m`` and intercept ``b`` at ``x``."""
    y = m*x
    y = y + b
    return y
    
def extrapolate_ela_linear(X,y, Iend=8):
    """Extrapolate the ELA from a straight-line fit through the first points.

    Parameters
    ----------
    X : array-like
        Independent variable (the caller passes the cumulative SMB of each
        elevation bin).
    y : array-like
        Dependent variable (the caller passes the bin surface elevations).
    Iend : int, optional
        Index of the last point used in the fit; the fit uses ``X[0:Iend+1]``
        and ``y[0:Iend+1]``.

    Returns
    -------
    ela : float
        The fitted line evaluated at ``X = 0``.
    """
    # The line model is defined locally so that this function does not depend on
    # the module-level name `linear_fit`, which is rebound to a different
    # function (with a different signature) further down in this file.
    def _line(x, m, b):
        return m*x + b

    # optimize the linear fit (the covariance estimate is not needed)
    params, _ = optimize.curve_fit(_line, X[0:Iend+1], y[0:Iend+1])
    # extrapolate to X = 0
    return _line(0, *params)

def extrapolate_ela_piecewise_linear(X,y):
    """Extrapolate the ELA from a line fitted to the first detected segment.

    Breakpoints are detected with the PELT algorithm (RBF cost, penalty 10); a
    straight line is then fitted to the points before the first breakpoint and
    evaluated at ``X = 0``.

    NOTE(review): the previous body referenced an undefined name `signal` and
    returned nothing. Using `X` as the change-point signal and the first
    segment for the fit are assumptions -- confirm against the intended method.

    Parameters
    ----------
    X : array-like
        Independent variable (presumably the cumulative SMB of each elevation
        bin, as for the other extrapolation helpers -- confirm).
    y : array-like
        Dependent variable (presumably the bin surface elevations -- confirm).

    Returns
    -------
    ela : float
        Intercept of the line fitted to the first segment, i.e. its value at
        ``X = 0``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # identify breakpoints
    algo = rpt.Pelt(model="rbf").fit(X)
    result = algo.predict(pen=10)
    # `result` lists the end index of every segment (the last entry is len(X)),
    # so the first entry bounds the first segment
    first_end = result[0]
    # fit a first-degree polynomial to the first segment; the intercept is the
    # value of the line at X = 0
    _, intercept = np.polyfit(X[:first_end], y[:first_end], 1)
    return intercept
                                 
def extrapolate_ela_cubic_spline(X,y):
    """Evaluate a natural cubic spline of ``y`` against ``X`` at ``X = 0``.

    The inputs are reversed first when the first two values of ``X`` are in
    decreasing order, so the spline is built on increasing ``X``.
    """
    descending = X[1] < X[0]
    if descending:
        xs, ys = np.flip(X), np.flip(y)
    else:
        xs, ys = X, y
    # natural boundary conditions, evaluated at zero
    return CubicSpline(xs, ys, bc_type='natural')(0)

In [None]:
# Load the modeled transient ELAs from file if they exist; otherwise estimate
# them from the binned model output and save them.
mod_elas_fn = os.path.join(scm_path, 'Rounce_et_al_2023', 'modeled_elas.csv')
if os.path.exists(mod_elas_fn):
    mod_elas = pd.read_csv(mod_elas_fn, index_col=0)
    mod_elas.index = pd.DatetimeIndex(mod_elas.index)
    print('Modeled ELAs loaded from file.')
else:

    # load binned model data
    bin_fns = sorted(glob.glob(os.path.join(scm_path, 'Rounce_et_al_2023', 'binned', '*.nc')))

    # remove binned file names for sites without snow cover observations
    # (the first 7 characters of the file name are matched against RGIId[7:])
    aoi_ids = {x[7:] for x in aois['RGIId'].drop_duplicates().values}
    bin_fns = [x for x in bin_fns if os.path.basename(x)[0:7] in aoi_ids]

    # iterate over binned file names
    mod_elas = None
    for bin_fn in tqdm(bin_fns):
        # open binned data; the context manager closes the file after reading
        with xr.open_dataset(bin_fn) as ds:
            rgi_id = ds.RGIId.data[0] # grab RGI ID

            # grab data variables
            h = ds.bin_surface_h_initial.data[0] # surface elevation [m]
            times = [np.datetime64(x) for x in ds.time.data] # datetimes
            months = list(pd.DatetimeIndex(times).month) # months of each datetime
            b_sum = np.zeros((len(times), len(h))) # cumulative SMB
            elas = np.zeros(len(times)) # initialize transient ELAs

            # iterate over each time period
            for j, month in enumerate(months):
                # grab the SMB for this time step
                b_sum[j,:] = ds.isel(time=j).bin_massbalclim_monthly.data[0]
                # add the previous SMB (restart the count in October);
                # for j == 0 the "previous" row is the still-zero last row
                if month != 10:
                    b_sum[j,:] += b_sum[j-1,:]
                # If all SMB > 0, ELA = minimum elevation
                if all(b_sum[j,:] > 0):
                    elas[j] = np.min(h)
                # If SMB is > 0 and < 0 in some places, linearly interpolate ELA
                elif any(b_sum[j,:] < 0) and any(b_sum[j,:] > 0):
                    elas[j] = np.interp(0, np.flip(b_sum[j,:]), np.flip(h))
                # If SMB < 0 everywhere, fit a line to the first points and
                # extrapolate to SMB = 0
                elif all(b_sum[j,:] < 0):
                    X, y = b_sum[j,:], h
                    elas[j] = extrapolate_ela_linear(X, y, Iend=5)
                # NOTE(review): any other case (e.g. zeros mixed with values of
                # a single sign) leaves the ELA at its initial value of 0

        # compile in dataframe (inner merge on Date, one column per glacier)
        site_elas = pd.DataFrame({'Date': times, rgi_id: elas})
        mod_elas = site_elas if mod_elas is None else mod_elas.merge(site_elas, on='Date')

    if mod_elas is None:
        raise FileNotFoundError('No binned model files matched the AOIs; cannot estimate modeled ELAs.')

    # save to file
    mod_elas.set_index('Date', inplace=True)
    mod_elas.to_csv(mod_elas_fn, index=True)
    print('Modeled ELAs saved to file:', mod_elas_fn)

# Subset to 2013 onwards
mod_elas = mod_elas.loc[pd.DatetimeIndex(mod_elas.index).year >= 2013]
# Subset to months 3-9 (March-September)
# NOTE(review): this comment previously read "April-Sept" while the filter keeps
# month >= 3; confirm which start month is intended
mod_elas = mod_elas.loc[(pd.DatetimeIndex(mod_elas.index).month >= 3)
                        & (pd.DatetimeIndex(mod_elas.index).month <=9)]

mod_elas

### Remotely-sensed

In [None]:
obs_elas_fn = os.path.join(scm_path, 'compiled_data', 'observed_elas.csv')

if os.path.exists(obs_elas_fn):
    # ELAs were compiled previously: read them back and restore the datetime index
    obs_elas = pd.read_csv(obs_elas_fn, index_col=0)
    obs_elas.index = pd.DatetimeIndex(obs_elas.index)
    print('Remotely-sensed ELAs loaded from file.')
else:
    # Year and Month columns are added to the snow cover stats for grouping
    scs['Year'] = scs['datetime'].dt.year.values
    scs['Month'] = scs['datetime'].dt.month.values

    # Mean monthly ELA at each site
    monthly_means = scs.groupby(by=['RGIId', 'Year', 'Month'])['ELA_from_AAR_m'].mean()
    obs_elas = monthly_means.reset_index()

    # Date column: the first day of each month (single-digit months are zero-padded)
    obs_elas['Date'] = [np.datetime64(f'{year}-0{month}-01') if month < 10 else
                        np.datetime64(f'{year}-{month}-01')
                        for year, month in obs_elas[['Year', 'Month']].values]

    # Restructure so that Date is the index and each column is an RGIId
    obs_elas = obs_elas.pivot_table(index='Date', columns='RGIId', values='ELA_from_AAR_m')

    # Save to file
    obs_elas.to_csv(obs_elas_fn, index=True)
    print('Remotely-sensed ELAs saved to file:', obs_elas_fn)

obs_elas

## Compare modeled to observed ELAs

### Transient ELAs

In [None]:
# Compare monthly (transient) observed and modeled ELAs at each site, relative
# to each series' minimum ELA.

# Add Year columns
obs_elas['Year'] = obs_elas.index.year
mod_elas['Year'] = mod_elas.index.year

# Iterate over every site column of the modeled ELAs; the helper 'Year' column
# is excluded by name rather than by a fixed column count
site_ids = [col for col in mod_elas.columns if col != 'Year']
diff_dfs = []
for rgi_id in tqdm(site_ids):
    # Subset data to site
    obs_elas_site = obs_elas[[rgi_id, 'Year']].reset_index()
    # values above 1e10 are treated as missing
    obs_elas_site.loc[obs_elas_site[rgi_id] > 1e10, rgi_id] = np.nan
    mod_elas_site = mod_elas[[rgi_id, 'Year']].reset_index()

    # Subset to years up to and including 2022
    obs_elas_site = obs_elas_site.loc[(obs_elas_site['Year'] <= 2022)]
    mod_elas_site = mod_elas_site.loc[(mod_elas_site['Year'] <= 2022)]

    # Subtract minimum ELA to directly compare
    obs_elas_site_relative, mod_elas_site_relative = obs_elas_site.copy(), mod_elas_site.copy()
    obs_elas_site_relative[rgi_id] -= np.nanmin(scs.loc[scs['RGIId']==rgi_id, 'ELA_from_AAR_m'])
    mod_elas_site_relative[rgi_id] -= np.nanmin(mod_elas_site[rgi_id])

    # calculate differences
    merged = obs_elas_site_relative.merge(mod_elas_site_relative, on=['Date', 'Year'], suffixes=['_obs', '_mod'])
    merged['ELA_obs-ELA_mod [m]'] = merged[rgi_id + '_obs'] - merged[rgi_id + '_mod']

    # Collect for the full dataframe
    diff_df = merged['ELA_obs-ELA_mod [m]'].reset_index()
    diff_df['RGIId'] = rgi_id
    diff_dfs.append(diff_df)

# Concatenate once instead of growing the dataframe inside the loop
elas_transient_diff_df = pd.concat(diff_dfs) if diff_dfs else pd.DataFrame()

# Drop Year columns
obs_elas.drop(columns='Year', inplace=True)
mod_elas.drop(columns='Year', inplace=True)

# Histogram of the per-site mean differences
plt.figure()
plt.hist(elas_transient_diff_df.groupby('RGIId')['ELA_obs-ELA_mod [m]'].mean().values, bins=50)
plt.xlabel('ELA$_{obs}$ - ELA$_{mod}$ [m]')
plt.ylabel('Counts')
plt.title('Transient ELAs')
plt.show()

print('Difference stats:')
print(f'Mean diff = {elas_transient_diff_df["ELA_obs-ELA_mod [m]"].mean()} m')
print(f'Std. diff = {elas_transient_diff_df["ELA_obs-ELA_mod [m]"].std()} m')
print(f'Median diff = {elas_transient_diff_df["ELA_obs-ELA_mod [m]"].median()} m')
print(f'MAD diff = {MAD(elas_transient_diff_df["ELA_obs-ELA_mod [m]"].values, nan_policy="omit")} m')

### Annual ELAs

In [None]:
# Compare annual maximum observed and modeled ELAs at each site, relative to
# each series' minimum ELA.

# Add Year columns
obs_elas['Year'] = obs_elas.index.year
mod_elas['Year'] = mod_elas.index.year

# Calculate annual max ELA
obs_elas_max = obs_elas.groupby('Year').max()
mod_elas_max = mod_elas.groupby('Year').max()

# Iterate over every site column of the modeled ELAs; the helper 'Year' column
# is excluded by name rather than by a fixed column count
site_ids = [col for col in mod_elas.columns if col != 'Year']
diff_dfs = []
for rgi_id in tqdm(site_ids):
    # Subset data to site
    obs_elas_site = obs_elas_max[rgi_id].reset_index()
    # values above 1e30 are treated as missing
    obs_elas_site.loc[obs_elas_site[rgi_id] > 1e30, rgi_id] = np.nan
    mod_elas_site = mod_elas_max[rgi_id].reset_index()

    # Subset to 2017-2022
    obs_elas_site = obs_elas_site.loc[(obs_elas_site['Year'] >= 2017) & (obs_elas_site['Year'] <= 2022)]
    mod_elas_site = mod_elas_site.loc[(mod_elas_site['Year'] >= 2017) & (mod_elas_site['Year'] <= 2022)]

    # Subtract minimum ELA to directly compare
    obs_elas_site_relative, mod_elas_site_relative = obs_elas_site.copy(), mod_elas_site.copy()
    obs_elas_site_relative[rgi_id] -= np.nanmin(scs.loc[scs['RGIId']==rgi_id, 'ELA_from_AAR_m'])
    mod_elas_site_relative[rgi_id] -= np.nanmin(mod_elas[rgi_id])

    # calculate differences
    merged = obs_elas_site_relative.merge(mod_elas_site_relative, on='Year', suffixes=['_obs', '_mod'])
    merged['ELA_obs-ELA_mod [m]'] = merged[rgi_id + '_obs'] - merged[rgi_id + '_mod']

    # Collect for the full dataframe
    diff_df = merged['ELA_obs-ELA_mod [m]'].reset_index()
    diff_df['RGIId'] = rgi_id
    diff_dfs.append(diff_df)

# Concatenate once instead of growing the dataframe inside the loop
elas_diff_df = pd.concat(diff_dfs) if diff_dfs else pd.DataFrame()

# Drop Year columns
obs_elas.drop(columns='Year', inplace=True)
mod_elas.drop(columns='Year', inplace=True)

# Histogram of the per-site mean differences
plt.figure()
plt.hist(elas_diff_df.groupby('RGIId')['ELA_obs-ELA_mod [m]'].mean().values, bins=50)
plt.xlabel('ELA$_{obs}$ - ELA$_{mod}$ [m]')
plt.ylabel('Counts')
plt.title('ELAs')
plt.show()

print('Difference stats:')
print(f'Mean diff = {elas_diff_df["ELA_obs-ELA_mod [m]"].mean()} m')
print(f'Std. diff = {elas_diff_df["ELA_obs-ELA_mod [m]"].std()} m')
print(f'Median diff = {elas_diff_df["ELA_obs-ELA_mod [m]"].median()} m')
print(f'MAD diff = {MAD(elas_diff_df["ELA_obs-ELA_mod [m]"].values, nan_policy="omit")} m')

## Fit linear trendlines: ELA as a function of the PDD sum and the snowfall sum

In [None]:
# Scratch check: number of samples in 10% of X (X must already exist as a global)
int(0.1 * len(X))

In [None]:

# Scratch plot: first two columns of the trimmed sample (black circles) over
# the same columns of the full sample (dots)
# NOTE(review): relies on `X_sorted_subset` and `X` existing as globals;
# `X_sorted_subset` is not assigned at module level in the visible code
subset_col0, subset_col1 = X_sorted_subset[:,0], X_sorted_subset[:,1]
plt.plot(subset_col0, subset_col1, 'ok')
full_col0, full_col1 = X[:,0], X[:,1]
plt.plot(full_col0, full_col1, '.')
plt.show()

In [None]:
def subset_Xy_data(X, y, p=0.9):
    """Sort the samples by the first column of X and keep the middle fraction p.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of predictors; rows are sorted by column 0 (the callers put
        the PDD sums there).
    y : numpy.ndarray
        1-D array of targets, one per row of X.
    p : float, optional
        Fraction of the sorted samples to keep. ``int(len(X)*(1-p))`` samples
        are dropped in total, half (rounded down) from each end.

    Returns
    -------
    X_sorted_subset, y_sorted_subset : numpy.ndarray
        The retained rows of X and the matching entries of y, in sorted order.
    """
    # sort the X data by increasing values of the first column
    Iargsort = X[:, 0].argsort()
    X_sorted, y_sorted = X[Iargsort,:], y[Iargsort]
    # number of points to drop in total, and from each tail
    n_drop = int(len(X)*(1-p))
    n_tail = int(n_drop/2)
    # Use an explicit stop index: with n_tail == 0 a slice written as
    # [0:-0] is [0:0], which would return an empty array instead of all rows
    i_stop = len(X_sorted) - n_tail
    X_sorted_subset = X_sorted[n_tail:i_stop, :]
    y_sorted_subset = y_sorted[n_tail:i_stop]

    return X_sorted_subset, y_sorted_subset
    
def linear_fit(X, y):
    """Fit an ordinary least-squares model of ``y`` on ``X``.

    Returns the fitted coefficients and the model score evaluated on the same
    ``X`` and ``y`` that were used for fitting.
    """
    regressor = LinearRegression()
    regressor.fit(X, y)
    return regressor.coef_, regressor.score(X, y)
    
# K-folds cross-validation of the linear model fit
def kfolds_linear_fit(X, y, n_folds=5):
    """Fit the linear model on the training part of each fold and summarize.

    The model is fitted with ``linear_fit`` on the training indices of every
    fold; the score it returns is evaluated on those same training samples,
    and the held-out samples are not used.

    Returns a one-row DataFrame with the mean, standard deviation, median, and
    MAD of the first coefficient (``coef_PDD_*``) and second coefficient
    (``coef_snowfall_*``), and the mean and median of the scores.
    """
    splitter = KFold(n_splits=n_folds)
    # one (coefficients, score) pair per fold
    fold_fits = [linear_fit(X[train_index], y[train_index])
                 for train_index, _ in splitter.split(X)]
    pdd = [coefs[0] for coefs, _ in fold_fits]
    snowfall = [coefs[1] for coefs, _ in fold_fits]
    fold_scores = [score for _, score in fold_fits]
    # summary statistics across folds
    summary = {
        'coef_PDD_mean': [np.nanmean(pdd)],
        'coef_PDD_std': [np.nanstd(pdd)],
        'coef_PDD_median': [np.nanmedian(pdd)],
        'coef_PDD_MAD': [MAD(pdd)],
        'coef_snowfall_mean': [np.nanmean(snowfall)],
        'coef_snowfall_std': [np.nanstd(snowfall)],
        'coef_snowfall_median': [np.nanmedian(snowfall)],
        'coef_snowfall_MAD': [MAD(snowfall)],
        'score_mean': [np.nanmean(fold_scores)],
        'score_median': [np.nanmedian(fold_scores)],
    }
    return pd.DataFrame(summary)

### Restructure ERA5-Land data and resample to monthly

In [None]:
# Year and Month columns are used to group the ERA records by calendar month
eras['Year'] = eras['Date'].dt.year
eras['Month'] = eras['Date'].dt.month

# Monthly means of the cumulative PDDs and snowfall at each site
era_vars = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']
eras_monthly = eras.groupby(by=['RGIId', 'Year', 'Month'])[era_vars].mean().reset_index()

# Restrict the un-aggregated records to 2013-2022
# NOTE(review): this runs after the monthly means were computed, so
# `eras_monthly` itself is not restricted to this period -- confirm intent
in_period = (eras['Year'] >= 2013) & (eras['Year'] <= 2022)
eras = eras.loc[in_period]

# Date column: the first day of each month (single-digit months are zero-padded)
eras_monthly['Date'] = [np.datetime64(f'{year}-0{month}-01') if month < 10 else
                        np.datetime64(f'{year}-{month}-01')
                        for year, month in eras_monthly[['Year', 'Month']].values]

# One table per variable: Date as the index, one column per RGIId (same layout as the ELAs)
pivot_kwargs = dict(index='Date', columns='RGIId')
pdds_monthly_df = eras_monthly.pivot_table(values='Cumulative_Positive_Degree_Days', **pivot_kwargs)
sf_monthly_df = eras_monthly.pivot_table(values='Cumulative_Snowfall_mwe', **pivot_kwargs)
pdds_monthly_df

### Modeled ELAs

In [None]:
# Fit linear trendlines of the modeled ELA against the PDD and snowfall sums
fit_dfs = []
for rgi_id in tqdm(mod_elas.columns):
    # subset and merge data
    site_df = (pdds_monthly_df[rgi_id].reset_index().merge(sf_monthly_df[rgi_id].reset_index(),
                                                    on='Date', suffixes=['_PDD', '_SF']).merge(mod_elas[rgi_id].reset_index(),
                                                                                               on='Date'))
    site_df = site_df.dropna()
    # only include months before September
    # NOTE(review): the comment here used to say "before October" while the
    # cutoff is month < 9; the observed-ELA fit uses month < 10 -- confirm
    site_df = site_df.loc[site_df['Date'].dt.month < 9]
    # remove dates where PDDs==0 (the filtered frame must be assigned back;
    # filtering a temporary and resetting its index in place changes nothing)
    site_df = site_df.loc[site_df[rgi_id+'_PDD'] > 0].reset_index(drop=True)
    # prep the X and y data
    X = site_df[[rgi_id+'_PDD', rgi_id+'_SF']].values
    y = site_df[rgi_id].values
    # subset to 80% to mitigate the impact of snowfall
    X_sub, y_sub = subset_Xy_data(X, y, p=0.8)
    # fit linear trendline
    fit_df = kfolds_linear_fit(X_sub, y_sub)
    fit_df['RGIId'] = rgi_id
    # add RGI regions and climate cluster to df
    for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
        fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
    # collect for the full dataframe
    fit_dfs.append(fit_df)
    # plot
    # fig, ax = plt.subplots()
    # ax.plot(X[:,0], y, '.')
    # ax.set_xlabel('$\Sigma$PDDs')
    # ax.set_ylabel('Transient ELA [m]')
    # plt.show()
# concatenate once instead of growing the dataframe inside the loop
fits_mod_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_mod_df.reset_index(drop=True, inplace=True)

# Save
fits_mod_fn = os.path.join(scm_path, 'results', 'linear_fit_ela_modeled_pdd_snowfall.csv')
fits_mod_df.to_csv(fits_mod_fn, index=False)
print('Linear fits saved to file:', fits_mod_fn)
fits_mod_df

In [None]:
# Mask median PDD coefficients larger than 1e10 before plotting
too_large = fits_mod_df['coef_PDD_median'] > 1e10
fits_mod_df.loc[too_large, 'coef_PDD_median'] = np.nan

# Stacked histograms of the median PDD coefficient: by climate cluster name
# (top) and by subregion (bottom)
fig, ax = plt.subplots(2, 1, figsize=(10,12))
for axis, group_col in zip(ax, ['clustName', 'Subregion']):
    sns.histplot(data=fits_mod_df, x='coef_PDD_median', multiple='stack', hue=group_col, ax=axis)
ax[0].set_title('Modeled')
plt.show()

In [None]:
# Summary statistics of the median PDD coefficients for the modeled ELAs
print('Modeled:\n')

pdd_coefs = fits_mod_df["coef_PDD_median"]
print('Overall PDD coef. stats:')
print(f'\tMedian = {np.nanmedian(pdd_coefs)} \tMAD = {MAD(pdd_coefs, nan_policy="omit")}')
print(f'\tMean = {np.nanmean(pdd_coefs)} \tstd. = {np.nanstd(pdd_coefs)}\n')

# Group medians by subregion, by cluster name, and by both, separated by blank lines
groupings = ['Subregion', 'clustName', ['Subregion', 'clustName']]
for k, keys in enumerate(groupings):
    if k > 0:
        print('\n')
    print(fits_mod_df.groupby(by=keys)['coef_PDD_median'].median())

### Observed ELAs

In [None]:
# -----Monthly ELAs
# Fit linear trendlines of the observed ELA against the PDD and snowfall sums
n_folds = 5 # number of folds used for the cross-validated fit
fit_dfs = []
for rgi_id in tqdm(obs_elas.columns):
    # subset and merge data
    site_df = (pdds_monthly_df[rgi_id].reset_index().merge(sf_monthly_df[rgi_id].reset_index(),
                                                   on='Date', suffixes=['_PDD', '_SF']).merge(obs_elas[rgi_id].reset_index(),
                                                                                              on='Date'))
    site_df = site_df.dropna()
    # only include dates before October
    site_df = site_df.loc[site_df['Date'].dt.month < 10]
    # remove dates where PDD==0
    site_df = site_df.loc[site_df[rgi_id+'_PDD'] > 0]
    if len(site_df) < 2:
        continue
    # prep the X and y data
    X = site_df[[rgi_id+'_PDD', rgi_id+'_SF']].values
    y = site_df[rgi_id].values
    # subset to 80% to mitigate the impact of snowfall
    X_sub, y_sub = subset_Xy_data(X, y, p=0.8)
    # K-folds splitting needs at least one sample per fold; skip sites with
    # too few samples left after trimming
    if len(X_sub) < n_folds:
        continue
    # fit linear trendline to the trimmed data (previously the trimmed arrays
    # were computed but the fit used the untrimmed X and y)
    fit_df = kfolds_linear_fit(X_sub, y_sub, n_folds=n_folds)
    fit_df['RGIId'] = rgi_id
    # add RGI regions and climate cluster to df
    for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
        fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
    # collect for the full dataframe
    fit_dfs.append(fit_df)

# concatenate once instead of growing the dataframe inside the loop
fits_obs_df = pd.concat(fit_dfs) if fit_dfs else pd.DataFrame()
fits_obs_df.reset_index(drop=True, inplace=True)

# Save
fits_obs_fn = os.path.join(scm_path, 'results', 'linear_fit_ela_observed_pdd_snowfall_monthly.csv')
fits_obs_df.to_csv(fits_obs_fn, index=False)
print('Linear fits saved to file:', fits_obs_fn)
fits_obs_df

In [None]:
# # -----Daily ELAs
# fits_obs_df = pd.DataFrame()
# for rgi_id in tqdm(scs['RGIId'].drop_duplicates().values):
#     elas_site = scs.loc[scs['RGIId']==rgi_id]
#     elas_site['datetime'] = elas_site['datetime'].values.astype('datetime64[D]')
#     elas_site.rename(columns={'datetime': 'Date'}, inplace=True)
#     elas_site = elas_site[['Date', 'ELA_from_AAR_m']]
#     eras_site = eras.loc[eras['RGIId']==rgi_id]
#     # subset and merge data
#     site_df = (eras_site[['Date', 'Cumulative_Positive_Degree_Days', 
#                           'Cumulative_Snowfall_mwe']]).merge(elas_site, on='Date')
#     site_df.dropna(inplace=True)
#     # only include dates before October
#     site_df = site_df.loc[site_df['Date'].dt.month < 10]
#     # remove dates where PDD==0
#     site_df = site_df.loc[site_df['Cumulative_Positive_Degree_Days'] > 0]
#     if len(site_df) >= 2:
#         # fit linear trendline
#         X = site_df[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].values
#         y = site_df['ELA_from_AAR_m'].values
#         # save in dataframe
#         fit_df = kfolds_linear_fit(X, y)
#         fit_df['RGIId'] = rgi_id
#         # add RGI regions and climate cluster to df
#         for col in ['O1Region', 'O2Region', 'Subregion', 'cluster', 'clustName']:
#             fit_df[col] = [aois.loc[aois['RGIId']==rgi_id, col].values[0]]
#         # concatenate to full dataframe
#         fits_obs_df = pd.concat([fits_obs_df, fit_df])

# fits_obs_df.reset_index(drop=True, inplace=True)

# # Save
# fits_obs_fn = os.path.join(scm_path, 'results', 'linear_fit_ela_observed_pdd_snowfall_daily.csv')
# fits_obs_df.to_csv(fits_obs_fn, index=False)
# print('Linear fits saved to file:', fits_obs_fn)
# fits_obs_df

In [None]:
# Mask median PDD coefficients whose absolute value exceeds 10 before plotting
out_of_range = np.abs(fits_obs_df['coef_PDD_median']) > 10
fits_obs_df.loc[out_of_range, 'coef_PDD_median'] = np.nan

# Stacked histograms of the median PDD coefficient: by climate cluster name
# (top) and by subregion (bottom)
fig, ax = plt.subplots(2, 1, figsize=(10,12))
for axis, group_col in zip(ax, ['clustName', 'Subregion']):
    sns.histplot(data=fits_obs_df, x='coef_PDD_median', multiple='stack', hue=group_col, ax=axis)
ax[0].set_title('Observed')
plt.show()

In [None]:
# Summary statistics of the median PDD coefficients for the observed ELAs
print('Observed:\n')

pdd_coefs = fits_obs_df["coef_PDD_median"]
print('Overall PDD coef. stats:')
print(f'\tMedian = {np.nanmedian(pdd_coefs)} \tMAD = {MAD(pdd_coefs, nan_policy="omit")}')
print(f'\tMean = {np.nanmean(pdd_coefs)} \tstd. = {np.nanstd(pdd_coefs)}\n')

# Group medians by subregion, by cluster name, and by both, separated by blank lines
groupings = ['Subregion', 'clustName', ['Subregion', 'clustName']]
for k, keys in enumerate(groupings):
    if k > 0:
        print('\n')
    print(fits_obs_df.groupby(by=keys)['coef_PDD_median'].median())

In [None]:
# ----Plot a few example linear fits for the modeled and remotely-sensed
# Plot the trans-continental sites in the St. Elias Mtns.
rgi_ids = aois.loc[(aois['Subregion']=='St. Elias Mtns.')
                   & (aois['clustName']=='Transitional-Continental'), 'RGIId'].values
# left panel: modeled ELAs, right panel: observed ELAs
panels = [('Modeled', mod_elas), ('Observed', obs_elas)]
for rgi_id in tqdm(rgi_ids):
    fig, ax = plt.subplots(1, 2, figsize=(12,6), sharey=True, sharex=True)
    for i, (run, df) in enumerate(panels):
        # subset and merge data
        site_df = (pdds_monthly_df[rgi_id].reset_index().merge(sf_monthly_df[rgi_id].reset_index(),
                                                    on='Date', suffixes=['_PDD', '_SF']).merge(df[rgi_id].reset_index(),
                                                                                               on='Date'))
        site_df = site_df.dropna()
        # only include months before September
        # NOTE(review): the comment here used to say "before October" while
        # the cutoff is month < 9 -- confirm the intended cutoff
        site_df = site_df.loc[site_df['Date'].dt.month < 9]
        # remove dates where PDD==0
        site_df = site_df.loc[site_df[rgi_id+'_PDD'] > 0]
        # prep the X and y data
        X = site_df[[rgi_id+'_PDD', rgi_id+'_SF']].values
        y = site_df[rgi_id].values
        # NOTE(review): a trimmed 80% subset used to be computed here but was
        # never used; the example fit uses all points -- confirm intent
        # fit linear regression model
        model = LinearRegression().fit(X, y)
        score = model.score(X, y)
        coefs = model.coef_
        # plot the fitted values and the data; the data series is labelled
        # with the panel's own run name
        ax[i].plot(X[:,0], model.predict(X), 'ok', label='Linear fit')
        ax[i].plot(X[:,0], y, '.', label=run)
        # raw string so that "\S" is not treated as an (invalid) escape sequence
        ax[i].set_xlabel(r'$\Sigma$PDD')
        ax[i].set_ylabel('ELA [m]')
        ax[i].legend(loc='upper left')
        ax[i].set_title(f'{run}\nELA coef = {np.round(coefs[0],3)}, Score = {np.round(score, 3)}')
    fig.suptitle(rgi_id)
    plt.show()