# Estimate modeled snowline altitudes and equilibrium line altitudes (ELAs)

In [None]:
import os
from scipy import optimize
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import median_abs_deviation as MAD
from tqdm.auto import tqdm
import glob

In [None]:
# Root folder of the snow cover mapping project
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'

# File names of the remotely-sensed monthly snowline altitudes and annual ELAs
obs_slas_monthly_fn = os.path.join(scm_path, 'analysis', 'monthly_SLAs_observed.csv')
obs_elas_annual_fn = os.path.join(scm_path, 'analysis', 'annual_ELAs_observed.csv')

# Read each table and convert its 'Date' column to datetimes
obs_slas_monthly = pd.read_csv(obs_slas_monthly_fn).assign(
    Date=lambda table: pd.to_datetime(table['Date']))
obs_elas_annual = pd.read_csv(obs_elas_annual_fn).assign(
    Date=lambda table: pd.to_datetime(table['Date']))

# Folder containing the binned modeled SMB files
modeled_path = os.path.join(scm_path, 'Rounce_et_al_2023', 'binned')


## Estimate modeled snowline altitudes

In [None]:
# Functions for linearly extrapolating the ELA when modeled SMB < 0 everywhere
def linear_fit(x, m, b):
    """Evaluate the straight line with slope ``m`` and intercept ``b`` at ``x``."""
    scaled = m*x
    return scaled + b
    
def extrapolate_ela_linear(X,y, Iend=8):
    """Fit a straight line to the leading samples and return its value at ``X = 0``.

    Parameters
    ----------
    X : sequence
        Independent variable; only ``X[0:Iend+1]`` is used in the fit.
    y : sequence
        Dependent variable; only ``y[0:Iend+1]`` is used in the fit.
    Iend : int, optional
        Index of the last sample included in the fit (inclusive).

    Returns
    -------
    The fitted line evaluated at ``X = 0``.
    """
    # least-squares fit of linear_fit to the first Iend+1 samples
    n_fit = Iend + 1
    params, _ = optimize.curve_fit(linear_fit, X[:n_fit], y[:n_fit])
    # value of the fitted line where the independent variable is zero
    return linear_fit(0, *params)

In [None]:
# -----Monthly snowline altitudes
# Load the modeled monthly snowline altitudes from the cached CSV when it exists;
# otherwise derive them from the binned model files and write the cache.
mod_slas_monthly_fn = os.path.join(scm_path, 'analysis', 'monthly_SLAs_modeled.csv')
if os.path.exists(mod_slas_monthly_fn):
    mod_slas_monthly = pd.read_csv(mod_slas_monthly_fn)
    mod_slas_monthly['Date'] = pd.DatetimeIndex(mod_slas_monthly['Date'])
    print('Modeled monthly SLAs loaded from file.')
else:
    # NOTE(review): `aois` and `eras` are used below but are not defined in the
    # cells shown here -- confirm they are loaded before running this branch.

    # load binned model data
    bin_fns = sorted(glob.glob(os.path.join(modeled_path, '*.nc')))

    # remove binned file names for sites without snow cover observations
    # (matches the first 7 characters of the file name against RGIId[7:])
    aoi_ids = {x[7:] for x in aois['RGIId'].drop_duplicates().values}
    bin_fns = [x for x in bin_fns if os.path.basename(x)[0:7] in aoi_ids]

    # per-site results, concatenated once after the loop
    site_dfs = []

    # iterate over binned file names
    for bin_fn in tqdm(bin_fns):
        # open binned data; the context manager closes the file when done
        with xr.open_dataset(bin_fn) as ds:
            rgi_id = ds.RGIId.data[0] # grab RGI ID

            # grab data variables
            h = ds.bin_surface_h_initial.data[0] # surface elevation [m]
            times = [np.datetime64(x) for x in ds.time.data] # datetimes
            months = list(pd.DatetimeIndex(times).month) # months of each datetime
            b_sum = np.zeros((len(times), len(h))) # cumulative SMB
            elas = np.zeros(len(times)) # initialize transient ELAs

            # iterate over each time period
            for j in range(len(times)):
                # grab the SMB for this time step
                b_sum[j,:] = ds.isel(time=j).bin_massbalclim_monthly.data[0]
                # add the previous SMB (restart the count in October);
                # skip j == 0, where j-1 would wrap around to the last row
                if months[j] != 10 and j > 0:
                    b_sum[j,:] += b_sum[j-1,:]
                smb = b_sum[j,:]
                # If all SMB > 0, ELA = minimum elevation
                if np.all(smb > 0):
                    elas[j] = np.min(h)
                # If SMB is > 0 and < 0 in some places, linearly interpolate ELA.
                # np.interp needs increasing x-coordinates, hence the flips; this
                # assumes cumulative SMB decreases along the bin axis -- TODO confirm
                elif np.any(smb < 0) and np.any(smb > 0):
                    elas[j] = np.interp(0, np.flip(smb), np.flip(h))
                # If SMB < 0 everywhere, fit a line to the first bins and extrapolate to SMB=0
                elif np.all(smb < 0):
                    elas[j] = extrapolate_ela_linear(smb, h, Iend=5)
                else:
                    # No sign could be established (e.g., zeros or NaNs in the SMB).
                    # Store NaN instead of leaving 0 m, which would otherwise be
                    # picked up as a site's minimum modeled snowline altitude.
                    elas[j] = np.nan
                    print(f'Could not determine snowline for {rgi_id} at {times[j]}; setting to NaN.')

        # compile in dataframe
        df = pd.DataFrame({'Date': times,
                           'ELA_m': elas})

        # Because each SMB value represents the total SMB for each month, add 1 month to the dates
        df['Date'] = df['Date'] + pd.DateOffset(months=1)
        df['RGIId'] = rgi_id

        # Add ERA5 data for each date
        eras_site = eras.loc[eras['RGIId']==rgi_id,
                             ['Date', 'cumulative_positive_degree_days', 'cumulative_snowfall_sum']]
        df = df.merge(eras_site, on='Date')
        site_dfs.append(df)

    if not site_dfs:
        raise ValueError('No binned model files matched the study sites in: ' + modeled_path)
    mod_slas_monthly = pd.concat(site_dfs)

    # Rearrange columns
    mod_slas_monthly = mod_slas_monthly[['RGIId', 'Date', 'ELA_m', 
                                         'cumulative_positive_degree_days', 
                                         'cumulative_snowfall_sum']]
    # save to file
    mod_slas_monthly.to_csv(mod_slas_monthly_fn, index=False)
    print('Modeled monthly SLAs saved to file:', mod_slas_monthly_fn)

mod_slas_monthly.reset_index(drop=True, inplace=True)
mod_slas_monthly

## Calculate modeled ELAs

In [None]:
# -----Annual ELAs
# Load the modeled annual ELAs from the cached CSV when it exists; otherwise take,
# for each site and year, the monthly row with the highest modeled snowline altitude.
mod_elas_annual_fn = os.path.join(scm_path, 'analysis', 'annual_ELAs_modeled.csv')
if os.path.exists(mod_elas_annual_fn):
    mod_elas_annual = pd.read_csv(mod_elas_annual_fn)
    mod_elas_annual['Date'] = pd.to_datetime(mod_elas_annual['Date'])
    print('Modeled annual ELAs loaded from file.')
else:
    # Add Year column (calendar year of each monthly date)
    mod_slas_monthly['Year'] = pd.DatetimeIndex(mod_slas_monthly['Date']).year
    # Reorder columns before selecting rows so the annual table inherits the order
    mod_slas_monthly = mod_slas_monthly[['RGIId', 'Date', 'Year', 'ELA_m', 
                                         'cumulative_positive_degree_days',
                                         'cumulative_snowfall_sum']]
    # Identify the row of maximum ELA for each site and each year.
    # Rows without an ELA are dropped first so no group is entirely NaN.
    valid = mod_slas_monthly.dropna(subset=['ELA_m'])
    Imax = valid.groupby(by=['RGIId', 'Year'])['ELA_m'].idxmax().values
    # idxmax returns index labels, so select with .loc rather than .iloc
    mod_elas_annual = valid.loc[Imax].reset_index(drop=True)
    # Save to file
    mod_elas_annual.to_csv(mod_elas_annual_fn, index=False)
    print('Modeled annual ELAs saved to file:', mod_elas_annual_fn)
    
mod_elas_annual

## Difference between modeled and remotely-sensed snowlines

### Calculate minimum remotely-sensed snowline altitudes for standardizing elevations

In [None]:
# Define output file name
min_obs_sla_fn = os.path.join(scm_path, 'analysis', 'minimum_remotely-sensed_snowline_altitudes.csv')
# Compute the minimum remotely-sensed snowline altitude for each site unless
# the result has already been saved to file.
if not os.path.exists(min_obs_sla_fn):
    rows = []
    # Iterate over sites
    for rgi_id in tqdm(obs_slas_monthly['RGIId'].drop_duplicates().values):
        # Load snowlines
        sc_fns = sorted(glob.glob(os.path.join(scm_path, 'study-sites', rgi_id, 'imagery', 'snowlines', '*.csv')))
        if sc_fns:
            scs = pd.concat([pd.read_csv(fn) for fn in sc_fns], axis=0, ignore_index=True)
            slas = scs['ELA_from_AAR_m']
            # Remove any wonky values (magnitude above 1e10), then get minimum snowline altitude
            min_sla = slas[np.abs(slas) <= 1e10].min()
        else:
            # No snowline files for this site: record NaN instead of failing
            min_sla = np.nan
        # Add to results
        rows.append({'RGIId': rgi_id, 'SLA_min_m': min_sla})

    # Save to file
    min_obs_sla = pd.DataFrame(rows, columns=['RGIId', 'SLA_min_m'])
    min_obs_sla.to_csv(min_obs_sla_fn, index=False)
    print('Minimum snowline altitudes saved to file:', min_obs_sla_fn)
    
else:
    min_obs_sla = pd.read_csv(min_obs_sla_fn)
    print('Minimum snowline altitudes loaded.')

min_obs_sla

### Monthly snowline altitudes

In [None]:
# Define output file
slas_monthly_merged_fn = os.path.join(scm_path, 'analysis', 'monthly_SLAs_modeled_observed_merged.csv')
# Pair modeled and remotely-sensed monthly snowline altitudes by site and date,
# standardize each by its site minimum, and difference them (cached to CSV).
if not os.path.exists(slas_monthly_merged_fn):

    # Merge modeled and remotely-sensed ELAs
    slas_monthly_merged = mod_slas_monthly[['RGIId', 'Date', 'ELA_m']].merge(
        obs_slas_monthly[['RGIId', 'Date', 'ELA_from_AAR_m']], on=['RGIId', 'Date'])

    # Remove 2023 values (no modeled data in 2023) and observations outside May - September
    merged_dates = pd.DatetimeIndex(slas_monthly_merged['Date'])
    keep = (merged_dates.year < 2023) & (merged_dates.month >= 5) & (merged_dates.month <= 9)
    # copy so the edits below act on an independent frame, not a slice
    slas_monthly_merged = slas_monthly_merged.loc[keep].copy()

    # Rename columns
    slas_monthly_merged.rename(columns={'ELA_m': 'ELA_mod_m', 'ELA_from_AAR_m': 'ELA_obs_m'}, inplace=True)

    # Subtract the minimum snowline altitudes to mitigate datum issues, s.t. ELAs are w.r.t. 0 m. 
    # Observed minima come from min_obs_sla (first entry per site); modeled minima are
    # taken over all modeled months for the site. Sites absent from a lookup yield NaN.
    obs_min_by_site = min_obs_sla.drop_duplicates('RGIId').set_index('RGIId')['SLA_min_m']
    mod_min_by_site = mod_slas_monthly.groupby('RGIId')['ELA_m'].min()
    slas_monthly_merged['ELA_obs_m'] -= slas_monthly_merged['RGIId'].map(obs_min_by_site)
    slas_monthly_merged['ELA_mod_m'] -= slas_monthly_merged['RGIId'].map(mod_min_by_site)

    # Calculate differences
    slas_monthly_merged['ELA_obs-mod_m'] = slas_monthly_merged['ELA_obs_m'] - slas_monthly_merged['ELA_mod_m']

    # Save results
    slas_monthly_merged.to_csv(slas_monthly_merged_fn, index=False)
    print('Merged monthly SLAs saved to file:', slas_monthly_merged_fn)

else:
    slas_monthly_merged = pd.read_csv(slas_monthly_merged_fn)
    # parse dates so the cached table matches the freshly computed one
    slas_monthly_merged['Date'] = pd.to_datetime(slas_monthly_merged['Date'])
    print('Merged monthly SLAs loaded.')


# Plot
fig, ax = plt.subplots(figsize=(6,5))
ax.hist(slas_monthly_merged['ELA_obs-mod_m'], bins=50)
ax.set_xlabel('Snowline$_{obs}$ - Snowline$_{mod}$ [m]')
ax.set_ylabel('Counts')
plt.show()


# Summary statistics of the observed-minus-modeled differences (NaNs ignored)
monthly_diffs = slas_monthly_merged["ELA_obs-mod_m"].values
print('\nDifference stats:')
print(f'Mean diff = {np.nanmean(monthly_diffs)} m')
print(f'Std. diff = {np.nanstd(monthly_diffs)} m')
print(f'Median diff = {np.nanmedian(monthly_diffs)} m')
print(f'MAD diff = {MAD(monthly_diffs, nan_policy="omit")} m')

### Annual ELAs

In [None]:
# Preview: pair remotely-sensed and modeled annual ELAs by site and year
elas_annual_merged = pd.merge(
    obs_elas_annual[['RGIId', 'Year', 'ELA_from_AAR_m']],
    mod_elas_annual[['RGIId', 'Year', 'ELA_m']],
    on=['RGIId', 'Year'],
)
elas_annual_merged

In [None]:
# Define output file name
elas_annual_merged_fn = os.path.join(scm_path, 'analysis', 'ELAs_annual_mod_obs_merged.csv')
# Pair remotely-sensed and modeled annual ELAs by site and year and difference
# them (cached to CSV).
# NOTE(review): unlike the monthly snowlines, these ELAs are not standardized by
# the per-site minima before differencing -- confirm that is intended.
if not os.path.exists(elas_annual_merged_fn):

    # Merge modeled and remotely-sensed modeled ELAs
    elas_annual_merged = obs_elas_annual[['RGIId', 'Year', 'ELA_from_AAR_m']].merge(
        mod_elas_annual[['RGIId', 'Year', 'ELA_m']], on=['RGIId', 'Year'])
    
    # Rename columns
    elas_annual_merged.rename(columns={'ELA_from_AAR_m': 'ELA_obs_m', 'ELA_m': 'ELA_mod_m'}, inplace=True)
    
    # Remove 2023 values (no modeled data in 2023); copy so the new column below
    # is added to an independent frame, not a slice
    elas_annual_merged = elas_annual_merged.loc[elas_annual_merged['Year'] < 2023].copy()
        
    # Calculate difference
    elas_annual_merged['ELA_obs-mod_m'] = elas_annual_merged['ELA_obs_m'] - elas_annual_merged['ELA_mod_m']

    # Save results
    elas_annual_merged.to_csv(elas_annual_merged_fn, index=False)
    print('Merged annual ELAs saved to file:', elas_annual_merged_fn)

else:
    elas_annual_merged = pd.read_csv(elas_annual_merged_fn)
    print('Merged annual ELAs loaded.')
    
# Plot
fig, ax = plt.subplots(figsize=(5,5))
ax.hist(elas_annual_merged['ELA_obs-mod_m'], bins=50)
ax.set_xlabel('ELA$_{obs}$ - ELA$_{mod}$ [m]')
ax.set_ylabel('Counts')
plt.show()


# Summary statistics of the observed-minus-modeled differences (NaNs ignored)
annual_diffs = elas_annual_merged["ELA_obs-mod_m"].values
print('\nDifference stats:')
print(f'Mean diff = {np.nanmean(annual_diffs)} m')
print(f'Std. diff = {np.nanstd(annual_diffs)} m')
print(f'Median diff = {np.nanmedian(annual_diffs)} m')
print(f'MAD diff = {MAD(annual_diffs, nan_policy="omit")} m')

In [None]:
# Print, for every site in the merged monthly table, the minimum remotely-sensed
# snowline altitude next to the minimum modeled snowline altitude
for rgi_id in slas_monthly_merged['RGIId'].drop_duplicates().values:
    obs_rows = min_obs_sla['RGIId'] == rgi_id
    mod_rows = mod_slas_monthly['RGIId'] == rgi_id
    min_sla_obs = min_obs_sla.loc[obs_rows, 'SLA_min_m'].values[0]
    min_sla_mod = mod_slas_monthly.loc[mod_rows, 'ELA_m'].min()
    print(min_sla_obs, min_sla_mod)