# Estimate differences in modeled and remotely-sensed SMB

1. Monthly snowline altitudes (SLAs)
2. Equilibrium line altitudes (ELAs)
3. Modeled surface mass balance (SMB) at the remotely-sensed snowline
4. Degree-day factors of snow ($f_{snow}$)

In [1]:
import os
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.stats import median_abs_deviation as MAD
from tqdm.auto import tqdm
import glob
import geopandas as gpd

In [2]:
# Locations of inputs and outputs
# Project root; all other paths in this notebook are built from it
scm_dir = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
# Shapefile of glacier boundaries (its 'RGIId' column is used to iterate over sites)
aois_fn = os.path.join(scm_dir, 'analysis', 'all_aois.shp')
# Folder with the model files (binned SMB is read from its 'glac_SMB_binned' subfolder)
model_dir = os.path.join(scm_dir, 'Rounce_et_al_2023')
# Read the glacier boundaries
aois = gpd.read_file(aois_fn)

## 1. Monthly snowline altitudes

### Remotely-sensed SLAs

In [None]:
# Monthly remotely-sensed snowline altitudes: for each site and each first-of-month,
# keep the single observation closest in time to that date (within about one week).
slas_obs_fn = os.path.join(scm_dir, 'analysis', 'monthly_SLAs_observed.csv')
if not os.path.exists(slas_obs_fn):
    # Collect one frame per RGI ID and concatenate once at the end
    site_frames = []
    for rgi_id in tqdm(sorted(aois['RGIId'].drop_duplicates().values)):
        scs_fn = os.path.join(scm_dir, 'study-sites', rgi_id, f'{rgi_id}_snow_cover_stats.csv')
        scs = pd.read_csv(scs_fn)
        scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')
        scs['Day'] = scs['datetime'].dt.day
        # Filter data to within about one week of the first of a month.
        # An explicit copy avoids assigning new columns onto a slice of `scs`.
        near_first = scs.loc[(scs['Day'] >= 25) | (scs['Day'] <= 7)].copy()
        # Assign every observation to the first-of-month it is actually close to:
        # days 1-7 belong to the first of their own month, days >= 25 belong to
        # the first of the FOLLOWING month. (Previously the observation's own
        # calendar month was always used, so a late-month observation could be
        # labelled with a date ~4 weeks away, and a first-of-month with only
        # late-previous-month observations was never produced.)
        own_first = near_first['datetime'].dt.to_period('M').dt.to_timestamp()
        near_first['Date'] = own_first.where(near_first['Day'] <= 7,
                                             own_first + pd.DateOffset(months=1))
        # Time separation between each observation and its target date
        near_first['diff'] = (near_first['datetime'] - near_first['Date']).abs()
        # For each target date, keep the closest observation (label-based lookup)
        closest = near_first.groupby('Date')['diff'].idxmin()
        scs_monthly = near_first.loc[closest.values].reset_index(drop=True)
        site_frames.append(scs_monthly)

    # concatenate to full dataframe
    slas_obs = pd.concat(site_frames)

    # select relevant columns
    slas_obs.rename(columns={'ELA_from_AAR_m': 'SLA_obs_m'}, inplace=True)
    slas_obs = slas_obs[['RGIId', 'Date', 'SLA_obs_m']]
    slas_obs.reset_index(drop=True, inplace=True)

    # save to file
    slas_obs.to_csv(slas_obs_fn, index=False)
    print('Remotely-sensed monthly SLAs saved to file:', slas_obs_fn)

else:
    slas_obs = pd.read_csv(slas_obs_fn)
    slas_obs['Date'] = pd.to_datetime(slas_obs['Date'])
    print('Remotely-sensed monthly SLAs loaded from file.')

slas_obs


### Modeled SLAs and SMB at observed SLAs

In [None]:
### Grab minimum glacier elevations for standardizing ###
# Define output file name
min_elevs_obs_fn = os.path.join(scm_dir, 'analysis', 'minimum_glacier_elevations_observed.csv')
if not os.path.exists(min_elevs_obs_fn):
    # load AOIs
    aois = gpd.read_file(aois_fn)
    # One record per site; the dataframe is built once after the loop
    site_rows = []
    # Iterate over sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load DEM (first GeoTIFF found in the site's DEMs folder)
        dem_fn = glob.glob(os.path.join(scm_dir, 'study-sites', rgi_id, 'DEMs', '*.tif'))[0]
        # The context manager closes the file once the minimum has been read
        with xr.open_dataset(dem_fn) as dem_raw:
            dem = dem_raw.squeeze()
            # Remove any wonky values (below -1000 m, above 10000 m, or exactly 0)
            dem = xr.where((dem < -1e3) | (dem > 1e4) | (dem==0), np.nan, dem)
            # Get minimum elevation in the DEM
            min_elev = np.nanmin(dem.band_data.data)
        site_rows.append({'RGIId': rgi_id, 'min_elevation_m': min_elev})

    # Save to file
    min_elevs_obs = pd.DataFrame(site_rows, columns=['RGIId', 'min_elevation_m'])
    min_elevs_obs.to_csv(min_elevs_obs_fn, index=False)
    print('Minimum remotely-sensed glacier elevations saved to file:', min_elevs_obs_fn)
    
else:
    min_elevs_obs = pd.read_csv(min_elevs_obs_fn)
    print('Minimum remotely-sensed glacier elevations loaded.')

plt.hist(min_elevs_obs['min_elevation_m'], bins=50)
plt.show()

In [None]:
# Check if file already exists
slas_mod_fn = os.path.join(scm_dir, 'analysis', 'monthly_SLAs_modeled.csv')


def water_year(date):
    """Return the water-year label used to group monthly SMB.

    October-December dates are labelled with their own calendar year;
    January-September dates are labelled with the previous calendar year.
    `date` must expose `.year` and `.month` attributes.
    """
    return date.year if date.month >= 10 else date.year - 1


if not os.path.exists(slas_mod_fn):
    
    # Collect one frame per site and concatenate once at the end
    site_frames = []
    
    # Iterate over sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Observed SLAs for this site only (avoids re-scanning the full table per time step)
        obs_site = slas_obs.loc[slas_obs['RGIId']==rgi_id]
        # Load modeled monthly SMB; the file is closed when the block exits
        smb_fn = glob.glob(os.path.join(model_dir, 'glac_SMB_binned', f"{rgi_id.split('RGI60-0')[1]}*.nc"))[0]
        with xr.open_dataset(smb_fn) as smb_raw:
            # calculate cumulative SMB within each water year
            smb = smb_raw.assign_coords({'water_year': (['time'], [water_year(t) for t in smb_raw.time.values])})
            smb['bin_massbalclim_monthly_cumsum'] = smb['bin_massbalclim_monthly'].groupby('water_year').cumsum()
            smb['time'] = smb.time.values.astype('datetime64[D]')
            h = smb['bin_surface_h_initial'].data.ravel()
            # np.interp requires increasing x-coordinates, so order the bins by
            # elevation once and reuse that ordering for both interpolations below
            order = np.argsort(h)
            h_sorted = h[order]
            times = smb.time.data
            
            # Interpolate modeled SLA as where SMB = 0 and SMB at the observed SLA
            slas = np.full(len(times), np.nan)
            smb_at_slas = np.full(len(times), np.nan)
            for j, t in enumerate(times):
                smb_time = smb.sel(time=t)['bin_massbalclim_monthly_cumsum'].data[0]
                smb_sorted = smb_time[order]
                # when SMB <= 0 everywhere, set SLA to maximum glacier elevation
                if np.all(smb_time <= 0):
                    slas[j] = np.max(h)
                # when SMB >= 0 everywhere, set SLA to minimum glacier elevation
                elif np.all(smb_time >= 0):
                    slas[j] = np.min(h)
                # otherwise, linearly interpolate SLA
                else:
                    # NOTE(review): this treats SMB as the x-coordinate, which np.interp
                    # expects to be increasing; that only holds if SMB increases
                    # monotonically with elevation -- confirm for these profiles.
                    slas[j] = np.interp(0, smb_sorted, h_sorted)
                # interpolate the modeled SMB at the observed SLA
                # (previously the unsorted `h` was passed as the x-coordinate here)
                sla_obs = obs_site.loc[obs_site['Date']==t, 'SLA_obs_m']
                if len(sla_obs) > 0:
                    smb_at_slas[j] = np.interp(sla_obs.values[0], h_sorted, smb_sorted)

        # Save results in dataframe
        df = pd.DataFrame({'RGIId': [rgi_id]*len(times),
                            'Date': times,
                            'SLA_mod_m': slas,
                            'SMB_at_obs_SLA_mwe': smb_at_slas})
        site_frames.append(df)
        
    # Concatenate to full dataframe and save to file
    slas_mod = pd.concat(site_frames, axis=0)
    slas_mod.reset_index(drop=True, inplace=True)
    slas_mod.to_csv(slas_mod_fn, index=False)
    print('Modeled monthly SLAs saved to file:', slas_mod_fn)
    
else:
    slas_mod = pd.read_csv(slas_mod_fn)
    slas_mod['Date'] = pd.to_datetime(slas_mod['Date'])
    print('Modeled monthly SLAs loaded from file.')

slas_mod

### Merge

In [None]:
# Define output file
slas_merged_fn = os.path.join(scm_dir, 'analysis', 'monthly_SLAs_modeled_observed_merged.csv')
if not os.path.exists(slas_merged_fn):

    # Merge modeled and remotely-sensed SLAs (inner join on site and date)
    slas_merged = slas_mod[['RGIId', 'Date', 'SLA_mod_m']].merge(slas_obs[['RGIId', 'Date', 'SLA_obs_m']],
                                                                 on=['RGIId', 'Date'])
    # Remove 2023 values (no modeled data in 2023)
    slas_merged = slas_merged.loc[slas_merged['Date'].dt.year < 2023]
    
    # Remove observations outside May - September
    months = slas_merged['Date'].dt.month
    slas_merged = slas_merged.loc[(months >= 5) & (months <= 9)]

    # NOTE: SLAs are kept as absolute elevations; no per-site minimum is
    # subtracted to mitigate datum differences.

    # Save results
    slas_merged.to_csv(slas_merged_fn, index=False)
    print('Merged monthly SLAs saved to file:', slas_merged_fn)

else:
    slas_merged = pd.read_csv(slas_merged_fn)
    # Parse dates so the cached table has the same dtypes as a freshly computed one
    slas_merged['Date'] = pd.to_datetime(slas_merged['Date'])
    print('Merged monthly SLAs loaded.')


# Modeled minus observed SLA
slas_merged['SLA_mod-obs_m'] = slas_merged['SLA_mod_m'] - slas_merged['SLA_obs_m']

# Plot
fig, ax = plt.subplots(figsize=(6,5))
ax.hist(slas_merged['SLA_mod-obs_m'], bins=50)
ax.set_xlabel('SLA$_{mod}$ - SLA$_{obs}$ [m]')
ax.set_ylabel('Counts')
plt.show()


# Summary statistics of the differences (NaNs ignored)
sla_diffs = slas_merged["SLA_mod-obs_m"].values
print('\nDifference stats:')
print(f'Mean diff = {np.nanmean(sla_diffs)} m')
print(f'Std. diff = {np.nanstd(sla_diffs)} m')
print(f'Median diff = {np.nanmedian(sla_diffs)} m')
print(f'MAD diff = {MAD(sla_diffs, nan_policy="omit")} m')

## 2. ELAs

### Modeled ELAs

In [None]:
# Modeled annual ELA = maximum modeled monthly SLA per site and calendar year
elas_mod_fn = os.path.join(scm_dir, 'analysis', 'annual_ELAs_modeled.csv')
if not os.path.exists(elas_mod_fn):
    # Work on a copy: filtering `slas_mod` in place would make the table seen by
    # later cells depend on whether this branch or the cached-file branch ran.
    slas_mod_annual = slas_mod.copy()
    # Add Year column
    slas_mod_annual['Year'] = slas_mod_annual['Date'].dt.year
    slas_mod_annual = slas_mod_annual.loc[slas_mod_annual['Year'] < 2023].reset_index(drop=True) # remove 2023 observations
    # Identify the row of maximum SLA for each site and each year
    Imax = slas_mod_annual.groupby(by=['RGIId', 'Year'])['SLA_mod_m'].idxmax().values
    # idxmax returns index labels, so select by label
    elas_mod = slas_mod_annual.loc[Imax].reset_index(drop=True)
    elas_mod.rename(columns={'SLA_mod_m': 'ELA_mod_m'}, inplace=True)
    # Reorder columns
    elas_mod = elas_mod[['RGIId', 'Date', 'Year', 'ELA_mod_m']]
    # Save to file
    elas_mod.to_csv(elas_mod_fn, index=False)
    print('Modeled annual ELAs saved to file:', elas_mod_fn)
else:
    elas_mod = pd.read_csv(elas_mod_fn)
    elas_mod['Date'] = pd.to_datetime(elas_mod['Date'])
    print('Modeled annual ELAs loaded from file.')
    
elas_mod

### Remotely-sensed ELAs

In [None]:
# Remotely-sensed annual ELA = maximum observed monthly SLA per site and calendar year
elas_obs_fn = os.path.join(scm_dir, 'analysis', 'annual_ELAs_observed.csv')
if not os.path.exists(elas_obs_fn):
    # Collect one frame per site and concatenate once at the end
    site_frames = []
    for rgi_id in tqdm(slas_obs['RGIId'].drop_duplicates().values):
        # Subset to site and to 2016 onwards
        slas_obs_site = slas_obs.loc[(slas_obs['RGIId']==rgi_id)
                                     & (slas_obs['Date'].dt.year >= 2016)]
        # Drop missing SLAs first: a year whose SLAs are all NaN has no maximum,
        # and idxmax would return NaN (or raise) for it
        slas_obs_site = slas_obs_site.dropna(subset=['SLA_obs_m']).reset_index(drop=True)
        if slas_obs_site.empty:
            continue
        # identify maximum annual SLA (idxmax returns labels, so select by label)
        imax = slas_obs_site.groupby(slas_obs_site['Date'].dt.year)['SLA_obs_m'].idxmax().values
        site_frames.append(slas_obs_site.loc[imax])
    # concatenate to full dataframe (an empty frame with the same columns if no site had data)
    elas_obs = pd.concat(site_frames) if site_frames else slas_obs.iloc[:0].copy()
    elas_obs.reset_index(drop=True, inplace=True)
    elas_obs.rename(columns={'SLA_obs_m': 'ELA_obs_m'}, inplace=True)

    # save to file
    elas_obs.to_csv(elas_obs_fn, index=False)
    print('Remotely-sensed ELAs saved to file:', elas_obs_fn)

else:
    elas_obs = pd.read_csv(elas_obs_fn)
    elas_obs['Date'] = pd.to_datetime(elas_obs['Date'])
    print('Remotely-sensed ELAs loaded from file.')

elas_obs

### Merged

In [None]:
# Where the merged annual ELA table is cached
elas_merged_fn = os.path.join(scm_dir, 'analysis', 'annual_ELAs_modeled_observed_merged.csv')
if os.path.exists(elas_merged_fn):
    # Reuse the cached table
    elas_merged = pd.read_csv(elas_merged_fn)
    print('Merged annual ELAs loaded.')

else:
    # Pair each observed ELA with the modeled ELA of the same site and year
    elas_obs['Year'] = elas_obs['Date'].dt.year
    obs_part = elas_obs[['RGIId', 'Year', 'ELA_obs_m']]
    mod_part = elas_mod[['RGIId', 'Year', 'ELA_mod_m']]
    elas_merged = obs_part.merge(mod_part, on=['RGIId', 'Year'])

    # Keep 2016-2022 only (no modeled data in 2023)
    in_period = (elas_merged['Year'] >= 2016) & (elas_merged['Year'] < 2023)
    elas_merged = elas_merged.loc[in_period]

    # Write the merged table to disk
    elas_merged.to_csv(elas_merged_fn, index=False)
    print('Merged annual ELAs saved to file:', elas_merged_fn)

# Modeled minus observed ELA
elas_merged['ELA_mod-obs_m'] = elas_merged['ELA_mod_m'] - elas_merged['ELA_obs_m']
ela_diffs = elas_merged['ELA_mod-obs_m']

# Histogram of the differences
fig, ax = plt.subplots(figsize=(5,5))
ax.hist(ela_diffs, bins=50)
ax.set_xlabel('ELA$_{mod}$ - ELA$_{obs}$ [m]')
ax.set_ylabel('Counts')
plt.show()

# Summary statistics (NaNs ignored)
print('\nDifference stats:')
print(f"Mean diff = {np.nanmean(ela_diffs)} m")
print(f"Std. diff = {np.nanstd(ela_diffs)} m")
print(f"Median diff = {np.nanmedian(ela_diffs)} m")
print(f"MAD diff = {MAD(ela_diffs, nan_policy='omit')} m")

## 3. Degree-day factors of snow

### Modeled

In [None]:
# Check if already exists in file
fsnow_mod_fn = os.path.join(scm_dir, 'analysis', 'fsnow_modeled.csv')
if not os.path.exists(fsnow_mod_fn):
    print('Compiling modeled melt factors of snow')
    # Model parameter files live next to the other model files
    modelprm_dir = os.path.join(model_dir, 'modelprms')
    # One record per site; the dataframe is built once after the loop
    site_rows = []
    # Iterate over RGI IDs
    for rgi_id in tqdm(slas_mod['RGIId'].drop_duplicates().values):
        # Load model parameters
        # NOTE(review): pd.read_pickle executes arbitrary code on load; only use
        # parameter files from a trusted source.
        modelprm_fn = os.path.join(modelprm_dir, f"{rgi_id.replace('RGI60-0','')}-modelprms_dict.pkl")
        modelprm = pd.read_pickle(modelprm_fn)
        # Take the median of MCMC fsnow results (not much different than the mean)
        ddfsnow_mcmc = np.array(modelprm['MCMC']['ddfsnow']['chain_0'])
        site_rows.append({"RGIId": rgi_id,
                          "fsnow_mod_m/C/d": np.median(ddfsnow_mcmc)})
    # Save to file
    fsnow_mod = pd.DataFrame(site_rows, columns=["RGIId", "fsnow_mod_m/C/d"])
    fsnow_mod.to_csv(fsnow_mod_fn, index=False)
    print('Compiled melt factors of snow saved to file:', fsnow_mod_fn)

else:
    fsnow_mod = pd.read_csv(fsnow_mod_fn)
    print('Compiled melt factors of snow loaded from file.')

# Histogram in mm/C/d (values are stored in m/C/d)
plt.hist(fsnow_mod['fsnow_mod_m/C/d'] * 1e3, bins=100)
# Raw string: '\c' is not a valid escape sequence in a normal string literal
plt.xlabel(r'Melt factor of snow [mm $^{\circ}$C$^{-1}$ d$^{-1}$]')
plt.ylabel('Counts')
plt.show()


### Observed

Adjust the modeled degree-day factors of snow ($f_{snow}$) using the modeled SMB at the observed snowline and the cumulative positive degree days (PDDs) from ERA5-Land air temperatures downscaled to the snowline altitude.

$SMB(x,t) = Accumulation - Melt = \Sigma Snowfall(x,t) - \sum_{t_{melt}}^t PDD(x,t) \cdot \Delta t \cdot f_{snow}$

where $t_{melt}$ is the start of the melt season and $\Delta t$ is $t-t_{melt}$. 

At the snowline, SMB = 0. Rearranging:

$f_{snow}(x,t) = \frac{\Sigma Snowfall(x,t)}{\sum_{t_{melt}}^t PDD(x,t) \cdot \Delta t} $

For example, if the modeled SMB at the observed snowline is 10 m on day 100 of the melt season and the cumulative PDDs there are 100 $^{\circ}$C, the model underestimated melt by 10 m / (100 $^{\circ}$C $\cdot$ 100 days) = 0.001 m/C/d. If the modeled melt factor of snow is 2 m/C/d, the adjusted $f_{snow}$ is 2.001 m/C/d. (These numbers are illustrative only.)

In [None]:
fsnow_obs_fn = os.path.join(scm_dir, 'analysis', 'fsnow_observed.csv')
# Force a recompute even when the output file exists (the existence check used
# to be hard-disabled). Set to False to load the saved CSV instead.
recompute_fsnow_obs = True


def find_first_positive(group):
    """Return, for each series along "time", the time of the first value > 0.

    Series that never exceed 0 get NaT.
    """
    # Mask values <= 0
    mask = group > 0
    # Find the first index where the value is > 0
    first_index = mask.argmax(dim="time")
    # Check if no positive value exists for the group
    no_positive = ~mask.any(dim="time")
    # Grab the corresponding time values
    time_values = group["time"].isel(time=first_index)
    # Replace invalid times with NaT for no_positive cases
    time_values = time_values.where(~no_positive, np.datetime64("NaT"))
    return time_values


if recompute_fsnow_obs or not os.path.exists(fsnow_obs_fn):
    # One record per site; the dataframe is built once after the loop
    site_rows = []
    
    # Iterate over sites
    for rgi_id in tqdm(slas_obs['RGIId'].drop_duplicates().values):
        # Skip missing or malformed IDs
        if not isinstance(rgi_id, str) or 'RGI' not in rgi_id:
            continue
        
        ### Load input data
        # Get modeled fsnow
        fsnow_mod_site = fsnow_mod.loc[fsnow_mod['RGIId']==rgi_id, 'fsnow_mod_m/C/d'].values[0]
        # Load ERA-Land data
        era_fn = os.path.join(scm_dir, 'study-sites', rgi_id, 'ERA', f"{rgi_id}_ERA5-Land_daily_means.csv")
        era_df = pd.read_csv(era_fn)
        era_df['Date'] = pd.to_datetime(era_df['Date'])
        # Load the elevation bins from the binned SMB file (closed right after reading)
        smb_fn = glob.glob(os.path.join(model_dir, 'glac_SMB_binned', f"{rgi_id.split('RGI60-0')[1]}*.nc"))[0]
        with xr.open_dataset(smb_fn) as smb:
            h = smb.squeeze().bin_surface_h_initial.values.ravel()
        # Grab observed snowlines (copy so that adding columns does not touch `slas_obs`)
        slas_obs_site = slas_obs.loc[slas_obs['RGIId']==rgi_id].copy()
        slas_obs_site['Year'] = slas_obs_site['Date'].dt.year # add year column
        # Grab modeled SMB at observed snowlines
        slas_obs_site = pd.merge(slas_obs_site, slas_mod[['RGIId', 'Date', 'SMB_at_obs_SLA_mwe']], on=['RGIId', 'Date'])
        # Keep dates before September only.
        # NOTE(review): the original comment said "Don't include dates after September",
        # but `month < 9` also drops September itself -- confirm which is intended.
        slas_obs_site = slas_obs_site.loc[slas_obs_site['Date'].dt.month < 9].copy()
        # Nothing to estimate from: record NaN for this site and move on
        if slas_obs_site.empty:
            site_rows.append({'RGIId': rgi_id, 'fsnow_obs_m/C/d': np.nan})
            continue
        
        ### Downscale air temperatures to glacier surface using lapse rates, calculate PDDs
        era_ds = xr.Dataset(
            coords={'h': h, 'time': era_df['Date']},
            data_vars={'temp_C': (['time'], era_df['mean_temperature_2m_C'].values),
                    'lapse_rate': (['time'], era_df['lapse_rate_C/m'])})
        # Height of the ERA5 cell above each elevation bin
        era_ds['h_diff'] = era_df['ERA5_height_mean_m'].values[0] - era_ds['h']
        era_ds['temp_downscaled_C'] = era_ds['temp_C'] - era_ds['lapse_rate'] * era_ds['h_diff']
        # Positive degree days: downscaled temperature where > 0, else 0
        era_ds['PDD'] = xr.where(era_ds['temp_downscaled_C'] > 0, era_ds['temp_downscaled_C'], 0)
        # Cumulative PDDs, restarted every calendar year
        era_ds['PDD_cumsum'] = era_ds['PDD'].groupby('time.year').cumsum()
        
        ### Identify the melt season start date (first cumulative PDD > 0) for each year and elevation
        era_ds['melt_season_start_date'] = (era_ds["PDD_cumsum"]
                                            .groupby("time.year")
                                            .map(find_first_positive))
        
        ### Cumulative PDDs at SLAs (nearest date and nearest elevation bin)
        pdds_slas = np.array([float(era_ds.sel(time=date, h=sla, method='nearest')['PDD_cumsum'].values) 
                              for date,sla in slas_obs_site[['Date', 'SLA_obs_m']].values])

        ### Melt season start dates at SLAs (nearest year and nearest elevation bin)
        melt_start_dates = np.array([era_ds.sel(year=year, h=sla, method='nearest')['melt_season_start_date'].values
                                     for year, sla in slas_obs_site[['Year', 'SLA_obs_m']].values])

        # Compile in dataframe
        slas_obs_site['PDD_cumsum_at_SLA_C'] = pdds_slas
        slas_obs_site['melt_season_start_date'] = melt_start_dates
        slas_obs_site = slas_obs_site.loc[slas_obs_site['PDD_cumsum_at_SLA_C'] > 0].copy() # remove rows with 0 PDDs (to avoid dividing by 0)
        slas_obs_site['days_since_melt_season_start_date'] = ((slas_obs_site['Date'] - slas_obs_site['melt_season_start_date']) / np.timedelta64(1, 'D')).astype(int)
        
        ### Calculate adjustment for modeled fsnow
        slas_obs_site['fsnow_mod_adj'] = (slas_obs_site['SMB_at_obs_SLA_mwe'] 
                                          / (slas_obs_site['PDD_cumsum_at_SLA_C'] 
                                             * slas_obs_site['days_since_melt_season_start_date']))
                
        ### Add adjustment to fsnow_mod
        slas_obs_site['fsnow_obs'] = fsnow_mod_site + slas_obs_site['fsnow_mod_adj']
        
        # Remove unrealistic values (this also discards +/-inf from a zero-day denominator)
        slas_obs_site.loc[slas_obs_site['fsnow_obs'] > 0.01, 'fsnow_obs'] = np.nan
        slas_obs_site.loc[slas_obs_site['fsnow_obs'] <= 0.0, 'fsnow_obs'] = np.nan
        
        ### Save the median
        site_rows.append({'RGIId': rgi_id,
                          'fsnow_obs_m/C/d': np.nanmedian(slas_obs_site['fsnow_obs'])})
        
        ### Plot an example
        if rgi_id=='RGI60-01.00032':
            plt.rcParams.update({'font.size': 12, 'font.sans-serif': 'Arial'})
            fig, ax = plt.subplots(3, 1, figsize=(8,10))
            # (a) air temperature time series
            ax[0].plot(era_ds.time.data, era_ds['temp_C'], '-k', linewidth=0.5)
            ax[0].set_ylabel(r'Air temperature [$^{\circ}$C]')
            ax[0].grid()
            # (b) cumulative PDDs by elevation and time, with observed snowlines
            cmap = matplotlib.colors.LinearSegmentedColormap.from_list('my_cmap', ['w', '#fb6a4a', '#67000d']) # white to red
            era_ds['PDD_cumsum'].transpose().plot(cmap=cmap, ax=ax[1], 
                                                cbar_kwargs={'orientation': 'horizontal', 
                                                            'shrink': 0.5, 
                                                            'label': r'Cumulative PDD [$^{\circ}$C]'})
            ax[1].set_ylabel('Elevation [m]')
            ax[1].plot(slas_obs_site['Date'], slas_obs_site['SLA_obs_m'], '*k', label='Snowline altitude')
            # melt season start
            for year in era_ds.year.data[1:]:
                era_ds_year = era_ds.sel(time=slice(f"{year}-01-01", f"{year}-08-01"))
                xmesh, ymesh = np.meshgrid(era_ds_year.time.data, era_ds.h.data)
                ax[1].contour(xmesh, ymesh, era_ds_year['PDD_cumsum'].data.transpose(), levels=[0], colors=['gray'])
            # dummy point, plotted only to create the legend entry
            ax[1].plot(pd.Timestamp('2010-01-01'), 0, '-', color='gray', label='Melt season start')
                        
            ax[1].legend(loc='upper left')
            ax[1].set_xlabel('')
            # (c) per-observation fsnow estimates vs. the modeled value
            ax[2].plot(slas_obs_site['Date'], slas_obs_site['fsnow_obs'] * 1e3, '.k')
            ax[2].axhline(fsnow_mod_site * 1e3, color='k', linestyle='--', label='Modeled')
            ax[2].axhline(slas_obs_site['fsnow_obs'].median() * 1e3, color='k', label='Observed median')
            ax[2].legend(loc='lower left')
            ax[2].set_ylabel(r'$f_{snow}$ [mm $^{\circ}$C$^{-1}$ d$^{-1}$]')
            ax[2].grid()
            ax[2].set_ylim(2, 5)
            labels = ['a', 'b', 'c']
            for i, axis in enumerate(ax):
                axis.set_xlim(np.datetime64('2013-01-01'), np.datetime64('2023-01-01'))
                axis.text(0.97, 0.85, labels[i], transform=axis.transAxes, fontweight='bold', fontsize=16)
            fig.tight_layout()
            plt.show()
            # save figure
            fig_fn = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis/figures/figS4_melt_factors_example.png'
            fig.savefig(fig_fn)
            print('Figure saved to file:', fig_fn)
        
    # Save results to file
    fsnow_obs = pd.DataFrame(site_rows, columns=['RGIId', 'fsnow_obs_m/C/d'])
    fsnow_obs.to_csv(fsnow_obs_fn, index=False)
    print('Observed fsnow saved to file:', fsnow_obs_fn)  
    
else:
    fsnow_obs = pd.read_csv(fsnow_obs_fn)
    print('Observed fsnow loaded from file')  

# Histogram in mm/C/d (values are stored in m/C/d)
plt.hist(fsnow_obs['fsnow_obs_m/C/d'] * 1e3, bins=100)
plt.xlabel(r'Melt factor of snow [mm $^{\circ}$C$^{-1}$ d$^{-1}$]')
plt.ylabel('Counts')
plt.show()    

In [None]:
# Compare modeled and observed fsnow, grouped by climate cluster
import seaborn as sns

# Cluster assignment for each glacier
clusters_fn = os.path.join(scm_dir, 'analysis', 'climate_clusters.csv')
clusters = pd.read_csv(clusters_fn)

# Pair modeled and observed values per glacier
fsnow_merged = fsnow_mod.merge(fsnow_obs, on='RGIId')
# Modeled minus observed, in m/C/d and in mm/C/d
fsnow_diff = fsnow_merged['fsnow_mod_m/C/d'] - fsnow_merged['fsnow_obs_m/C/d']
fsnow_merged['fsnow_mod-fsnow_obs_m/C/d'] = fsnow_diff
fsnow_merged['fsnow_mod-fsnow_obs_mm/C/d'] = fsnow_diff * 1e3

# Attach the cluster name of each glacier
cluster_names = clusters[['RGIId', 'clustName']]
fsnow_merged = fsnow_merged.merge(cluster_names, on='RGIId')

# Observed fsnow per cluster
sns.boxplot(fsnow_merged, x='fsnow_obs_m/C/d', hue='clustName')
plt.show()
# Distribution of modeled-minus-observed differences per cluster
sns.kdeplot(fsnow_merged, x='fsnow_mod-fsnow_obs_mm/C/d', hue='clustName') #, clip=(-0.5, 0.5))
plt.show() 