# Sample and process ERA5 data for each study site

In [4]:
import os
import glob
import xarray as xr
import numpy as np
from tqdm.auto import tqdm
import pandas as pd
import geopandas as gpd

In [3]:
# Root folder of the snow cover mapping project
scm_dir = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping'

# Every entry under "study-sites" whose name starts with "RGI" is treated as a
# study site; the entry's base name is used as the site's RGI ID. Paths are
# sorted before the names are extracted, so the IDs come out in path order.
site_pattern = os.path.join(scm_dir, 'study-sites', 'RGI*')
rgi_ids = list(map(os.path.basename, sorted(glob.glob(site_pattern))))
rgi_ids

['RGI60-01.00032',
 'RGI60-01.00033',
 'RGI60-01.00037',
 'RGI60-01.00038',
 'RGI60-01.00046',
 'RGI60-01.00312',
 'RGI60-01.00566',
 'RGI60-01.00570',
 'RGI60-01.00576',
 'RGI60-01.00675',
 'RGI60-01.01104',
 'RGI60-01.01151',
 'RGI60-01.01390',
 'RGI60-01.01524',
 'RGI60-01.01733',
 'RGI60-01.03594',
 'RGI60-01.03622',
 'RGI60-01.03861',
 'RGI60-01.04375',
 'RGI60-01.04624',
 'RGI60-01.06268',
 'RGI60-01.06279',
 'RGI60-01.06722',
 'RGI60-01.08155',
 'RGI60-01.08174',
 'RGI60-01.08246',
 'RGI60-01.08248',
 'RGI60-01.08262',
 'RGI60-01.08288',
 'RGI60-01.08296',
 'RGI60-01.08302',
 'RGI60-01.08336',
 'RGI60-01.08353',
 'RGI60-01.08389',
 'RGI60-01.08395',
 'RGI60-01.08403',
 'RGI60-01.08412',
 'RGI60-01.08427',
 'RGI60-01.09148',
 'RGI60-01.09162',
 'RGI60-01.09216',
 'RGI60-01.09411',
 'RGI60-01.09639',
 'RGI60-01.10196',
 'RGI60-01.10555',
 'RGI60-01.10689',
 'RGI60-01.10778',
 'RGI60-01.10851',
 'RGI60-01.10857',
 'RGI60-01.11616',
 'RGI60-01.11654',
 'RGI60-01.11788',
 'RGI60-01.1

In [None]:
# Folder that holds the ERA5 NetCDF inputs used by this notebook
era5_dir = os.path.join(scm_dir, 'Rounce_et_al_2023')

# Paths to the four input files: lapse rates, geopotential, monthly
# temperature and monthly total precipitation
lapse_fn = os.path.join(era5_dir, 'ERA5_lapserates_monthly.nc')
gp_fn = os.path.join(era5_dir, 'ERA5_geopotential.nc')
temp_fn = os.path.join(era5_dir, 'ERA5_temp_monthly.nc')
precip_fn = os.path.join(era5_dir, 'ERA5_totalprecip_monthly.nc')

# Lapse rates are opened as-is
lapse = xr.open_dataset(lapse_fn)

# Geopotential: drop length-1 dimensions, then add a height variable "h"
# obtained by dividing the geopotential "z" by 9.81
gp = xr.open_dataset(gp_fn).squeeze()
gp = gp.assign(h=gp['z'] / 9.81)
# Optional quick look at the derived heights:
# gp['h'].plot(vmin=-100, vmax=5e3)

# Monthly temperature and total precipitation are opened as-is
temp = xr.open_dataset(temp_fn)
precip = xr.open_dataset(precip_fn)

def water_year_da(time):
    """Return a water-year label for every timestamp in `time`.

    Timestamps in October-December keep their calendar year; timestamps in
    January-September get the previous calendar year. Each label therefore
    groups an October-September span under the year in which it starts.
    """
    year = time.dt.year
    return xr.where(time.dt.month >= 10, year, year - 1)


# Iterate over study sites
# NOTE(review): `smb`, `slas_obs_glacier`, `min_slas_obs`, `fsnow_obs` and
# `fsnow_obs_fn` are not defined in the visible part of this notebook; they
# must already exist in the session for this cell to run.
for rgi_id in rgi_ids:
    # Define outputs. makedirs(exist_ok=True) replaces the exists()/mkdir()
    # pair: it tolerates an existing folder and creates missing parents.
    era_dir = os.path.join(scm_dir, 'study-sites', rgi_id, 'ERA')
    os.makedirs(era_dir, exist_ok=True)
    era_fn = os.path.join(era_dir, f"{rgi_id}_ERA5_monthly_means.csv")
    # NOTE(review): nothing in this cell writes `era_fn`, so unless the file
    # is produced elsewhere this check never skips a site.
    if not os.path.exists(era_fn):
        # Load glacier boundaries
        aoi_fn = os.path.join(scm_dir, 'study-sites', rgi_id, 'AOIs', f"{rgi_id}_outline.shp")
        aoi = gpd.read_file(aoi_fn)

        # Subset model files to the grid cell nearest the glacier centroid.
        # The modulo maps a negative longitude onto 0-360 exactly as the
        # previous `+ 360` did, but leaves a longitude that is already in
        # 0-360 unchanged instead of pushing it past 360.
        # (presumably the ERA5 grids use 0-360 longitudes - confirm)
        lat = aoi.CenLat.values[0]
        lon = aoi.CenLon.values[0] % 360
        gp_glacier = gp.sel(latitude=lat, longitude=lon, method='nearest')
        lapse_glacier = lapse.sel(latitude=lat, longitude=lon, method='nearest')
        temp_glacier = temp.sel(latitude=lat, longitude=lon, method='nearest')
        precip_glacier = precip.sel(latitude=lat, longitude=lon, method='nearest')
        # average over the "expver" dimension
        temp_glacier = temp_glacier.mean(dim='expver')
        precip_glacier = precip_glacier.mean(dim='expver')
        # subset to October 2012 onward
        temp_glacier = temp_glacier.sel(time=slice("2012-10-01", None))
        precip_glacier = precip_glacier.sel(time=slice("2012-10-01", None))

        # Convert temperatures in K to C
        temp_glacier['t2m_C'] = temp_glacier['t2m'] - 273.15

        # # Resample temperatures to daily for PDD calculations
        # temp_glacier = temp_glacier.resample('1D').interpolate("linear")

        # Difference ERA5 heights from glacier elevations
        h = smb.bin_surface_h_initial
        elev_diff = h - gp_glacier['h'] - 2 # account for 2m temperature

        # Apply lapse rates to temperatures
        temp_glacier['t2m_C_adj'] = temp_glacier['t2m_C'] + (lapse_glacier['lapserate'] * elev_diff)

        # Calculate PDDs, cumulative count restarts every calendar year
        temp_glacier['PDD'] = xr.where(temp_glacier['t2m_C_adj'] > 0, temp_glacier['t2m_C_adj'], 0) # average daily for month
        temp_glacier['PDD'] = temp_glacier['PDD'] * temp_glacier.time.dt.days_in_month # total for month
        temp_glacier['PDD_cumsum'] = temp_glacier['PDD'].groupby(temp_glacier['time'].dt.year).cumsum(dim="time")

        # Estimate snow as precipitation when adjusted temperatures are negative
        precip_glacier['snow'] = xr.where(temp_glacier['t2m_C_adj'] < 0, precip_glacier['tp'], 0) # average daily for month
        precip_glacier['snow'] = precip_glacier['snow'] * precip_glacier.time.dt.days_in_month # total for month

        # Calculate cumulative snowfall, restarting every October (water year)
        precip_glacier = precip_glacier.assign_coords(water_year=water_year_da(precip_glacier['time']))
        precip_glacier['snow_cumsum'] = precip_glacier['snow'].groupby("water_year").cumsum(dim="time")

        # Estimate melt factors of snow: for each observation date, take the
        # ratio of cumulative snowfall to cumulative PDDs interpolated at the
        # observed snowline altitude
        dates = slas_obs_glacier['Date'].values
        fsnows = np.zeros(len(dates))
        # NOTE(review): `h_adj` is computed but never used; the interpolation
        # below runs against `h`, while the observed snowline altitude has a
        # minimum subtracted. Confirm which elevation axis is intended.
        h_adj = h - h.min() # remove minimum elevation for comparison with observed
        h_vals = h.values.ravel()  # same for every date, so built once
        for j, date in enumerate(dates):
            sla_obs_date = slas_obs_glacier.loc[slas_obs_glacier['Date']==date, 'SLA_obs_m'].values[0]
            sla_obs_date -= min_slas_obs.loc[min_slas_obs['RGIId']==rgi_id, 'SLA_obs_m_min'].values[0]
            pdd_sum = np.interp(sla_obs_date,
                                h_vals,
                                temp_glacier.sel(time=date)['PDD_cumsum'].values.ravel())
            snow_sum = np.interp(sla_obs_date,
                                 h_vals,
                                 precip_glacier.sel(time=date)['snow_cumsum'].values.ravel())
            # `fsnows` starts at zero, so only the non-zero-PDD case needs a
            # write; this also avoids dividing by zero
            if pdd_sum != 0:
                fsnows[j] = snow_sum / pdd_sum

        # One row per site: the median melt factor across observation dates
        df = pd.DataFrame({'RGIId': [rgi_id],
                           'fsnow_obs': [np.nanmedian(fsnows)]})
        fsnow_obs = pd.concat([fsnow_obs, df], axis=0)

    # Save to file (runs on every iteration, whether or not the site was processed)
    fsnow_obs.to_csv(fsnow_obs_fn, index=False)
    print('Observed melt factors of snow saved to file:', fsnow_obs_fn)

# NOTE(review): this `else` belongs to the `for` loop above, so it runs every
# time the loop finishes without `break` and replaces the freshly computed
# table with the copy read back from disk. It looks like the remnant of an
# `if not os.path.exists(fsnow_obs_fn): ... else: ...` guard - confirm intent.
else:
    fsnow_obs = pd.read_csv(fsnow_obs_fn)
    print('Observed melt factors of snow loaded from file.')
    
# Histogram of the per-site melt factors, scaled by 1e3 for display.
# NOTE(review): `plt` is not imported in the visible cells; it must already be
# in scope (e.g. `import matplotlib.pyplot as plt`).
plt.hist(fsnow_obs['fsnow_obs'] * 1e3, bins=50)
# Raw string: "\c" is not a valid Python escape, so the non-raw literal raised
# an invalid-escape warning. The label text itself is unchanged.
plt.xlabel(r'Melt factor of snow [mm $^{\circ}$C$^{-1}$ d$^{-1}$]')
plt.ylabel('Counts')
plt.show()
