# Calculate median weekly trends in snow cover for each study site

In [None]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.stats import iqr
import seaborn as sns
import numpy as np
import xarray as xr
from scipy.stats import median_abs_deviation
import sys

## Load compiled glacier boundaries (AOIs) and climate clusters

In [None]:
# Root directory of the snow cover mapping data
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'

# Directory where analysis outputs are read from and written to
out_path = os.path.join(scm_path, 'dataset', 'analysis')

# Put the project's `scripts` folder on the import path so the utility
# module can be imported (referred to as `f` throughout the notebook)
code_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis'
sys.path.append(os.path.join(code_path, 'scripts'))
import utils as f

# Read the glacier boundaries (AOIs) and cast the region columns to integers
aois_fn = os.path.join(scm_path, 'dataset', 'AOIs.gpkg')
aois = gpd.read_file(aois_fn)
for region_col in ('O1Region', 'O2Region'):
    aois[region_col] = aois[region_col].astype(int)
print('All glacier boundaries loaded from file.')

# Read the climate cluster table
clusters_fn = os.path.join(out_path, 'climate_clusters.csv')
clusters = pd.read_csv(clusters_fn)
print('Clusters loaded from file.')

## Pre-processing: Compile classified image and statistics files for each site into a single zarr file

In [None]:
# Compile the classified image files of every site that does not yet have
# a `<RGIId>_classifications.zarr` store on disk
rgi_ids = aois['RGIId'].unique()
for count, rgi_id in enumerate(rgi_ids, start=1):
    print(count, rgi_id)
    out_fn = os.path.join(scm_path, 'study-sites', rgi_id, f"{rgi_id}_classifications.zarr")
    if os.path.exists(out_fn):
        # Output already present from an earlier run: nothing to do
        continue
    # Boundary row(s) for this site only
    aoi = aois.loc[aois['RGIId'] == rgi_id].reset_index(drop=True)
    f.compile_classified_image_files(scm_path, rgi_id, aoi)

## Calculate weekly median trends for each site

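For each site, run 100 Monte Carlo simulations. Each simulation randomly samples 80% of the site's observations (without replacement) and calculates the weekly medians of the sampled snow cover statistics.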

In [None]:
# Weekly medians of the snow cover statistics under Monte Carlo subsampling.
# The result is cached as NetCDF: computed when the file is missing, otherwise
# re-loaded from disk.
scs_MCs_fn = os.path.join(out_path, 'median_snow_cover_stats_MC.nc')
if not os.path.exists(scs_MCs_fn):
    # Set up Monte Carlo parameters
    nMC = 100  # Number of Monte Carlo simulations
    sample_fraction = 0.8  # Fraction of observations sampled in each simulation

    # One xarray.Dataset per site; concatenated along RGIId after the loop
    scs_MCs_list = []

    # Iterate over ALL study sites. (A leftover `[0:1]` slice used to restrict
    # this loop to the first site only, which left every other site out of the
    # saved file and of the statistics derived from it.)
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load the site's snow cover statistics and tag each observation with
        # its ISO week of year
        scs_fn = os.path.join(scm_path, 'study-sites', rgi_id, f'{rgi_id}_classifications.zarr')
        scs_site = f.load_snow_cover_stats(scs_fn)
        scs_site = scs_site.assign_coords({'WOY': scs_site['time'].dt.isocalendar().week})

        # Number of observations available and number drawn per simulation
        n_obs = scs_site.sizes['time']
        nsamp = int(n_obs * sample_fraction)

        # Monte Carlo simulations
        results = []
        for i in range(nMC):
            # Draw observation positions without replacement and select them
            # with a single positional index, instead of building one
            # single-time dataset per sample and concatenating them
            sampled_idx = np.random.choice(n_obs, size=nsamp, replace=False)
            scs_site_MC = scs_site.isel(time=sampled_idx).sortby('time')

            # Weekly median of every data variable in the sampled subset
            weekly_medians = scs_site_MC.groupby('WOY').median().to_dataframe().reset_index()
            weekly_medians['MC_run'] = i
            results.append(weekly_medians)

        # Concatenate results for all Monte Carlo runs
        all_results = pd.concat(results)
        all_results['RGIId'] = rgi_id  # add RGI ID

        # Convert to xarray.Dataset indexed by (RGIId, MC_run, WOY)
        ds = all_results.set_index(['RGIId', 'MC_run', 'WOY']).to_xarray()
        scs_MCs_list.append(ds)

    # Combine results into a single xarray.Dataset
    scs_MCs_ds = xr.concat(scs_MCs_list, dim='RGIId')

    # Add global and data variable attributes. The description is built from
    # the parameters above so it cannot drift from what was actually run.
    scs_MCs_ds.attrs['title'] = 'Weekly Snow Cover Monte Carlo Simulations'
    scs_MCs_ds.attrs['description'] = ('For each study glacier, snow cover observations were grouped by week of year (WOY). '
                                      f'In each of {nMC} Monte Carlo iterations (MC_run), {sample_fraction:.0%} of observations and their corresponding '
                                      'snow cover statistics were randomly sampled.')
    scs_MCs_ds.attrs['institution'] = 'Boise State University'
    scs_MCs_ds.attrs['references'] = 'doi:10.1029/2025GL115523'
    scs_MCs_ds.attrs['horizontal_CRS'] = 'WGS84 (EPSG:4326)'
    scs_MCs_ds.attrs['vertical_CRS'] = 'EGM96 geoid (EPSG:5773)'
    scs_MCs_ds.attrs['date_modified'] = '2025-06-07'
    scs_MCs_ds.attrs['time_coverage_start'] = '2013-05-01'
    scs_MCs_ds.attrs['time_coverage_end'] = '2023-10-31'
    scs_MCs_ds['AAR'].attrs['long_name'] = 'accumulation area ratio'
    scs_MCs_ds['AAR'].attrs['units'] = 'unitless'
    scs_MCs_ds['SLA'].attrs['long_name'] = 'snowline altitude'
    scs_MCs_ds['SLA'].attrs['units'] = 'meters above sea level'
    scs_MCs_ds['SLA_lower_bound'].attrs['long_name'] = 'snowline altitude lower bound'
    scs_MCs_ds['SLA_lower_bound'].attrs['units'] = 'meters above sea level'
    scs_MCs_ds['SLA_upper_bound'].attrs['long_name'] = 'snowline altitude upper bound'
    scs_MCs_ds['SLA_upper_bound'].attrs['units'] = 'meters above sea level'
    for dv in ['snow_area', 'ice_area', 'water_area']:
        scs_MCs_ds[dv].attrs['units'] = 'meters squared'
        scs_MCs_ds[dv].attrs['long_name'] = (dv).split('_')[0] + ' cover area'

    # Make sure CRS is set
    # NOTE(review): the `.rio` accessor only exists once rioxarray has been
    # imported; this notebook does not import it directly, so presumably
    # `utils` does -- confirm.
    scs_MCs_ds = scs_MCs_ds.rio.write_crs("EPSG:4326")

    # Save to file
    scs_MCs_ds.to_netcdf(scs_MCs_fn)
    print("Monte Carlo simulations completed and saved to file:", scs_MCs_fn)

else:
    scs_MCs_ds = xr.open_dataset(scs_MCs_fn)
    print('Monte Carlo simulations loaded from file:', scs_MCs_fn)

scs_MCs_ds


In [None]:
# -----Compile minimum snow cover statistics
# For every site in the Monte Carlo dataset: the minimum weekly-median AAR,
# the week of year (WOY) at which it occurs, and the spread (median absolute
# deviation, MAD) of both across Monte Carlo runs. Cached as CSV.
min_aars_woys_fn = os.path.join(out_path, 'minimum_snow_cover_stats.csv')
# check if exists in directory
if not os.path.exists(min_aars_woys_fn):
    # One dict per site; the dataframe is built once after the loop instead of
    # re-concatenating a growing dataframe on every iteration
    site_rows = []

    # iterate over sites in the Monte Carlo dataset
    for rgi_id in tqdm(scs_MCs_ds.RGIId.values):
        # AAR for this site, with dimensions MC_run and WOY
        aar_site = scs_MCs_ds.sel(RGIId=rgi_id)['AAR']

        # AAR magnitude: minimum over WOY of the across-run median curve.
        # AAR spread: MAD across runs of each run's own minimum over WOY.
        aar_median = float(aar_site.median(dim='MC_run').min(dim='WOY').values)
        aar_mad = float(median_abs_deviation(aar_site.min(dim='WOY').values))

        # Timing: WOY of the AAR minimum in each run, then median and MAD
        # across runs (int() truncates any fractional part)
        Imins = aar_site.argmin(dim='WOY').values
        woys = scs_MCs_ds.WOY.values[Imins]
        woy_median = int(np.nanmedian(woys))
        woy_mad = int(median_abs_deviation(woys))

        site_rows.append({
            'RGIId': rgi_id,
            'WOY_median': woy_median,
            'WOY_MAD': woy_mad,
            'AAR_median': aar_median,
            'AAR_MAD': aar_mad,
            })

    # assemble the full dataframe
    min_aars_woys = pd.DataFrame(
        site_rows,
        columns=['RGIId', 'WOY_median', 'WOY_MAD', 'AAR_median', 'AAR_MAD'],
    )

    # save to file
    min_aars_woys.to_csv(min_aars_woys_fn, index=False)
    print('Minimum AARs and WOYs saved to file: ', min_aars_woys_fn)

else:
    # load from file
    min_aars_woys = pd.read_csv(min_aars_woys_fn)
    print('Minimum AARs and WOYs loaded from file.')

min_aars_woys

In [None]:
# Add subregion and cluster columns.
# The lookup tables are de-duplicated on RGIId before merging: a repeated key
# on the right-hand side would otherwise repeat the matching site rows and
# skew the boxplots and the summary statistics computed from this dataframe.
if 'Subregion' not in min_aars_woys.keys():
    subregion_lookup = aois[['RGIId', 'Subregion']].drop_duplicates(subset='RGIId')
    min_aars_woys = min_aars_woys.merge(subregion_lookup, on='RGIId')
if 'clustName' not in min_aars_woys.keys():
    cluster_lookup = clusters[['RGIId', 'clustName']].drop_duplicates(subset='RGIId')
    min_aars_woys = min_aars_woys.merge(cluster_lookup, on='RGIId')

# One figure per statistic: top panel grouped by subregion, bottom panel
# grouped by climate cluster
for column, title in (('AAR_median', 'AARs'), ('WOY_median', 'WOYs')):
    fig, ax = plt.subplots(2, 1, figsize=(10, 12))
    sns.boxplot(data=min_aars_woys, x=column, palette='mako', hue='Subregion', ax=ax[0])
    sns.boxplot(data=min_aars_woys, x=column, hue='clustName', ax=ax[1])
    fig.suptitle(title)
    fig.tight_layout()
    plt.show()

In [None]:
# Across-site medians of the per-site minimum AAR and of its week of year
for label, column in (('Median AAR = ', 'AAR_median'), ('Median WOY = ', 'WOY_median')):
    print(label, min_aars_woys[column].median())

In [None]:
# Across-site medians of the per-site Monte Carlo spread (MAD) in minimum AAR
# and in its week of year
for label, column in (('Median AAR MAD = ', 'AAR_MAD'), ('Median WOY MAD = ', 'WOY_MAD')):
    print(label, min_aars_woys[column].median())


## Estimate debris cover area at each site

In [None]:
# Mean glacier and debris areas at each site's median week of minimum snow
# cover. Cached as CSV: computed when the file is missing, otherwise re-loaded.
debris_areas_fn = os.path.join(out_path, 'debris_cover_areas.csv')
if not os.path.exists(debris_areas_fn):
    # One small dataframe per site, concatenated once after the loop instead
    # of re-concatenating a growing dataframe on every iteration
    site_frames = []

    # Iterate over sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # get median WOY of snow cover minimum
        # (raises IndexError if the site is absent from min_aars_woys)
        woy = min_aars_woys.loc[min_aars_woys['RGIId']==rgi_id, 'WOY_median'].values[0]

        # load snow cover stats
        scs_fn = os.path.join(scm_path, 'study-sites', rgi_id, f"{rgi_id}_snow_cover_stats_adjusted2.csv")
        scs = pd.read_csv(scs_fn)
        scs['datetime'] = pd.to_datetime(scs['datetime'])
        # add WOY column (ISO week of year)
        scs['WOY'] = scs['datetime'].dt.isocalendar().week

        # mean glacier and debris area per WOY, keeping only the target week;
        # a site with no observation in that week contributes no row
        df = scs.groupby('WOY')[['glacier_area_m2', 'debris_area_m2']].mean().reset_index()
        df = df.loc[df['WOY']==woy].reset_index(drop=True)
        df['RGIId'] = rgi_id

        site_frames.append(df)

    # concatenate all sites
    debris_areas = pd.concat(site_frames, axis=0) if site_frames else pd.DataFrame()

    # Debris area relative to glacier area. Despite the column name this is a
    # fraction between 0 and 1, not a percentage (no factor of 100 is applied).
    debris_areas['debris_percent_area'] = debris_areas['debris_area_m2'] / debris_areas['glacier_area_m2']

    # Save to file
    debris_areas.reset_index(drop=True, inplace=True)
    debris_areas.to_csv(debris_areas_fn, index=False)
    print('Debris cover areas saved to file:', debris_areas_fn)

else:
    debris_areas = pd.read_csv(debris_areas_fn)

# Plot histogram
plt.hist(debris_areas['debris_percent_area'], bins=100)
plt.show()

debris_areas



In [None]:
# Show the sites whose debris-to-glacier area ratio exceeds 0.05
high_debris = debris_areas['debris_percent_area'] > 0.05
debris_areas.loc[high_debris]

## Assess interannual variability in AAR magnitude and timing at each site

In [None]:
# Interannual variability of the annual AAR minimum (magnitude and timing) at
# each site, from 2016 onwards. Cached as CSV.
aar_var_stats_fn = os.path.join(out_path, 'minimum_snow_cover_stats_interannual_variability_2016-2023.csv')
if os.path.exists(aar_var_stats_fn):
    aar_var_stats = pd.read_csv(aar_var_stats_fn)
    print('AAR interannual variability stats loaded from file.')

else:
    # Defined once here rather than being redefined on every loop iteration
    def get_min_time(group):
        """Return the time stamp at which AAR is lowest within `group`."""
        return group['time'].isel(time=group['AAR'].argmin(dim='time'))

    # One dict per site; the dataframe is built once after the loop
    site_rows = []
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        scs_fn = os.path.join(scm_path, 'study-sites', rgi_id, f'{rgi_id}_classifications.zarr')
        scs = f.load_snow_cover_stats(scs_fn)
        # Add Year and WOY coordinates (both from the ISO calendar)
        scs = scs.assign_coords({'Year': scs['time'].dt.isocalendar().year})
        scs = scs.assign_coords({'WOY': scs['time'].dt.isocalendar().week})
        # subset to 2016+
        # NOTE(review): the cut-off is a calendar date while 'Year' is the ISO
        # year, so observations on 1-3 Jan 2016 would fall into an ISO-2015
        # group -- only matters if such winter observations exist; confirm.
        scs = scs.sel(time=slice(pd.Timestamp('2016-01-01'), None))
        # identify the time of the AAR minimum in each year, then pull the
        # full record at each of those times
        min_times = scs.groupby('Year').apply(get_min_time)
        annual_mins_site = xr.concat([scs.sel(time=time) for time in min_times.values], dim='time')
        # summarize the annual minima: range, median and IQR of AAR and WOY
        site_rows.append({'RGIId': rgi_id,
                          'AAR_min': float(annual_mins_site['AAR'].min().values),
                          'AAR_max': float(annual_mins_site['AAR'].max().values),
                          'AAR_median': float(annual_mins_site['AAR'].median().values),
                          'AAR_IQR': iqr(annual_mins_site['AAR'].values),
                          'WOY_min': float(annual_mins_site['WOY'].min().values),
                          'WOY_max': float(annual_mins_site['WOY'].max().values),
                          'WOY_median': float(annual_mins_site['WOY'].median().values),
                          'WOY_IQR': iqr(annual_mins_site['WOY'].values)})

    aar_var_stats = pd.DataFrame(site_rows)

    # Save to file
    aar_var_stats.to_csv(aar_var_stats_fn, index=False)
    print('AAR interannual variability stats saved to file:', aar_var_stats_fn)

aar_var_stats


In [None]:
# -----Print stats
# Per-site spread between the largest and smallest annual AAR minimum,
# summarized across sites as median +/- IQR (same form as the WOY timing
# summary). The previous print reported the IQRs of AAR_min and AAR_max
# instead and never used the AAR_range column computed here.
aar_var_stats['AAR_range'] = aar_var_stats['AAR_max'] - aar_var_stats['AAR_min']
print(f"AAR range for all sites: {aar_var_stats['AAR_range'].median()} +/- {iqr(aar_var_stats['AAR_range'])}")
# print('By subregion:')
# print('Median')
# print(aar_var_stats.groupby(['Subregion'])['AAR_range'].median())
# print('\n')
# print('IQR')
# print(aar_var_stats.groupby(['Subregion'])['AAR_range'].apply(iqr))

In [None]:
# Per-site spread (in weeks of year) between the latest and earliest annual
# AAR minimum, summarized across sites as median +/- IQR
woy_range = aar_var_stats['WOY_max'] - aar_var_stats['WOY_min']
aar_var_stats['WOY_range'] = woy_range
woy_range_median = woy_range.median()
woy_range_iqr = iqr(woy_range)
print(f"AAR TIMING range for all sites: {woy_range_median} +/- {woy_range_iqr}\n")
# print('By subregion:')
# print('Median')
# print(aar_var_stats.groupby(['Subregion'])['WOY_range'].median())
# print('\n')
# print('IQR')
# print(aar_var_stats.groupby(['Subregion'])['WOY_range'].apply(iqr))

## Identify the approximate start and end of the melt season at each site from ERA5-Land data

In [None]:
# Approximate melt season start and end week of year (WOY) at each site,
# from weekly medians of the daily ERA5-Land data. Cached as CSV.
melt_season_fn = os.path.join(out_path, 'melt_season_timing.csv')

if not os.path.exists(melt_season_fn):
    # One dict per site; the dataframe is built once after the loop instead of
    # re-concatenating a growing dataframe on every iteration
    site_rows = []

    # Iterate over sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load ERA data
        era_fn = os.path.join(scm_path, 'study-sites', rgi_id, 'ERA', f'{rgi_id}_ERA5-Land_daily_means.csv')
        era = pd.read_csv(era_fn)
        era['Date'] = pd.to_datetime(era['Date'])

        # Add WOY and year columns (both from the ISO calendar)
        era['WOY'] = era['Date'].dt.isocalendar().week
        era['year'] = era['Date'].dt.isocalendar().year

        # Calculate weekly medians for 2013 onwards (no upper bound is applied)
        era = era.loc[era['year'] > 2012]
        if '.geo' in era.keys():
            era = era.drop(columns=['.geo'])
        era_weekly_median = era.groupby('WOY').median().reset_index()

        # Estimate start and end of melt seasons
        # Start = first week with positive cumulative PDDs; falls back to
        # week 52 when no week qualifies. The empty case is tested explicitly
        # (instead of a bare `except`) so that real errors such as a missing
        # column are no longer silently turned into week 52.
        start_weeks = era_weekly_median.loc[era_weekly_median['positive_degree_days_annual_cumsum'] > 0, 'WOY'].values
        woy_start = start_weeks[0] if len(start_weeks) > 0 else 52
        # End = first week after week 30 with 0 PDDs and positive snowfall
        # NOTE(review): unlike the start, there is no fallback here -- a site
        # with no qualifying week raises IndexError and aborts the loop.
        woy_end = era_weekly_median.loc[(era_weekly_median['WOY'] > 30)
                                     & (era_weekly_median['positive_degree_days'] == 0)
                                     & (era_weekly_median['mean_snowfall_sum'] > 0), 'WOY'].values[0]

        # Add to the list of site results
        site_rows.append({'RGIId': rgi_id,
                          'melt_season_start_WOY': woy_start,
                          'melt_season_end_WOY': woy_end,
                          })

    # Assemble the full dataframe
    melt_season_df = pd.DataFrame(
        site_rows,
        columns=['RGIId', 'melt_season_start_WOY', 'melt_season_end_WOY'],
    )

    # Save to file
    melt_season_df.to_csv(melt_season_fn, index=False)
    print('Melt season timing CSV saved to file:', melt_season_fn)

else:
    melt_season_df = pd.read_csv(melt_season_fn)
    print('Melt season timing CSV loaded from file.')

melt_season_df
        

In [None]:
# Plot some results: per subregion, histograms of melt season start (magenta)
# and end (blue) weeks, with vertical lines at their means

# Add subregion and cluster columns.
# Each column is looked up by RGIId in a de-duplicated table, so a site with
# repeated rows in `aois`/`clusters` no longer breaks the assignment; a site
# missing from a table gets NaN.
if 'Subregion' not in melt_season_df.keys():
    subregion_by_id = aois[['RGIId', 'Subregion']].drop_duplicates(subset='RGIId').set_index('RGIId')['Subregion']
    melt_season_df['Subregion'] = melt_season_df['RGIId'].map(subregion_by_id)
if 'clustName' not in melt_season_df.keys():
    cluster_by_id = clusters[['RGIId', 'clustName']].drop_duplicates(subset='RGIId').set_index('RGIId')['clustName']
    melt_season_df['clustName'] = melt_season_df['RGIId'].map(cluster_by_id)

subregions = melt_season_df['Subregion'].dropna().unique()
nsubregions = len(subregions)
# squeeze=False keeps `ax` a 2-D array even when there is a single subregion
# (otherwise plt.subplots returns a bare Axes that cannot be indexed)
fig, ax = plt.subplots(nsubregions, 1, figsize=(8, nsubregions*4), squeeze=False)
for i, subregion in enumerate(subregions):
    panel = ax[i, 0]
    melt_season_subregion_df = melt_season_df.loc[melt_season_df['Subregion']==subregion]
    panel.hist(melt_season_subregion_df['melt_season_start_WOY'], bins=20, facecolor='m', alpha=0.5)
    panel.axvline(melt_season_subregion_df['melt_season_start_WOY'].mean(), color='m', linewidth=2)
    panel.hist(melt_season_subregion_df['melt_season_end_WOY'], bins=20, facecolor='b', alpha=0.5)
    panel.axvline(melt_season_subregion_df['melt_season_end_WOY'].mean(), color='b', linewidth=2)
    panel.set_title(subregion)

plt.show()