# Calculate median weekly trends in snow cover for each study site

In [None]:
import os
import glob
import shutil
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import sys
from scipy.stats import iqr
from shapely import wkt
import seaborn as sns
import numpy as np

## Define paths in directory

In [None]:
# Root of the snow cover mapping data directory. Later cells read from and
# write to its 'analysis' and 'study-sites' subfolders.
# NOTE: machine-specific absolute path -- edit before running elsewhere.
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
# Presumably the folder where figures are saved; it is not used in the cells
# shown here -- TODO confirm.
figures_out_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/'

## Load compiled glacier boundaries (AOIs) and climate clusters

In [None]:
# -----Glacier boundaries (AOIs)
aois_fn = os.path.join(scm_path, 'analysis', 'all_aois.shp')
aois = gpd.read_file(aois_fn)
# Cast the two region-code columns to integers, one column at a time
for region_col in ('O1Region', 'O2Region'):
    aois[region_col] = aois[region_col].astype(int)
print('All glacier boundaries loaded from file.')

# -----Climate cluster assignments
clusters_fn = os.path.join(scm_path, 'analysis', 'climate_clusters.csv')
clusters = pd.read_csv(clusters_fn)
print('Clusters loaded from file.')

## Compile snow cover stats for individual sites if necessary

In [None]:
# # Merge snow cover stats files for each site if necessary

# # Define columns to save
# out_cols = ['RGIId', 'datetime', 'source', 'SCA_m2', 'AAR', 'ELA_from_AAR_m', 'snowline_elevs_m', 'snowline_elevs_median_m', 'snowline_geometry']

# # Define function to convert lists of X and Y coordinates to a LineString and transform to WGS84
# def create_linestring_wgs84(x_coords, y_coords, transformer):
#     # Create list of (x, y) coordinate tuples
#     points = list(zip(x_coords, y_coords))
#     # Create LineString in UTM
#     line = LineString(points)
#     # Transform to WGS84
#     line_wgs84 = LineString([transformer.transform(x, y) for x, y in line.coords])
#     return line_wgs84

# # Iterate over sites
# for rgi_id in tqdm(rgi_ids):
#     # Define output file name
#     scs_fn = os.path.join(study_sites_path, rgi_id, f'{rgi_id}_snow_cover_stats.csv')
#     if not os.path.exists(scs_fn):
#         # Initialize dataframe
#         scs = pd.DataFrame()
#         # Get snow cover stats file names
#         sc_fns = sorted(glob.glob(os.path.join(study_sites_path, rgi_id, 'imagery', 'snowlines', '*_snowline.csv')))
#         # Merge files
#         for fn in sc_fns:
#             if 'PlanetScope' not in fn:
#                 sc = pd.read_csv(fn)
#                 scs = pd.concat([scs, sc])
#         # Merge redundant columns
#         cols = list(scs.keys())
#         if ('site_name' in cols) & ('RGIId' in cols):
#             scs['RGIId'] = scs['RGIId'].fillna(scs['site_name'])
#         elif 'site_name' in cols:
#             scs.rename(columns={'site_name': 'RGIId'}, inplace=True)
#         if ('dataset' in cols) & ('source' in cols):
#             scs['source'] = scs['source'].fillna(scs['dataset'])
#         elif 'dataset' in cols:
#             scs.rename(columns={'dataset': 'source'}, inplace=True)
#         scs.reset_index(drop=True, inplace=True)
#         # Ensure coordinate lists are correctly formatted
#         scs['snowlines_coords_X'] = scs['snowlines_coords_X'].apply(lambda x: literal_eval(x) if x != "[]" else [])
#         scs['snowlines_coords_Y'] = scs['snowlines_coords_Y'].apply(lambda x: literal_eval(x) if x != "[]" else [])
#         # Get the UTM CRS for transformation
#         crs_utm = scs['HorizontalCRS'].drop_duplicates().dropna().values[0]
#         transformer = Transformer.from_crs(crs_utm, "EPSG:4326", always_xy=True)
#         # Create and transform snowline geometries
#         scs['snowline_geometry'] = scs.apply(lambda row: create_linestring_wgs84(row['snowlines_coords_X'], row['snowlines_coords_Y'], transformer), axis=1)
#         # Select the relevant columns
#         scs = scs[out_cols]
#         # Save merged, adjusted dataframe
#         scs.to_csv(scs_fn, index=False)

## Calculate weekly median trends for each site

In [None]:
# Weekly (ISO week-of-year) quartiles of the snow cover statistics for each
# study site. Results are cached as a CSV: computed and saved on the first run,
# loaded from file on later runs.
scs_medians_fn = os.path.join(scm_path, 'analysis', 'weekly_median_snow_cover_stats.csv')
# Statistics summarized per week. Defined outside the branch below so the name
# exists whether the results are computed or loaded from the cache.
columns = ['AAR', 'snowline_elevs_median_m', 'SCA_m2', 'ELA_from_AAR_m']
if not os.path.exists(scs_medians_fn):
    # quantile level -> suffix appended to each statistic's column name
    quantile_suffixes = {0.25: '_P25', 0.5: '_P50', 0.75: '_P75'}
    # per-site results are collected here and concatenated once at the end
    site_quartiles = []

    # Iterate over study sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load the compiled snow cover stats for this site
        scs_fn = os.path.join(scm_path, 'study-sites', rgi_id, f'{rgi_id}_snow_cover_stats.csv')
        if not os.path.exists(scs_fn):
            # skip the site instead of aborting the whole run
            print(f'Snow cover stats file not found for {rgi_id}, skipping.')
            continue
        scs_site = pd.read_csv(scs_fn)

        # Add WOY column
        if 'datetime' not in scs_site.keys():
            print(f'Error with {rgi_id}')
            continue
        scs_site['datetime'] = pd.to_datetime(scs_site['datetime'], format='mixed')
        scs_site['WOY'] = scs_site['datetime'].dt.isocalendar().week

        # Weekly quartiles: one frame per quantile level, joined column-wise.
        # All frames come from the same groupby, so they share the WOY index.
        weekly = scs_site[['WOY'] + columns].groupby(by='WOY')
        qs = pd.concat([weekly.quantile(level).add_suffix(suffix)
                        for level, suffix in quantile_suffixes.items()], axis=1)
        qs = qs.reindex(sorted(qs.columns), axis=1)
        qs['WOY'] = qs.index
        qs['RGIId'] = rgi_id
        site_quartiles.append(qs)

    # Combine all sites (empty frame if no site could be processed)
    scs_medians = pd.concat(site_quartiles) if site_quartiles else pd.DataFrame()
    # save to file
    scs_medians.to_csv(scs_medians_fn, index=False)
    print('Median weekly snow trends saved to file: ', scs_medians_fn)

else:
    scs_medians = pd.read_csv(scs_medians_fn)
    print('Median weekly snow cover trends loaded from file.')

# display the table
scs_medians
        

In [None]:
# -----Compile minimum snow cover median statistics
# For every site, find the extreme of each weekly-median (P50) series and the
# week of year (WOY) in which it occurs, plus the weekly-median values at WOY 39.
# Results are cached as a CSV: computed on the first run, loaded afterwards.
min_snow_cover_stats_fn = os.path.join(scm_path, 'analysis', 'min_snow_cover_stats.csv')
# check if exists in directory
if not os.path.exists(min_snow_cover_stats_fn):
    # weekly-median columns present in the quartile table
    median_columns = [x for x in scs_medians.columns if 'P50' in x]
    # ELA and snowline elevation use their maximum; all other columns (AAR, SCA) use their minimum
    max_columns = ('ELA_from_AAR_m_P50', 'snowline_elevs_median_m_P50')
    # one dict per site, turned into a DataFrame once at the end
    site_rows = []

    # iterate over site names in median snow cover stats dataframe
    for rgi_id in tqdm(sorted(scs_medians['RGIId'].drop_duplicates().values)):
        # grab weekly median snowline stats for site
        scs_medians_site = scs_medians.loc[scs_medians['RGIId'] == rgi_id]
        woys = scs_medians_site['WOY'].astype(int).to_numpy()
        row = {'RGIId': rgi_id}

        for column in median_columns:
            label = 'max' if column in max_columns else 'min'
            values = scs_medians_site[column].to_numpy(dtype=float)
            if np.isnan(values).all():
                # no valid observations: record NaN rather than an arbitrary row
                extreme_value, extreme_woy = np.nan, np.nan
            else:
                # positional index of the extreme, ignoring NaN weeks
                i = np.nanargmax(values) if label == 'max' else np.nanargmin(values)
                extreme_value, extreme_woy = values[i], woys[i]
            row[f'{column}_{label}'] = extreme_value
            row[f'{column}_{label}_WOY'] = extreme_woy

        # save the weekly medians at ISO week 39 (late September) for reference;
        # stored as '<median column>_WOY39'
        weekly = scs_medians_site[median_columns].set_axis(woys, axis=0)
        if 39 in weekly.index:
            woy39 = weekly.loc[39]
        elif (38 in weekly.index) and (40 in weekly.index):
            # If WOY 39 not in df, interpolate linearly from WOY 38 and 40
            woy39 = (weekly.loc[38] + weekly.loc[40]) / 2
        else:
            # neither WOY 39 nor both neighbours are available
            woy39 = pd.Series(np.nan, index=median_columns)
        for column in median_columns:
            row[f'{column}_WOY39'] = woy39[column]

        site_rows.append(row)

    min_snow_cover_stats = pd.DataFrame(site_rows)

    # save to file
    min_snow_cover_stats.to_csv(min_snow_cover_stats_fn, index=False)
    print('Minimum median snow cover stats saved to file: ', min_snow_cover_stats_fn)

else:
    # load from file
    min_snow_cover_stats = pd.read_csv(min_snow_cover_stats_fn)
    print('Minimum median snow cover stats loaded from file.')

# display the table
min_snow_cover_stats

In [None]:
# Attach each site's subregion (from the AOIs) and climate cluster name
min_snow_cover_stats[['Subregion', 'clustName']] = '', ''
for rgi_id in min_snow_cover_stats['RGIId'].drop_duplicates().values:
    # take the first matching entry from each lookup table
    subregion = aois.loc[aois['RGIId'] == rgi_id, 'Subregion'].values[0]
    clustName = clusters.loc[clusters['RGIId'] == rgi_id, 'clustName'].values[0]
    is_site = min_snow_cover_stats['RGIId'] == rgi_id
    min_snow_cover_stats.loc[is_site, ['Subregion', 'clustName']] = subregion, clustName

# Cumulative KDEs of the week of year of the minimum weekly-median AAR,
# grouped by subregion (top panel) and by climate cluster (bottom panel)
fig, ax = plt.subplots(2, 1, figsize=(10, 12))
kde_kwargs = dict(data=min_snow_cover_stats, x='AAR_P50_min_WOY', cumulative=True)
sns.kdeplot(hue='Subregion', palette='mako', ax=ax[0], **kde_kwargs)
sns.kdeplot(hue='clustName', ax=ax[1], **kde_kwargs)
plt.show()

In [None]:
# Print stats: median across sites of the minimum weekly-median AAR,
# for each (subregion, climate cluster) combination
min_snow_cover_stats.groupby(by=['Subregion', 'clustName'])['AAR_P50_min'].median()

In [None]:
# Mean week of year in which the weekly-median ELA reaches its maximum, per climate cluster
min_snow_cover_stats.groupby(by=['clustName'])['ELA_from_AAR_m_P50_max_WOY'].mean()

## Assess interannual variability in AAR magnitude and timing at each site

In [None]:
# Interannual variability of the annual AAR minimum (magnitude and week of
# year) at each site, using observations from ISO year 2016 onward.
# Results are cached as a CSV: loaded if present, otherwise computed and saved.
aar_var_stats_fn = os.path.join(scm_path, 'analysis', 'minimum_snow_cover_stats_interannual_variability_2016-2023.csv')
if os.path.exists(aar_var_stats_fn):
    aar_var_stats = pd.read_csv(aar_var_stats_fn)
    print('AAR interannual variability stats loaded from file.')

else:
    # one single-row frame per site, concatenated once at the end
    site_stats = []
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load snow cover stats
        scs_fns = sorted(glob.glob(os.path.join(scm_path, 'study-sites', rgi_id,
                                                'imagery', 'snowlines', '*.csv')))
        if scs_fns:
            scs_site = pd.concat([pd.read_csv(fn) for fn in scs_fns], ignore_index=True)
        else:
            # no files: the empty frame is caught by the 'datetime' check below
            scs_site = pd.DataFrame()
        # Add Year and WOY columns
        if 'datetime' not in scs_site.keys():
            print(f'Error with {rgi_id}')
            continue
        scs_site['datetime'] = pd.to_datetime(scs_site['datetime'], format='mixed')
        scs_site['Year'] = scs_site['datetime'].dt.isocalendar().year
        scs_site['WOY'] = scs_site['datetime'].dt.isocalendar().week

        # keep 2016 onward, and only rows with a valid AAR: idxmin cannot
        # return a usable row label for a year whose AAR values are all NaN
        scs_site = scs_site.loc[(scs_site['Year'] >= 2016) & scs_site['AAR'].notna()]
        if scs_site.empty:
            print(f'No valid AAR observations since 2016 for {rgi_id}, skipping.')
            continue
        # identify annual AAR magnitudes and WOY timing
        annual_mins_site = scs_site.groupby('Year')['AAR'].idxmin().reset_index()
        annual_mins_site.rename(columns={'AAR': 'Imin'}, inplace=True)
        annual_mins_site['AAR'] = [scs_site.loc[i, 'AAR'] for i in annual_mins_site['Imin'].values]
        annual_mins_site['WOY'] = [scs_site.loc[i, 'WOY'] for i in annual_mins_site['Imin'].values]
        df = pd.DataFrame({'RGIId': [rgi_id],
                           'AAR_min': [annual_mins_site['AAR'].min()],
                           'AAR_max': [annual_mins_site['AAR'].max()],
                           'AAR_median': [annual_mins_site['AAR'].median()],
                           'AAR_IQR': [iqr(annual_mins_site['AAR'])],
                           'WOY_min': [annual_mins_site['WOY'].min()],
                           'WOY_max': [annual_mins_site['WOY'].max()],
                           'WOY_median': [annual_mins_site['WOY'].median()],
                           'WOY_IQR': [iqr(annual_mins_site['WOY'])]})
        site_stats.append(df)

    # Combine all sites (empty frame if no site could be processed)
    aar_var_stats = pd.concat(site_stats, ignore_index=True) if site_stats else pd.DataFrame()

    # Save to file
    aar_var_stats.to_csv(aar_var_stats_fn, index=False)
    print('AAR interannual variability stats saved to file:', aar_var_stats_fn)

# display the table
aar_var_stats


In [None]:
# -----Print stats
# Per-site spread between the largest and smallest annual AAR minimum,
# summarized across all sites as median +/- IQR
aar_var_stats['AAR_range'] = aar_var_stats['AAR_max'] - aar_var_stats['AAR_min']
aar_range_median = aar_var_stats['AAR_range'].median()
aar_range_iqr = iqr(aar_var_stats['AAR_range'])
print(f"AAR range for all sites: {aar_range_median} +/- {aar_range_iqr}\n")
# print('By subregion:')
# print('Median')
# print(aar_var_stats.groupby(['Subregion'])['AAR_range'].median())
# print('\n')
# print('IQR')
# print(aar_var_stats.groupby(['Subregion'])['AAR_range'].apply(iqr))

In [None]:
# Per-site spread (in weeks) between the latest and earliest week of the annual
# AAR minimum, summarized across all sites as median +/- IQR
aar_var_stats['WOY_range'] = aar_var_stats['WOY_max'] - aar_var_stats['WOY_min']
woy_range_median = aar_var_stats['WOY_range'].median()
woy_range_iqr = iqr(aar_var_stats['WOY_range'])
print(f"AAR TIMING range for all sites: {woy_range_median} +/- {woy_range_iqr}\n")
# print('By subregion:')
# print('Median')
# print(aar_var_stats.groupby(['Subregion'])['WOY_range'].median())
# print('\n')
# print('IQR')
# print(aar_var_stats.groupby(['Subregion'])['WOY_range'].apply(iqr))

## Identify the approximate start and end of the melt season in each subregion from ERA data

In [None]:
# Estimate the start and end week of the melt season at each site from weekly
# medians of the daily ERA5 data. Results are cached as a CSV: computed and
# saved on the first run, loaded from file afterwards.
melt_season_fn = os.path.join(scm_path, 'analysis', 'melt_season_timing.csv')

if not os.path.exists(melt_season_fn):
    # one dict per site, turned into a DataFrame once at the end
    melt_season_rows = []

    # Iterate over sites
    for rgi_id in tqdm(aois['RGIId'].drop_duplicates().values):
        # Load ERA data
        era_fn = os.path.join(scm_path, 'study-sites', rgi_id, 'ERA', f'{rgi_id}_ERA5_daily_means.csv')
        era = pd.read_csv(era_fn)
        era['Date'] = pd.to_datetime(era['Date'])

        # Add WOY column
        era['WOY'] = era['Date'].dt.isocalendar().week

        # Calculate weekly medians using data from 2013 onward
        era = era.loc[era['year'] > 2012]
        if '.geo' in era.keys():
            era = era.drop(columns=['.geo'])
        era_weekly_median = era.groupby('WOY').median().reset_index()

        # Estimate start and end of melt seasons
        # Start = first week with a positive annual PDD sum; falls back to
        # week 52 when no week qualifies or the column is absent
        try:
            woy_start = era_weekly_median.loc[era_weekly_median['positive_degree_days_annual_sum'] > 0, 'WOY'].values[0]
        except (IndexError, KeyError):
            woy_start = 52
        # End = first week after WOY 30 with 0 PDDs and positive snowfall
        end_candidates = era_weekly_median.loc[(era_weekly_median['WOY'] > 30)
                                               & (era_weekly_median['positive_degree_days'] == 0)
                                               & (era_weekly_median['mean_snowfall_sum'] > 0), 'WOY'].values
        if len(end_candidates) > 0:
            woy_end = end_candidates[0]
        else:
            # no qualifying week: record NaN instead of aborting the whole loop
            print(f'No melt season end found for {rgi_id}; setting to NaN.')
            woy_end = np.nan

        # Add to the list of per-site results
        melt_season_rows.append({'RGIId': rgi_id,
                                 'melt_season_start_WOY': woy_start,
                                 'melt_season_end_WOY': woy_end})

    # Save to file
    melt_season_df = pd.DataFrame(melt_season_rows,
                                  columns=['RGIId', 'melt_season_start_WOY', 'melt_season_end_WOY'])
    melt_season_df.to_csv(melt_season_fn, index=False)
    print('Melt season timing CSV saved to file:', melt_season_fn)

else:
    melt_season_df = pd.read_csv(melt_season_fn)
    print('Melt season timing CSV loaded from file.')

# display the table
melt_season_df
        

In [None]:
# Plot some results: per-subregion histograms of melt season start (magenta)
# and end (blue) weeks, with a vertical line at each mean.

# Add subregion and cluster columns
if 'Subregion' not in melt_season_df.keys():
    melt_season_df['Subregion'] = ''
    melt_season_df['clustName'] = ''
    for rgi_id in melt_season_df['RGIId'].drop_duplicates().values:
        # use the first matching entry so a scalar is assigned even if the
        # lookup table holds more than one row for this RGIId
        melt_season_df.loc[melt_season_df['RGIId']==rgi_id, 'Subregion'] = aois.loc[aois['RGIId']==rgi_id, 'Subregion'].values[0]
        melt_season_df.loc[melt_season_df['RGIId']==rgi_id, 'clustName'] = clusters.loc[clusters['RGIId']==rgi_id, 'clustName'].values[0]

subregions = melt_season_df['Subregion'].drop_duplicates().values
nsubregions = len(subregions)
# squeeze=False keeps `ax` a 2-D array even when there is only one subregion
fig, ax = plt.subplots(nsubregions, 1, figsize=(8, nsubregions*4), squeeze=False)
ax = ax.ravel()
for i, subregion in enumerate(subregions):
    melt_season_subregion_df = melt_season_df.loc[melt_season_df['Subregion']==subregion]
    ax[i].hist(melt_season_subregion_df['melt_season_start_WOY'], bins=20, facecolor='m', alpha=0.5)
    ax[i].axvline(melt_season_subregion_df['melt_season_start_WOY'].mean(), color='m', linewidth=2)
    ax[i].hist(melt_season_subregion_df['melt_season_end_WOY'], bins=20, facecolor='b', alpha=0.5)
    ax[i].axvline(melt_season_subregion_df['melt_season_end_WOY'].mean(), color='b', linewidth=2)
    ax[i].set_title(subregion)

plt.show()