In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob

In [None]:
# If using Google Colab, mount Google Drive so you can access your Drive folders.
# The google.colab package only exists inside Colab, so the import is guarded:
# outside Colab the mount step is skipped instead of stopping the notebook.
try:
    from google.colab import drive
except ImportError:
    print('google.colab not available; skipping Google Drive mount')
else:
    drive.mount('/content/drive')

In [None]:
# Path to 'snow_cover_mapping_application/study-sites/'.
# The path is relative, so it is resolved against the working directory
# that is current when it is used.
study_sites_path = (
    'drive/MyDrive/Research/CryoGARS-Glaciology/Advising/student-research/'
    'Alexandra-Friel/snow_cover_mapping_application/study-sites/'
)

In [None]:
# Move into the study-sites folder and list its sub-folders (one per study site).
# NOTE: study_sites_path is relative, so running this cell a second time
# (after the working directory has already changed) will fail.
os.chdir(study_sites_path)
# glob returns each folder with a trailing separator; drop that last character
site_names = [folder[:-1] for folder in glob.glob('*/', recursive=True)]
site_names.sort()
site_names

In [None]:
# define functions to solve for the PDD normalization factor using gradient descent
# adapted from: https://towardsdatascience.com/gradient-descent-in-python-a0d07285742f

def cal_cost(max_value, y1, y2):
    """
    Mean absolute misfit between rescaled PDDs and snowline elevations.

    y1 is rescaled linearly so that its minimum maps to min(y2) and its
    maximum maps to max_value. The cost is the mean absolute difference
    between the rescaled y1 and y2. Entries where y2 is NaN are ignored.

    Parameters
    ----------
    max_value : float
        Upper end of the range that y1 is rescaled to.
    y1 : array-like
        PDDs, one value per entry of y2.
    y2 : array-like
        Median snowline elevations [m]; NaN marks a missing value.

    Returns
    -------
    float
        Mean absolute difference between rescaled y1 and y2. NaN when y1
        has no spread (or no non-NaN values), because the rescaling is
        undefined in that case.

    Raises
    ------
    ValueError
        If y2 contains no non-NaN values.
    """
    y1 = np.asarray(y1, dtype=float)
    y2 = np.asarray(y2, dtype=float)

    # remove NaNs from time series; a boolean mask keeps both arrays 1-D
    # (indexing with np.argwhere output would turn them into column vectors)
    valid = ~np.isnan(y2)
    y1 = y1[valid]
    y2 = y2[valid]
    if y2.size == 0:
        raise ValueError('y2 contains no valid (non-NaN) values')

    # a constant or all-NaN y1 cannot be rescaled: report NaN without the
    # divide-by-zero warnings the raw expression would trigger
    y1_finite = y1[~np.isnan(y1)]
    if y1_finite.size == 0:
        return float('nan')
    y1_min = y1_finite.min()
    y1_range = y1_finite.max() - y1_min
    if y1_range == 0:
        return float('nan')

    # normalize y1 from min(y2) to max_value
    y2_min = y2.min()
    y1_norm = (y1 - y1_min) / y1_range * (max_value - y2_min) + y2_min

    # calculate mean absolute difference between normalized y1 and y2
    return float(np.nanmean(np.abs(y1_norm - y2)))

def gradient_descent(y1, y2, learning_rate=1, iterations=1000):
    """
    Find the upper bound for rescaling y1 that best matches y2.

    Despite the name, no gradient is computed. This is a fixed-step scan:
    starting from max(y2), the candidate upper bound is lowered by
    learning_rate on every iteration, its cost is evaluated with cal_cost,
    and the candidate with the smallest cost is kept.

    Parameters
    ----------
    y1 : array-like
        PDDs, one value per entry of y2.
    y2 : array-like
        Median snowline elevations [m]; may contain NaN.
    learning_rate : float, optional
        Amount by which the candidate upper bound is lowered each iteration.
    iterations : int, optional
        Number of candidates to evaluate.

    Returns
    -------
    y1_norm : numpy.ndarray
        y1 rescaled from [min(y1), max(y1)] to [min(y2), max_value_best].
    max_value_best : float
        Candidate upper bound with the lowest cost.
    max_value_history : numpy.ndarray
        Every candidate evaluated, in order.
    cost_history : numpy.ndarray
        Cost of each candidate in max_value_history.

    Raises
    ------
    ValueError
        If no candidate produced a non-NaN cost (this includes
        iterations < 1).
    """
    y1 = np.asarray(y1, dtype=float)
    y2 = np.asarray(y2, dtype=float)

    cost_history = np.zeros(iterations)
    max_value_history = np.zeros(iterations)
    max_value = np.nanmax(y2)
    for it in range(iterations):
        # the step is applied before evaluating, so the first candidate
        # is max(y2) - learning_rate
        max_value = max_value - learning_rate
        max_value_history[it] = max_value
        cost_history[it] = cal_cost(max_value, y1, y2)

    # find optimum max_value where cost = minimum (first occurrence, NaNs ignored)
    if np.all(np.isnan(cost_history)):
        raise ValueError('no candidate max_value produced a valid cost; '
                         'check that y1 varies and y2 has non-NaN values')
    ibest = np.nanargmin(cost_history)
    max_value_best = max_value_history[ibest]

    # normalize y1 from min(y2) to max_value_best
    y1_min = np.nanmin(y1)
    y1_max = np.nanmax(y1)
    y2_min = np.nanmin(y2)
    y1_norm = (y1 - y1_min) / (y1_max - y1_min) * (max_value_best - y2_min) + y2_min

    return y1_norm, max_value_best, max_value_history, cost_history

In [None]:
# Define names of study sites to plot
site_names = ['Wolverine', 'Gulkana', 'LemonCreek', 'SouthCascade', 'Sperry']

# A snowline point is removed when it differs from the normalized PDD curve by
# at least this fraction of the observed snowline elevation range
threshold_fraction = 0.5

# loop through study sites
for site_name in site_names:

  print(site_name)

  # load snowlines: read every CSV, then concatenate once
  # (growing a DataFrame with concat inside the loop copies it on every pass)
  sl_fns = sorted(glob.glob(site_name + '/imagery/snowlines/*.csv'))
  sl_list = [pd.read_csv(sl_fn) for sl_fn in sl_fns]
  sl_full = pd.concat(sl_list, ignore_index=True) if sl_list else pd.DataFrame()

  if len(sl_full) < 1:
    print('No snowlines in files, continuing...')
    print(' ')
    continue
  sl_full['datetime'] = sl_full['datetime'].astype('datetime64[ns]')
  sl_full = sl_full.sort_values(by='datetime')

  # load ERA data (glob once; sort so the file picked is the same on every run)
  ERA_fns = sorted(glob.glob(site_name + '/ERA/*.csv'))
  if len(ERA_fns) < 1:
    print('No ERA data in files, continuing...')
    print(' ')
    continue
  ERA_fn = ERA_fns[0]
  ERA = pd.read_csv(ERA_fn)
  ERA['Date'] = ERA['Date'].astype('datetime64[ns]')
  # np.interp needs increasing x values, so make sure ERA is in date order
  ERA = ERA.sort_values(by='Date').reset_index(drop=True)

  # add days since first ERA date column
  ERA_start = ERA['Date'].iloc[0]
  ERA['days'] = (ERA['Date'] - ERA_start) / np.timedelta64(1, 'D')
  sl_full['days'] = (sl_full['datetime'] - ERA_start) / np.timedelta64(1, 'D')

  # shorthand for the series used repeatedly below
  PDD = ERA['Cumulative_Positive_Degree_Days'].values
  sl_elevs = sl_full['snowline_elevs_median_m'].values
  sl_days = sl_full['days'].values

  # interpolate (un-normalized) PDDs at snowline estimate dates
  PDD_interp = np.interp(sl_days, ERA['days'].values, PDD)

  # solve for best maximum value for normalizing PDDs
  _, max_value_best, _, _ = gradient_descent(PDD_interp, sl_elevs)
  print('Optimum max value = ' + str(max_value_best))

  # normalize the full PDD series from min(snowline elevation) to max_value_best
  PDD_min = np.nanmin(PDD)
  PDD_max = np.nanmax(PDD)
  sl_min = np.nanmin(sl_elevs)
  sl_max = np.nanmax(sl_elevs)
  PDD_norm = (PDD - PDD_min) / (PDD_max - PDD_min) * (max_value_best - sl_min) + sl_min
  # interpolate normalized PDDs to snowline observation dates
  PDD_norm_interp = np.interp(sl_days, ERA['days'].values, PDD_norm)

  # calculate difference between PDD_norm_interp and median snowline elevations
  diff = sl_elevs - PDD_norm_interp

  # define threshold for filtering snowline points
  # (points whose difference is NaN fall in neither group)
  threshold = threshold_fraction * (sl_max - sl_min)
  ifilt = np.ravel(np.argwhere(np.abs(diff) >= threshold))
  ikeep = np.ravel(np.argwhere(np.abs(diff) < threshold))

  # plot normalized PDDs and snowline elevations
  fig, ax = plt.subplots(1,2, figsize=(16, 8))
  ax[0].plot(ERA['Date'], PDD_norm, '.b', markersize=1.5, label='normalized PDDs')
  ax[0].plot(sl_full['datetime'].iloc[ifilt], sl_full['snowline_elevs_median_m'].iloc[ifilt], '.r', label='removed points')
  ax[0].plot(sl_full['datetime'].iloc[ikeep], sl_full['snowline_elevs_median_m'].iloc[ikeep], '.k', label='filtered time series')
  ax[0].set_xlim([np.datetime64('2013-01-01'), np.datetime64('2022-12-01')])
  ax[0].set_ylabel('Elevation [m]')
  ax[0].legend(loc='best')
  ax[0].grid()
  # plot differences
  days_lim = [np.nanmin(sl_days), np.nanmax(sl_days)]
  ax[1].plot(sl_full['days'].iloc[ifilt], diff[ifilt], '.r', label='removed points')
  ax[1].plot(sl_full['days'].iloc[ikeep], diff[ikeep], '.k', label='remaining points')
  ax[1].plot(days_lim, [threshold, threshold], '--k', label='threshold')
  ax[1].plot(days_lim, [-threshold, -threshold], '--k')
  ax[1].set_xlim(days_lim)
  ax[1].legend(loc='best')
  ax[1].grid()
  ax[1].set_xlabel('Days')
  ax[1].set_ylabel('Differences')

  fig.suptitle(site_name)
  fig.tight_layout()
  plt.show()
  # release the figure so figures do not pile up across sites
  plt.close(fig)

  print(' ')


In [None]:
# Display the ERA table left in memory by the site loop, i.e. the table of the
# last site for which an ERA CSV was read (not necessarily the last site listed).
ERA

In [None]:
# Plot cumulative PDDs from the ERA table left in memory by the site loop.
# Explicit figure/axes are used so the labels are attached to this plot and
# not to whatever figure pyplot currently considers active.
fig, ax = plt.subplots()
ERA.plot(x='Date', y='Cumulative_Positive_Degree_Days', ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative positive degree days')
plt.show()