# Notebook to construct training data using snow cover stats from ELA CSVs and terrain parameters from RGI outlines

Note: Decide how to handle the manually adjusted ELAs before building the training data: should they replace the automated ELAs?

In [None]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import geopandas as gpd

In [None]:
# If using Google Colab, mount Google Drive so you can access the files in this folder
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

## Define the path to the `study-sites` directory

In [None]:
# Location of the study-sites/ folder. Keep exactly one assignment active.
# Alexandra's path to study-sites/ (under the Google Colab drive mount point)
# study_sites_path = ('/content/drive/My Drive/CryoGARS-Glaciology/Advising/student-research/Alexandra-Friel/snow_cover_mapping_application/study-sites/')

# Rainey's path to study-sites/ (local machine)
study_sites_path = '/Users/raineyaberle/Google Drive/My Drive/Research/CryoGARS-Glaciology/Advising/student-research/Alexandra-Friel/snow_cover_mapping_application/study-sites/'

# Output settings for the training data CSV: file name first, then the folder,
# which is resolved relative to the parent of study-sites/
out_fn = 'ELA_training_data.csv'
out_path = os.path.join(
    study_sites_path,
    '..',
    'snow-cover-mapping-application',
    'inputs-outputs',
)

## Grab list of all sites with ELA CSVs from `study_sites_path`

In [None]:
def _has_ela_csvs(candidate):
    """Return True if `candidate` has at least one CSV in its 'ELAs' sub-folder."""
    csv_pattern = os.path.join(study_sites_path, candidate, 'ELAs', '*.csv')
    return bool(glob.glob(csv_pattern))


# List every entry in study_sites_path (alphabetical order) and keep only
# the ones that contain ELA CSVs
site_names = [entry for entry in sorted(os.listdir(study_sites_path))
              if _has_ela_csvs(entry)]
print('Number of sites with ELA CSVs in file = ', len(site_names))
site_names

## Loop through `site_names`, load ELA CSVs and RGI outlines, compile into training data

In [None]:
# Build the training data: one row per ELA observation, with the RGI terrain
# parameters of that site's glacier repeated on every row of the site.

# RGI attributes copied onto every ELA row ('glacier_outline' is a copy of the
# outline geometry, added to the RGI table below)
columns = ['CenLon', 'CenLat', 'O1Region', 'O2Region', 'Area',
           'Zmin', 'Zmax', 'Zmed', 'Slope', 'Aspect', 'Lmax', 'Status',
           'Connect', 'Form', 'TermType', 'Surging', 'glacier_outline']

# Collect one data frame per site and concatenate once after the loop.
# Calling pd.concat inside the loop re-copies every accumulated row on each
# pass, and recent pandas versions emit a FutureWarning when one of the
# concatenated frames is empty (as the initial accumulator was).
site_frames = []

for site_name in site_names:

    # Load ELAs; glob returns files in arbitrary order, so sort the names to
    # get the same row order in the output on every run
    ela_fns = sorted(glob.glob(os.path.join(study_sites_path, site_name, 'ELAs', '*.csv')))
    ela_frames = [pd.read_csv(ela_fn) for ela_fn in ela_fns]
    # ignore_index renumbers the rows 0..n-1 across all files of the site
    elas = pd.concat(ela_frames, ignore_index=True) if ela_frames else pd.DataFrame()

    # Load RGI outline (sites named after their RGI ID use a shorter file name)
    if 'RGI' in site_name:
        rgi_fn = os.path.join(study_sites_path, site_name, 'AOIs', site_name + '_outline.shp')
    else:
        rgi_fn = os.path.join(study_sites_path, site_name, 'AOIs', site_name + '_RGI_outline.shp')
    rgi = gpd.read_file(rgi_fn)

    # Add RGI terrain parameters to elas: the value from the first feature of
    # the outline file is broadcast to every ELA row. `.iloc[0]` selects that
    # feature by position, so it does not depend on the index labels.
    rgi['glacier_outline'] = rgi['geometry']
    elas[columns] = [rgi[column].iloc[0] for column in columns]

    # Queue this site's rows for the final concatenation
    site_frames.append(elas)

# Combine all sites and renumber the index in one step; fall back to an empty
# frame when no sites were found so the cell still runs
training_data = pd.concat(site_frames, ignore_index=True) if site_frames else pd.DataFrame()

# Save training data to file
training_data.to_csv(os.path.join(out_path, out_fn), index=False)
print('Training data saved to file: ', os.path.join(out_path, out_fn))
training_data



## Make some plots for fun

In [None]:
from matplotlib.backend_bases import GraphicsContextBase
# -----Plot AAR and median snowline elevation through time for all sites
# colormap used to give each site its own color
cmap = plt.cm.viridis
# parse the datetime strings, then use the parsed datetimes as the index too
training_data['datetime'] = pd.to_datetime(training_data['datetime'])
training_data.index = training_data['datetime']

# one panel per variable: (column to plot, y-axis label), left to right
panels = [('AAR', 'Accumulation area ratio'),
          ('snowline_elevs_median_m', 'Median snowline elevation [m]')]

# scatter each site's observations onto both panels, one color per site
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
site_groups = training_data.groupby('site_name')
nsites = len(site_groups)
for site_idx, (site, site_df) in enumerate(site_groups):
    site_color = cmap(site_idx / nsites)
    for axis, (column, label) in zip(ax, panels):
        site_df.plot(x='datetime', y=column, ax=axis, color=site_color,
                     kind='scatter', legend=False)

# shared formatting: grid on, no x label, variable-specific y label
for axis, (column, label) in zip(ax, panels):
    axis.grid()
    axis.set_xlabel('')
    axis.set_ylabel(label)
plt.show()