# Compile CSVs exported from the GlaSEE pipeline

__NOTE:__ You must do one of the following to access your CSVs:

- Upload this notebook to your Google Drive and run as a Colab notebook.

- Download the CSVs locally.

- Download Google Drive Desktop or other software for mounting your Drive locally.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import os
import seaborn as sns

## Define path to files

In [None]:
# If using Google Colab, mount your Drive.
# The google.colab package only exists inside Colab, so when this notebook is
# run locally (CSVs downloaded, or Drive mounted by other software) the mount
# step is skipped instead of failing with an ImportError.
try:
  from google.colab import drive
except ImportError:
  print('google.colab not available, skipping Drive mount.')
else:
  drive.mount('/content/drive')

# Define the path to the folder with the exported CSV files
# (edit this if you are not running in Colab)
out_path = '/content/drive/My Drive/glacier_snow_cover_exports/'

## Compile CSVs

In [None]:
# -----Option 1: enter glacier IDs manually
# May be desired if some glaciers are still exporting, etc.
# glacier_IDs = ['G219787E60289N']

# -----Option 2: Grab all the glacier IDs in the folder
# The ID is taken as the part of each CSV file name before the first underscore.
all_files = glob(os.path.join(out_path, '*.csv'))
# os.path.basename (rather than splitting on '/') also handles Windows path
# separators when the notebook is run locally; the set removes duplicate IDs.
glacier_IDs = sorted({os.path.basename(file).split('_')[0] for file in all_files})

print('Number of unique glacier IDs:', len(glacier_IDs))
print(glacier_IDs)

In [None]:
# Compile the snow cover stats CSVs of each glacier into a single
# '<glacier_ID>_timeseries.csv' file in out_path.
for glacier_ID in glacier_IDs:
  print('\n', glacier_ID)

  # Define output file name
  out_fn = os.path.join(out_path, glacier_ID + '_timeseries.csv')
  if os.path.exists(out_fn):
    print('Compiled CSV already exists, skipping.')
    # Actually skip: without this the existing file was recompiled and overwritten
    continue

  # Iterate over CSVs
  files = glob(os.path.join(out_path, glacier_ID + '*snow_cover_stats*.csv'))
  print(f'Found {len(files)} CSVs to compile')
  dfs = []
  for file in files:
    # Unreadable files are reported and left out of the compilation.
    # pandas parser / empty-data / decoding errors are ValueError subclasses.
    try:
      df = pd.read_csv(file)
    except (OSError, ValueError) as err:
      print('Error reading', file, '-', err)
    else:
      dfs.append(df)

  # Compile and save new CSV to file
  if dfs:
    # concatenate dataframes
    glacier_df = pd.concat(dfs)

    # sort by date
    glacier_df = glacier_df.sort_values(by='date')

    # get rid of empty columns; errors='ignore' tolerates exports that
    # do not contain these columns
    glacier_df = glacier_df.drop(columns=['system:index', '.geo'], errors='ignore')

    # save to file
    glacier_df.to_csv(out_fn, index=False)
    print('Compiled time series saved to file:', out_fn)


## Plot some time series data for each glacier

In [None]:
# Plot the 'transient_AAR' column against date for each glacier,
# with points colored by the 'source' column.
for glacier_ID in glacier_IDs:
  # load compiled time series; glaciers for which no compiled file was
  # written (e.g. no readable CSVs were found) are skipped
  ts_fn = os.path.join(out_path, glacier_ID + '_timeseries.csv')
  if not os.path.exists(ts_fn):
    print('No compiled time series found, skipping:', ts_fn)
    continue
  df = pd.read_csv(ts_fn)
  df['date'] = pd.to_datetime(df['date'])

  # plot
  fig, ax = plt.subplots(figsize=(8,3))
  # s sets the marker size; `sizes` only applies when a `size` variable is
  # mapped, so the original sizes=10 had no effect
  sns.scatterplot(data=df, x='date', y='transient_AAR', hue='source', s=10, ax=ax)
  ax.set_title(glacier_ID)
  ax.set_ylim(-0.1, 1.1)
  ax.grid()
  plt.show()

## Optional: delete the raw files

In [None]:
# once ready, delete the raw files
for glacier_ID in glacier_IDs:
  # Safety check: only delete raw exports once the compiled time series
  # exists, so the data is never removed without a compiled copy
  compiled_fn = os.path.join(out_path, glacier_ID + '_timeseries.csv')
  if not os.path.exists(compiled_fn):
    print('No compiled time series for', glacier_ID, '- keeping raw files.')
    continue

  raw_files = glob(os.path.join(out_path, glacier_ID + '*snow_cover_stats*.csv'))
  for file in raw_files:
    print(file)
    os.remove(file)
