# Compile data: snow cover stats, manual ELA picks, glacier boundaries (AOIs), and ERA data

In [1]:
import os
import glob
import pandas as pd
import geopandas as gpd
from tqdm.auto import tqdm
import numpy as np
import sys

In [2]:
# -----Root folder of snow_cover_mapping and its study-sites/ subfolder
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
study_sites_path = os.path.join(scm_path, 'study-sites')

# -----Make the functions/ folder of snow-cover-mapping-application/ importable
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/'
sys.path.append(os.path.join(base_path, 'functions'))
import model_analyze_utils as f

# -----Study site names: sorted entries of study-sites/ whose name contains 'RGI'
rgi_ids = sorted(name for name in os.listdir(study_sites_path) if 'RGI' in name)
print('Number of study sites = ', len(rgi_ids))
rgi_ids

Number of study sites =  195


['RGI60-01.00032',
 'RGI60-01.00033',
 'RGI60-01.00037',
 'RGI60-01.00038',
 'RGI60-01.00046',
 'RGI60-01.00312',
 'RGI60-01.00566',
 'RGI60-01.00570',
 'RGI60-01.00576',
 'RGI60-01.00675',
 'RGI60-01.01104',
 'RGI60-01.01151',
 'RGI60-01.01390',
 'RGI60-01.01524',
 'RGI60-01.01733',
 'RGI60-01.03594',
 'RGI60-01.03622',
 'RGI60-01.03861',
 'RGI60-01.04375',
 'RGI60-01.04624',
 'RGI60-01.06268',
 'RGI60-01.06722',
 'RGI60-01.08155',
 'RGI60-01.08174',
 'RGI60-01.08246',
 'RGI60-01.08248',
 'RGI60-01.08262',
 'RGI60-01.08288',
 'RGI60-01.08296',
 'RGI60-01.08302',
 'RGI60-01.08336',
 'RGI60-01.08353',
 'RGI60-01.08389',
 'RGI60-01.08395',
 'RGI60-01.08403',
 'RGI60-01.08412',
 'RGI60-01.08427',
 'RGI60-01.09148',
 'RGI60-01.09162',
 'RGI60-01.09216',
 'RGI60-01.09411',
 'RGI60-01.09639',
 'RGI60-01.10196',
 'RGI60-01.10555',
 'RGI60-01.10689',
 'RGI60-01.10778',
 'RGI60-01.10851',
 'RGI60-01.10857',
 'RGI60-01.11616',
 'RGI60-01.11654',
 'RGI60-01.11788',
 'RGI60-01.12347',
 'RGI60-01.1

## Load and compile snow cover stats

In [3]:
# Folder and file name of the compiled snow cover stats CSV (all sites in one file)
scs_path = os.path.join(scm_path, 'compiled_data')
scs_fn = 'all_snow_cover_stats.csv'

def load_site_sc_stats(site_name, study_sites_path):
    """Load the snow cover stats CSV of a single study site.

    Parameters
    ----------
    site_name : str
        Name of the site folder inside ``study_sites_path``.
    study_sites_path : str
        Path to the folder that contains one sub-folder per study site.

    Returns
    -------
    pandas.DataFrame or str
        Contents of the first file (in sorted order) matching
        ``*_snow_cover_stats.csv`` in the site folder, or the string ``'N/A'``
        when the site has no such file.
    """
    sc_path = os.path.join(study_sites_path, site_name)
    # glob returns matches in arbitrary order; sort so the same file is
    # always chosen when a site folder holds more than one match
    sc_fns = sorted(glob.glob(os.path.join(sc_path, '*_snow_cover_stats.csv')))
    if not sc_fns:
        # callers detect a missing file by checking for this string sentinel
        return 'N/A'
    return pd.read_csv(sc_fns[0])
    
# Make sure the folder for compiled data exists (creates parent folders too)
os.makedirs(scs_path, exist_ok=True)
scs_full_fn = os.path.join(scs_path, scs_fn)

if not os.path.exists(scs_full_fn):
    # No compiled CSV yet: gather the snow cover stats of every study site.
    # Collect the per-site tables in a list and concatenate once (concatenating
    # inside the loop copies the growing table on every iteration).
    scs_sites = []
    for rgi_id in tqdm(rgi_ids):
        scs_site = load_site_sc_stats(rgi_id, study_sites_path)
        # load_site_sc_stats returns the string 'N/A' when a site has no file
        if not isinstance(scs_site, str):
            scs_sites.append(scs_site)
    if not scs_sites:
        # Fail before writing an empty cache file that would break later runs
        raise FileNotFoundError(f'No snow cover stats files found in {study_sites_path}')
    scs = pd.concat(scs_sites, ignore_index=True)
    # reduce memory storage
    scs = f.reduce_memory_usage(scs)
    scs.to_csv(scs_full_fn, index=False)
    print('All snow cover stats saved to file: ', scs_full_fn)
    # NOTE(review): 'datetime' is not parsed on this path, whereas it is when
    # the compiled CSV is loaded from file below -- confirm whether it should be.

else:
    # Load from file if it already exists
    scs = pd.read_csv(scs_full_fn)
    print('All snow cover stats loaded from file.')
    # Check if more sites need to be added to snow cover stats
    rgi_ids_in_file = set(scs['RGIId'].unique())
    scs_new_sites = []
    for rgi_id in rgi_ids:
        if rgi_id in rgi_ids_in_file:
            continue
        scs_site = load_site_sc_stats(rgi_id, study_sites_path)
        if not isinstance(scs_site, str):
            print(f'Adding {rgi_id} to snow cover stats...')
            scs_new_sites.append(scs_site)
    if scs_new_sites:
        # Re-save only when something was actually added; previously the
        # re-save was guarded by a flag that was never set, so new sites were
        # silently dropped from the compiled file.
        scs = pd.concat([scs] + scs_new_sites, ignore_index=True)
        scs.to_csv(scs_full_fn, index=False)
        print('All snow cover stats saved to file: ', scs_full_fn)
    # Parse dates after any new rows were appended so the whole column ends up
    # with one dtype instead of a mix of timestamps and strings
    scs['datetime'] = pd.to_datetime(scs['datetime'], format='mixed')

# Sites that still have no snow cover stats (defined on both paths above)
rgi_ids_with_scs = set(scs['RGIId'].unique())
rgi_ids_no_scs = [x for x in rgi_ids if x not in rgi_ids_with_scs]

print('\nNumber of sites with snow cover stats files:', len(scs['RGIId'].drop_duplicates()))
print(f'\nSites without snow cover stats files: N={len(rgi_ids_no_scs)} \n{rgi_ids_no_scs}')
# scs

All snow cover stats loaded from file.
Adding RGI60-01.22207 to snow cover stats...

Number of sites with snow cover stats files: 194

Sites without snow cover stats files: N=1 
['RGI60-01.22207']


## Load and compile manually picked ELAs

In [None]:
# Folder and file name of the compiled manual ELA picks; derived from scm_path
# so it points at the same compiled_data/ folder without repeating the path
elas_path = os.path.join(scm_path, 'compiled_data')
elas_fn = 'all_manual_ELA_picks.csv'
elas_full_fn = os.path.join(elas_path, elas_fn)

# Check if compiled ELAs already exist in directory
if os.path.exists(elas_full_fn):
    elas = pd.read_csv(elas_full_fn)
    print('Manual ELA picks loaded from file.')
else:
    # NOTE(review): `data_path` was never defined in this notebook. It is
    # assumed here to be the study-sites folder, with each site's picks stored
    # as <site>/<site>_ELAs_manual_picks.csv -- confirm and adjust if needed.
    data_path = study_sites_path
    # Grab site IDs with ELAs: only sites whose manual-picks file exists.
    # Stored under its own name so the global list of study sites (rgi_ids)
    # is not overwritten for the cells that follow.
    rgi_ids_elas = sorted(
        x for x in os.listdir(data_path)
        if os.path.exists(os.path.join(data_path, x, f'{x}_ELAs_manual_picks.csv'))
    )
    print(f'Number of sites with manual ELA picks = {len(rgi_ids_elas)}')
    if not rgi_ids_elas:
        # Fail before writing an empty cache file that would break later runs
        raise FileNotFoundError(f'No manual ELA pick files found in {data_path}')
    # Read every site's picks, then concatenate once
    elas = pd.concat(
        [pd.read_csv(os.path.join(data_path, rgi_id, f'{rgi_id}_ELAs_manual_picks.csv'))
         for rgi_id in tqdm(rgi_ids_elas)],
        ignore_index=True,
    )
    # Save to file
    os.makedirs(elas_path, exist_ok=True)
    elas.to_csv(elas_full_fn, index=False)
    print('Manual ELA picks saved to file:', elas_full_fn)

# elas

## Load and compile glacier boundaries

In [None]:
# Folder and file name of the compiled glacier boundaries (AOIs) shapefile
aois_path = os.path.join(scm_path, 'all_AOIs')
aois_fn = 'all_aois.shp'

def load_site_aoi(site_name, study_sites_path):
    """Load the glacier boundary (AOI) shapefile of a single study site.

    Parameters
    ----------
    site_name : str
        Name of the site folder inside ``study_sites_path``.
    study_sites_path : str
        Path to the folder that contains one sub-folder per study site.

    Returns
    -------
    geopandas.GeoDataFrame or str
        The first file (in sorted order) matching ``*RGI*.shp`` in the site's
        ``AOIs`` folder, reprojected to EPSG:4326, or the string ``'N/A'``
        when the site has no such file.
    """
    aoi_path = os.path.join(study_sites_path, site_name, 'AOIs')
    # glob returns matches in arbitrary order; sort so the same file is
    # always chosen when the folder holds more than one match
    aoi_fns = sorted(glob.glob(os.path.join(aoi_path, '*RGI*.shp')))
    if not aoi_fns:
        # callers detect a missing file by checking for this string sentinel
        return 'N/A'
    aoi = gpd.read_file(aoi_fns[0])
    # use one common CRS so boundaries from different sites can be combined
    return aoi.to_crs('EPSG:4326')
    
# Make sure the folder for the compiled AOIs exists (creates parent folders too)
os.makedirs(aois_path, exist_ok=True)
aois_full_fn = os.path.join(aois_path, aois_fn)

if not os.path.exists(aois_full_fn):
    # No compiled shapefile yet: gather the glacier boundary of every study site.
    # Collect them in a list and concatenate once instead of inside the loop.
    aoi_sites = []
    for rgi_id in tqdm(rgi_ids):
        aoi = load_site_aoi(rgi_id, study_sites_path)
        # load_site_aoi returns the string 'N/A' when a site has no shapefile
        if not isinstance(aoi, str):
            aoi_sites.append(aoi)
    if not aoi_sites:
        # Fail before writing an empty file that would break later runs
        raise FileNotFoundError(f'No glacier boundary shapefiles found in {study_sites_path}')
    aois = pd.concat(aoi_sites, ignore_index=True)
    aois.to_file(aois_full_fn, index=False)
    print('All glacier boundaries saved to file: ', aois_full_fn)

else:
    # Load from file if it already exists
    aois = gpd.read_file(aois_full_fn)
    print('All glacier boundaries loaded from file.')
    # Check if more sites need to be added to AOIs
    rgi_ids_in_file = set(aois['RGIId'].unique())
    aoi_new_sites = []
    for rgi_id in rgi_ids:
        if rgi_id in rgi_ids_in_file:
            continue
        aoi = load_site_aoi(rgi_id, study_sites_path)
        if not isinstance(aoi, str):
            print(f'Adding {rgi_id} to AOIs...')
            aoi_new_sites.append(aoi)
    if aoi_new_sites:
        # Re-save only when something was actually added; previously the
        # re-save was guarded by a flag that was never set, so new sites were
        # never written to the compiled shapefile.
        aois = pd.concat([aois] + aoi_new_sites, ignore_index=True)
        aois.to_file(aois_full_fn, index=False)
        print('All glacier boundaries saved to file: ', aois_full_fn)

# Sites that still have no glacier boundary (defined on both paths above)
rgi_ids_with_aois = set(aois['RGIId'].unique())
rgi_ids_no_aois = [x for x in rgi_ids if x not in rgi_ids_with_aois]

aois[['O1Region', 'O2Region']] = aois[['O1Region', 'O2Region']].astype(int)
print('Number of sites with glacier boundaries = ', len(aois['RGIId'].drop_duplicates()))
print(f'\nSites without glacier boundaries: N={len(rgi_ids_no_aois)} \n{rgi_ids_no_aois}')
# aois

## Load and compile ERA data

In [None]:
# Folder and file name of the compiled ERA data CSV (all sites in one file)
eras_path = os.path.join(scm_path, 'all_ERA_data')
eras_fn = 'all_era_data.csv'

def load_site_era_data(site_name, study_sites_path):
    """Load the ERA data CSV of a single study site.

    Parameters
    ----------
    site_name : str
        Name of the site folder inside ``study_sites_path``.
    study_sites_path : str
        Path to the folder that contains one sub-folder per study site.

    Returns
    -------
    pandas.DataFrame or str
        Contents of the first file (in sorted order) matching ``*ERA*.csv`` in
        the site's ``ERA`` folder, with a ``site_name`` column added, or the
        string ``'N/A'`` when the site has no such file.
    """
    era_path = os.path.join(study_sites_path, site_name, 'ERA')
    # glob returns matches in arbitrary order; sort so the same file is
    # always chosen when the folder holds more than one match
    era_fns = sorted(glob.glob(os.path.join(era_path, '*ERA*.csv')))
    if not era_fns:
        # callers detect a missing file by checking for this string sentinel
        return 'N/A'
    era = pd.read_csv(era_fns[0])
    # tag every row with its site so rows stay identifiable after compiling
    era['site_name'] = site_name
    return era
    
# Make sure the folder for the compiled ERA data exists (creates parent folders too)
os.makedirs(eras_path, exist_ok=True)
eras_full_fn = os.path.join(eras_path, eras_fn)

# Site names = names of the RGI* entries in study-sites/ (needed on both paths below)
rgi_ids = [os.path.basename(x) for x in sorted(glob.glob(os.path.join(scm_path, 'study-sites', 'RGI*')))]

if not os.path.exists(eras_full_fn):
    # No compiled CSV yet: gather the ERA data of every study site.
    # Collect the per-site tables in a list and concatenate once.
    era_sites = []
    for rgi_id in tqdm(rgi_ids):
        era_site = load_site_era_data(rgi_id, study_sites_path)
        # load_site_era_data returns the string 'N/A' when a site has no file
        if not isinstance(era_site, str):
            era_sites.append(era_site)
    if not era_sites:
        # Fail before writing an empty cache file that would break later runs
        raise FileNotFoundError(f'No ERA data files found in {study_sites_path}')
    eras = pd.concat(era_sites, ignore_index=True)
    eras.to_csv(eras_full_fn, index=False)
    print('All ERA data saved to file: ', eras_full_fn)

else:
    # Load from file if it already exists
    eras = pd.read_csv(eras_full_fn)
    print('All ERA data loaded from file.')
    # Check if more sites need to be added to ERAs. Sites are identified by
    # the 'site_name' column, which load_site_era_data adds to every table.
    site_names_in_file = set(eras['site_name'].unique())
    era_new_sites = []
    for rgi_id in rgi_ids:
        if rgi_id in site_names_in_file:
            continue
        era_site = load_site_era_data(rgi_id, study_sites_path)
        if not isinstance(era_site, str):
            print(f'Adding {rgi_id} to ERAs...')
            era_new_sites.append(era_site)
    if era_new_sites:
        # Re-save only when something was actually added; previously the
        # re-save was guarded by a flag that was never set, so new sites were
        # never written to the compiled file.
        eras = pd.concat([eras] + era_new_sites, ignore_index=True)
        eras.to_csv(eras_full_fn, index=False)
        print('All ERA data saved to file: ', eras_full_fn)

# Sites that still have no ERA data (defined on both paths above)
site_names_with_era = set(eras['site_name'].unique())
site_names_no_era = [x for x in rgi_ids if x not in site_names_with_era]
# same list under the name the load path used before
rgi_ids_no_era = site_names_no_era

print('Number of sites with ERA = ', len(eras['site_name'].drop_duplicates()))
print(f'\nSites without ERA: N={len(site_names_no_era)} \n{site_names_no_era}')
# eras