# Compile data: snowlines, ERA, AOIs

In [1]:
import os
import glob
import pandas as pd
import geopandas as gpd
from tqdm.auto import tqdm
import numpy as np

In [2]:
# -----Root folder of the snow cover mapping project data
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'

# -----Folder holding one sub-folder per study site
study_sites_path = os.path.join(scm_path, 'study-sites')

# -----Collect the study site names
# Keep every entry of study-sites/ whose name contains 'RGI', in sorted order.
site_names = sorted(
    entry for entry in os.listdir(study_sites_path)
    if 'RGI' in entry
)
print('Number of study sites = ', len(site_names))
site_names

Number of study sites =  195


['RGI60-01.00032',
 'RGI60-01.00033',
 'RGI60-01.00037',
 'RGI60-01.00038',
 'RGI60-01.00046',
 'RGI60-01.00312',
 'RGI60-01.00566',
 'RGI60-01.00570',
 'RGI60-01.00576',
 'RGI60-01.00675',
 'RGI60-01.01104',
 'RGI60-01.01151',
 'RGI60-01.01390',
 'RGI60-01.01524',
 'RGI60-01.01733',
 'RGI60-01.03594',
 'RGI60-01.03622',
 'RGI60-01.03861',
 'RGI60-01.04375',
 'RGI60-01.04624',
 'RGI60-01.06268',
 'RGI60-01.06722',
 'RGI60-01.08155',
 'RGI60-01.08174',
 'RGI60-01.08246',
 'RGI60-01.08248',
 'RGI60-01.08262',
 'RGI60-01.08288',
 'RGI60-01.08296',
 'RGI60-01.08302',
 'RGI60-01.08336',
 'RGI60-01.08353',
 'RGI60-01.08389',
 'RGI60-01.08395',
 'RGI60-01.08403',
 'RGI60-01.08412',
 'RGI60-01.08427',
 'RGI60-01.09148',
 'RGI60-01.09162',
 'RGI60-01.09216',
 'RGI60-01.09411',
 'RGI60-01.09639',
 'RGI60-01.10196',
 'RGI60-01.10555',
 'RGI60-01.10689',
 'RGI60-01.10778',
 'RGI60-01.10851',
 'RGI60-01.10857',
 'RGI60-01.11616',
 'RGI60-01.11654',
 'RGI60-01.11788',
 'RGI60-01.12347',
 'RGI60-01.1

## Load and compile snowlines

In [3]:
# -----Compile (or load) the snowlines of all study sites
# The first run gathers one '*_snowlines.csv' per site folder into a single CSV;
# later runs load that CSV instead of re-reading every site.
snowlines_path = os.path.join(scm_path, 'all_snowlines')
snowlines_fn = 'all_snowlines.csv'
snowlines_file = os.path.join(snowlines_path, snowlines_fn)
# make sure the output folder exists (no error if it already does)
os.makedirs(snowlines_path, exist_ok=True)
# check if all snowlines CSV exists
if not os.path.exists(snowlines_file):
    # compile the snowlines of all sites
    snowlines_list = []
    for site_name in tqdm(site_names):
        snowline_path = os.path.join(study_sites_path, site_name)
        # glob order is unspecified: sort so the same file is picked on every run
        snowline_fns = sorted(glob.glob(os.path.join(snowline_path, '*_snowlines.csv')))
        if len(snowline_fns) > 0:
            snowlines_list.append(pd.read_csv(snowline_fns[0]))
    # fail before writing anything, so an empty (unreadable) CSV is never cached
    if len(snowlines_list) == 0:
        raise FileNotFoundError('No *_snowlines.csv files found in ' + study_sites_path)
    # concatenate once instead of growing the DataFrame inside the loop
    snowlines = pd.concat(snowlines_list, ignore_index=True)
    snowlines.to_csv(snowlines_file, index=False)
    print('All snowlines saved to file: ', snowlines_file)

else:
    # load from file if it already exists
    snowlines = pd.read_csv(snowlines_file)
    print('All snowlines loaded from file.')

# parse dates on BOTH branches so the column dtype does not depend on
# whether the compiled CSV already existed
snowlines['datetime'] = pd.to_datetime(snowlines['datetime'], format='mixed')

# report coverage
snowline_sites = set(snowlines['site_name'].unique())
print('Number of sites with snowlines files:', len(snowline_sites))
print('\nSites without snowline files: ')
print([x for x in site_names if x not in snowline_sites])

  0%|          | 0/195 [00:00<?, ?it/s]

All snowlines saved to file:  /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/all_snowlines/all_snowlines.csv
Number of sites with snowlines files: 187

Sites without snowline files: 
['RGI60-01.14523', 'RGI60-01.22207', 'RGI60-01.22699', 'RGI60-02.05157', 'RGI60-02.06859', 'RGI60-02.12721', 'RGI60-02.12722', 'RGI60-02.13130', 'RGI60-02.14297', 'RGI60-02.16722']


## Load and compile glacier boundaries

In [4]:
# -----Compile (or load) the glacier boundaries (AOIs) of all study sites
# The first run gathers one '*RGI*.shp' per site 'AOIs' folder, reprojected to
# EPSG:4326, into a single shapefile; later runs load that shapefile.
aois_path = os.path.join(scm_path, 'all_AOIs')
aois_fn = 'all_aois.shp'
aois_file = os.path.join(aois_path, aois_fn)
# make sure the output folder exists (no error if it already does)
os.makedirs(aois_path, exist_ok=True)
# check if all aois shapefile exists
if not os.path.exists(aois_file):
    # compile all RGI glacier boundaries
    aois_list = []
    for site_name in tqdm(site_names):
        aoi_path = os.path.join(study_sites_path, site_name, 'AOIs')
        # glob order is unspecified: sort so the same file is picked on every run
        aoi_fns = sorted(glob.glob(os.path.join(aoi_path, '*RGI*.shp')))
        if len(aoi_fns) > 0:
            # reproject to a common CRS so the boundaries can be concatenated
            aois_list.append(gpd.read_file(aoi_fns[0]).to_crs('EPSG:4326'))
    # fail before writing anything if no boundary was found
    if len(aois_list) == 0:
        raise FileNotFoundError('No *RGI*.shp files found in ' + study_sites_path)
    # concatenate once instead of growing the GeoDataFrame inside the loop
    aois = pd.concat(aois_list, ignore_index=True)
    aois.to_file(aois_file, index=False)
    print('All glacier boundaries saved to file: ', aois_file)

else:
    # load from file if it already exists
    aois = gpd.read_file(aois_file)
    print('All glacier boundaries loaded from file.')

# cast the region columns to integers on both branches
aois[['O1Region', 'O2Region']] = aois[['O1Region', 'O2Region']].astype(int)

# report coverage
aoi_sites = set(aois['RGIId'].unique())
print('Number of sites with glacier boundaries = ', len(aoi_sites))
print('\nSites without glacier boundaries:')
print([x for x in site_names if x not in aoi_sites])

  0%|          | 0/195 [00:00<?, ?it/s]

All glacier boundaries saved to file:  /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/all_AOIs/all_aois.shp
Number of sites with glacier boundaries =  188

Sites without glacier boundaries:
['RGI60-01.00570', 'RGI60-01.06722', 'RGI60-01.10555', 'RGI60-01.10689', 'RGI60-01.11616', 'RGI60-01.12635', 'RGI60-01.14391', 'RGI60-01.22699', 'RGI60-01.26743', 'RGI60-02.12433', 'RGI60-02.12435']


## Load and compile ERA data

In [5]:
# -----Compile (or load) the ERA data of all study sites
# The first run gathers one '*ERA*.csv' per site 'ERA' folder, tagged with the
# site name, into a single CSV; later runs load that CSV.
eras_path = os.path.join(scm_path, 'all_ERA_data')
eras_fn = 'all_era_data.csv'
eras_file = os.path.join(eras_path, eras_fn)
# make sure the output folder exists (no error if it already does)
os.makedirs(eras_path, exist_ok=True)
# check if ERA CSV exists
if not os.path.exists(eras_file):
    # compile all ERA data
    # NOTE: site_names is deliberately NOT redefined here. Reassigning it in this
    # branch only would make the "sites without ERA data" report below depend on
    # whether the compiled CSV already existed.
    eras_list = []
    for site_name in tqdm(site_names):
        era_path = os.path.join(study_sites_path, site_name, 'ERA')
        # glob order is unspecified: sort so the same file is picked on every run
        era_fns = sorted(glob.glob(os.path.join(era_path, '*ERA*.csv')))
        if len(era_fns) > 0:
            era = pd.read_csv(era_fns[0])
            # tag the rows with the site they belong to
            era['site_name'] = site_name
            eras_list.append(era)
    # fail before writing anything, so an empty (unreadable) CSV is never cached
    if len(eras_list) == 0:
        raise FileNotFoundError('No *ERA*.csv files found in ' + study_sites_path)
    # concatenate once instead of growing the DataFrame inside the loop
    eras = pd.concat(eras_list, ignore_index=True)
    eras.to_csv(eras_file, index=False)
    print('All ERA data saved to file: ', eras_file)

else:
    # load from file if it already exists
    eras = pd.read_csv(eras_file)
    print('All ERA data loaded from file.')

# parse dates on both branches
eras['Date'] = pd.to_datetime(eras['Date'], format='mixed')

# report coverage
era_sites = set(eras['site_name'].unique())
print('Number of sites with ERA data = ', len(era_sites))
print('\nSites without ERA data:')
print([x for x in site_names if x not in era_sites])

  0%|          | 0/195 [00:00<?, ?it/s]

All ERA data saved to file:  /Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/all_ERA_data/all_era_data.csv
Number of sites with ERA data =  188

Sites without ERA data:
['RGI60-01.22207', 'RGI60-02.05157', 'RGI60-02.06859', 'RGI60-02.12721', 'RGI60-02.12722', 'RGI60-02.13130', 'RGI60-02.16722']
