# Compile data: snowlines, ERA, AOIs

In [None]:
import os
import glob
import pandas as pd
import geopandas as gpd
from tqdm.auto import tqdm
import numpy as np

In [None]:
# -----Root directory holding all snow cover mapping data
scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'

# -----Directory with one sub-folder per study site
study_sites_path = os.path.join(scm_path, 'study-sites')

# -----Collect study site names: every directory entry whose name contains
#      'RGI', in alphabetical order
site_names = sorted(entry for entry in os.listdir(study_sites_path) if 'RGI' in entry)
print('Number of study sites = ', len(site_names))
site_names

## Load and compile snowlines

In [None]:
# Folder that holds the compiled (all-sites) snowlines table
snowlines_path = os.path.join(scm_path, 'all_snowlines')
# File name of the compiled snowlines CSV inside snowlines_path
snowlines_fn = 'all_snowlines.csv'

def load_site_snowlines(site_name, study_sites_path):
    """Read the snowlines CSV of a single study site.

    Searches ``<study_sites_path>/<site_name>/`` for files matching
    ``*_snowlines.csv`` and reads the first match returned by ``glob``.

    Parameters
    ----------
    site_name : str
        Name of the site folder.
    study_sites_path : str
        Directory that contains the site folders.

    Returns
    -------
    pandas.DataFrame or str
        The table read from the CSV, or the string ``'N/A'`` when no file
        matches the pattern.
    """
    matches = glob.glob(os.path.join(study_sites_path, site_name, '*_snowlines.csv'))
    # No snowlines file for this site: hand back the sentinel callers test for
    if not matches:
        return 'N/A'
    return pd.read_csv(matches[0])
    
# Build (or refresh) the table of snowlines for all study sites. The compiled
# table is cached as a CSV: it is built from the per-site files when the cache
# is missing, otherwise it is read back and extended with any site that is not
# in it yet.

# check if snowlines path exists
if not os.path.exists(snowlines_path):
    os.mkdir(snowlines_path)
# check if all snowlines CSV exists
if not os.path.exists(os.path.join(snowlines_path, snowlines_fn)):
    # compile the snowlines of all study sites
    snowlines_list = []
    for site_name in tqdm(site_names):
        snowlines_site = load_site_snowlines(site_name, study_sites_path)
        # the loader returns the string 'N/A' for sites without a snowlines file
        if not isinstance(snowlines_site, str):
            snowlines_list.append(snowlines_site)
    if not snowlines_list:
        # stop before an empty cache file is written
        raise FileNotFoundError(f'No snowlines files found in {study_sites_path}')
    # concatenate once instead of growing the table inside the loop
    snowlines = pd.concat(snowlines_list)
    snowlines.reset_index(drop=True, inplace=True)
    snowlines.to_csv(os.path.join(snowlines_path, snowlines_fn), index=False)
    print('All snowlines saved to file: ', os.path.join(snowlines_path, snowlines_fn))

else:
    # Load from file if it already exists
    snowlines = pd.read_csv(os.path.join(snowlines_path, snowlines_fn))
    # NOTE(review): 'datetime' is parsed only on this load-from-file path; a
    # freshly compiled table keeps whatever dtype read_csv gave the column.
    snowlines['datetime'] = pd.to_datetime(snowlines['datetime'], format='mixed')
    print('All snowlines loaded from file.')
    # Check if more sites need to be added to snowlines
    site_names_no_snowlines = [x for x in site_names if x not in snowlines['site_name'].drop_duplicates().values]
    updated = False
    snowlines_list = [snowlines]
    for site_name in site_names_no_snowlines:
        print(f'Adding {site_name} to snowlines...')
        snowlines_site = load_site_snowlines(site_name, study_sites_path)
        if not isinstance(snowlines_site, str):
            snowlines_list.append(snowlines_site)
            # remember that the cached CSV is now out of date
            updated = True
    if updated:
        snowlines = pd.concat(snowlines_list)
        snowlines.reset_index(drop=True, inplace=True)
        # re-save snowlines to file
        snowlines.to_csv(os.path.join(snowlines_path, snowlines_fn), index=False)
        print('All snowlines saved to file: ', os.path.join(snowlines_path, snowlines_fn))

# Sites that (still) have no snowlines; computed here so the name is defined
# whether the table was freshly compiled or loaded from file
site_names_no_snowlines = [x for x in site_names if x not in
                           snowlines['site_name'].drop_duplicates().values]

print('\nNumber of sites with snowlines files:', len(snowlines['site_name'].drop_duplicates()))
print(f'\nSites without snowline files: N={len(site_names_no_snowlines)} \n{site_names_no_snowlines}')
# snowlines

## Load and compile glacier boundaries

In [None]:
# Folder that holds the compiled (all-sites) glacier boundaries
aois_path = os.path.join(scm_path, 'all_AOIs')
# File name of the compiled glacier boundaries shapefile inside aois_path
aois_fn = 'all_aois.shp'

def load_site_aoi(site_name, study_sites_path):
    """Read the glacier boundary (AOI) shapefile of a single study site.

    Searches ``<study_sites_path>/<site_name>/AOIs/`` for files matching
    ``*RGI*.shp``, reads the first match returned by ``glob`` and reprojects
    it to EPSG:4326.

    Parameters
    ----------
    site_name : str
        Name of the site folder.
    study_sites_path : str
        Directory that contains the site folders.

    Returns
    -------
    geopandas.GeoDataFrame or str
        The boundary in EPSG:4326, or the string ``'N/A'`` when no shapefile
        matches the pattern.
    """
    shapefiles = glob.glob(os.path.join(study_sites_path, site_name, 'AOIs', '*RGI*.shp'))
    # No boundary file for this site: hand back the sentinel callers test for
    if not shapefiles:
        return 'N/A'
    return gpd.read_file(shapefiles[0]).to_crs('EPSG:4326')
    
# Build (or refresh) the table of glacier boundaries for all study sites. The
# compiled table is cached as a shapefile: it is built from the per-site files
# when the cache is missing, otherwise it is read back and extended with any
# site that is not in it yet.
# NOTE(review): site folder names are matched against the 'RGIId' column, so
# this presumably relies on folder names being RGI IDs -- confirm.

# check if aois path exists
if not os.path.exists(aois_path):
    os.mkdir(aois_path)
# check if all aois shapefile exists
if not os.path.exists(os.path.join(aois_path, aois_fn)):
    # compile all RGI glacier boundaries
    aois_list = []
    for site_name in tqdm(site_names):
        aoi = load_site_aoi(site_name, study_sites_path)
        # the loader returns the string 'N/A' for sites without a boundary file
        if not isinstance(aoi, str):
            aois_list.append(aoi)
    if not aois_list:
        # stop before an empty cache file is written
        raise FileNotFoundError(f'No AOI shapefiles found in {study_sites_path}')
    # concatenate once instead of growing the table inside the loop
    aois = pd.concat(aois_list)
    aois.reset_index(drop=True, inplace=True)
    aois.to_file(os.path.join(aois_path, aois_fn), index=False)
    print('All glacier boundaries saved to file: ', os.path.join(aois_path, aois_fn))

else:
    # Load from file if it already exists
    aois = gpd.read_file(os.path.join(aois_path, aois_fn))
    print('All glacier boundaries loaded from file.')
    # Check if more sites need to be added to AOIs
    site_names_no_aois = [x for x in site_names if x not in aois['RGIId'].drop_duplicates().values]
    updated = False
    aois_list = [aois]
    for site_name in site_names_no_aois:
        print(f'Adding {site_name} to AOIs...')
        aoi = load_site_aoi(site_name, study_sites_path)
        if not isinstance(aoi, str):
            aois_list.append(aoi)
            # remember that the cached shapefile is now out of date
            updated = True
    if updated:
        # re-save AOIs to file
        aois = pd.concat(aois_list)
        aois.reset_index(drop=True, inplace=True)
        aois.to_file(os.path.join(aois_path, aois_fn), index=False)
        print('All glacier boundaries saved to file: ', os.path.join(aois_path, aois_fn))

# Sites that (still) have no glacier boundary; computed here so the name is
# defined whether the table was freshly compiled or loaded from file
site_names_no_aois = [x for x in site_names if x not in aois['RGIId'].drop_duplicates().values]

# Store the RGI region codes as integers (applied in memory only, after saving)
aois[['O1Region', 'O2Region']] = aois[['O1Region', 'O2Region']].astype(int)
print('Number of sites with glacier boundaries = ', len(aois['RGIId'].drop_duplicates()))
print(f'\nSites without glacier boundaries: N={len(site_names_no_aois)} \n{site_names_no_aois}')
# aois

## Load and compile ERA data

In [None]:
# Folder that holds the compiled (all-sites) ERA table
eras_path = os.path.join(scm_path, 'all_ERA_data')
# File name of the compiled ERA CSV inside eras_path
eras_fn = 'all_era_data.csv'

def load_site_era_data(site_name, study_sites_path):
    """Read the ERA CSV of a single study site and tag it with the site name.

    Searches ``<study_sites_path>/<site_name>/ERA/`` for files matching
    ``*ERA*.csv`` and reads the first match returned by ``glob``.

    Parameters
    ----------
    site_name : str
        Name of the site folder; also written to the ``site_name`` column.
    study_sites_path : str
        Directory that contains the site folders.

    Returns
    -------
    pandas.DataFrame or str
        The table read from the CSV with a ``site_name`` column set to
        ``site_name``, or the string ``'N/A'`` when no file matches.
    """
    matches = glob.glob(os.path.join(study_sites_path, site_name, 'ERA', '*ERA*.csv'))
    # No ERA file for this site: hand back the sentinel callers test for
    if not matches:
        return 'N/A'
    return pd.read_csv(matches[0]).assign(site_name=site_name)
    
# Build (or refresh) the table of ERA data for all study sites. The compiled
# table is cached as a CSV: it is built from the per-site files when the cache
# is missing, otherwise it is read back and extended with any site that is not
# in it yet.

# Check if ERA path exists
if not os.path.exists(eras_path):
    os.mkdir(eras_path)
# Check if ERA CSV exists
if not os.path.exists(os.path.join(eras_path, eras_fn)):
    # Compile all ERA data
    # NOTE(review): this rebinds the notebook-wide site_names using a
    # different rule (names starting with 'RGI' rather than containing it);
    # kept as-is for compatibility -- confirm whether it is still wanted.
    site_names = [os.path.basename(x) for x in sorted(glob.glob(os.path.join(scm_path, 'study-sites', 'RGI*')))]
    eras_list = []
    for site_name in tqdm(site_names):
        era_site = load_site_era_data(site_name, study_sites_path)
        # the loader returns the string 'N/A' for sites without an ERA file
        if not isinstance(era_site, str):
            eras_list.append(era_site)
    if not eras_list:
        # stop before an empty cache file is written
        raise FileNotFoundError(f'No ERA files found in {study_sites_path}')
    # concatenate once instead of growing the table inside the loop
    eras = pd.concat(eras_list)
    eras.reset_index(drop=True, inplace=True)
    eras.to_csv(os.path.join(eras_path, eras_fn), index=False)
    print('All ERA data saved to file: ', os.path.join(eras_path, eras_fn))
    site_names_no_era = [x for x in site_names if x not in eras['site_name'].drop_duplicates().values]

else:
    # Load from file if it already exists
    eras = pd.read_csv(os.path.join(eras_path, eras_fn))
    print('All ERA data loaded from file.')
    # Check if more sites need to be added to ERAs
    site_names_no_era = [x for x in site_names if x not in eras['site_name'].drop_duplicates().values]
    updated = False
    eras_list = [eras]
    # visit every missing site (the first one used to be skipped by a [1:] slice)
    for site_name in site_names_no_era:
        print(f'Adding {site_name} to ERAs...')
        era_site = load_site_era_data(site_name, study_sites_path)
        if not isinstance(era_site, str):
            eras_list.append(era_site)
            # remember that the cached CSV is now out of date
            updated = True
    if updated:
        # re-save ERAs to file
        eras = pd.concat(eras_list)
        eras.reset_index(drop=True, inplace=True)
        eras.to_csv(os.path.join(eras_path, eras_fn), index=False)
        print('All ERA data saved to file: ', os.path.join(eras_path, eras_fn))
        # re-define site names with no ERA data
        site_names_no_era = [x for x in site_names if x not in eras['site_name'].drop_duplicates().values]

print('Number of sites with ERA = ', len(eras['site_name'].drop_duplicates()))
print(f'\nSites without ERA: N={len(site_names_no_era)} \n{site_names_no_era}')
# eras