# Select study sites using the distributions of RGI glacier terrain parameters (aspect, slope, area) and subregion


In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## Define paths in directory

In [None]:
# --- User-specific configuration -------------------------------------------
# All paths below are absolute paths on the author's machine / external
# drive; edit them before running the notebook elsewhere.

# path to all RGI O1 and O2 outlines
# (root folder; each entry of RGI_fns is joined onto it when the outlines are read)
RGI_path = '/Volumes/LaCie/raineyaberle/Research/PhD/GIS_data/RGI/'
# RGI shapefile names
# (paths relative to RGI_path, one shapefile per RGI region)
RGI_fns = ['01_rgi60_Alaska/01_rgi60_Alaska.shp',
           '02_rgi60_WesternCanadaUS/02_rgi60_WesternCanadaUS.shp']
# path to glacier-snow-cover-analysis
# (its 'functions' sub-folder is added to sys.path below)
code_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/glacier-snow-cover-analysis/'
# where to save selected glacier shapefiles
# (one sub-folder per selected glacier is created here; the AOIs table of
# completed sites is read from '<study_sites_path>/../analysis/AOIs.gpkg')
study_sites_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/study-sites/'

# Load necessary functions
# Make the project's 'functions' folder importable; position 1 keeps the
# first sys.path entry ahead of it.
sys.path.insert(1, os.path.join(code_path, 'functions'))
# Project-local helper module; this notebook uses
# f.determine_subregion_name_color(o1region, o2region) from it.
import utils as f


## Load all RGI O1 and O2 glacier outlines

In [None]:
# Read every RGI shapefile first, then concatenate once. Concatenating onto an
# empty GeoDataFrame inside the loop re-copies all previously read rows on
# every iteration and makes the result depend on how the empty placeholder
# frame is merged with real data.
RGI = pd.concat([gpd.read_file(os.path.join(RGI_path, RGI_fn)) for RGI_fn in RGI_fns])

# Region codes are cast to integers so they can be sorted and compared numerically
region_cols = ['O1Region', 'O2Region']
RGI[region_cols] = RGI[region_cols].astype(int)
RGI = RGI.sort_values(by=region_cols).reset_index(drop=True)

# Grab list of all unique regions and subregions in dataset,
# with the number of glaciers in each, ordered by region then subregion
unique_subregion_counts = (RGI[region_cols]
                           .value_counts()
                           .reset_index(name='count')
                           .sort_values(by=region_cols)
                           .reset_index(drop=True))
unique_subregions = unique_subregion_counts[region_cols].values

# Add column for subregion name
RGI['Subregion'] = ''
for o1region, o2region in unique_subregions:
    # the helper returns (name, color); only the name is needed here
    subregion_name = f.determine_subregion_name_color(o1region, o2region)[0]
    in_subregion = (RGI['O1Region']==o1region) & (RGI['O2Region']==o2region)
    RGI.loc[in_subregion, 'Subregion'] = subregion_name
# notebook display: glacier count per (O1Region, O2Region)
unique_subregion_counts

In [None]:
# Load AOIs for all completed sites
aois_fn = os.path.join(study_sites_path, '..', 'analysis', 'AOIs.gpkg')
aois = gpd.read_file(aois_fn)

# Region codes and terrain parameters are cast to float in one step so they
# can be binned and compared numerically
cols = ['O1Region', 'O2Region', 'Aspect', 'Slope', 'Area']
aois[cols] = aois[cols].astype(float)

# add subregion column
# Built in one pass over the two region columns instead of label-indexing
# rows 0..n-1, so it does not depend on the frame having a default integer
# index and the column exists even when the table has no rows.
# The helper returns (name, color); only the name is kept.
aois['Subregion'] = [f.determine_subregion_name_color(o1region, o2region)[0]
                     for o1region, o2region in zip(aois['O1Region'], aois['O2Region'])]

# notebook display
aois

## Plot distribution of terrain parameters for all glaciers and analyzed glaciers

In [None]:
# Parameters to plot (one panel each) with their axis labels and bin edges
columns = ['Aspect', 'Slope', 'Area', 'Subregion']
xlabels = ['Aspect [degrees]', 'Slope [degrees]', 'Area [km$^2$]', 'Subregion']
color = '#993404' #['#00441b', '#993404', '#4d004b', '#08306b']
bins = [np.linspace(0, 361, num=20),
        np.linspace(0, 51, num=20),
        [0, 10, 50, 100, 500, 1000, 5000],
        np.arange(0,len(unique_subregions))]

# 2 x 2 grid of panels, flattened so panels can be addressed by position
fig, ax = plt.subplots(2, 2, figsize=(12,12))
ax = ax.flatten()
for i, (column, xlabel, bin_edges) in enumerate(zip(columns, xlabels, bins)):
    panel = ax[i]
    is_subregion = column=='Subregion'
    # the Subregion panel holds names, so no bin edges are passed for it
    hist_kwargs = {} if is_subregion else {'bins': bin_edges}

    # all RGI glaciers on the left axis (grey) ...
    sns.histplot(data=RGI, x=column, facecolor='grey', ax=panel, **hist_kwargs)
    # ... and the analyzed glaciers on a twin right axis (colored)
    ax2 = panel.twinx()
    sns.histplot(data=aois, x=column, facecolor=color, alpha=0.5, ax=ax2, **hist_kwargs)

    if is_subregion:
        # one tick per subregion, labelled in order of appearance in RGI
        panel.set_xticks(np.arange(0,len(unique_subregions)))
        panel.set_xticklabels(RGI['Subregion'].drop_duplicates().values, rotation=90)
    elif column=='Area':
        panel.set_xscale('log')

    # left axis: grey tick labels to match the RGI histogram
    panel.set_yticks(panel.get_yticks())
    panel.set_yticklabels(panel.get_yticklabels(), color='grey')
    panel.set_xlabel(xlabel)
    panel.set_ylabel('')

    # right axis: doubled upper limit, tick labels and spine in the AOI color
    ax2.set_ylabel('')
    ax2.set_ylim(0, ax2.get_ylim()[1]*2)
    ax2.set_yticks(ax2.get_yticks())
    ax2.set_yticklabels(ax2.get_yticklabels(), color=color)
    ax2.spines['right'].set_color(color)

fig.subplots_adjust(wspace=0.4)
plt.show()

## Plot distributions by subregion

In [None]:
# Subregions that contain at least one completed site, in order of appearance
subregions = aois['Subregion'].drop_duplicates().values
# Parameters to plot (one column of panels each) with titles and bin edges
columns = ['Aspect', 'Slope', 'Area']
xlabels = ['Aspect [degrees]', 'Slope [degrees]', 'Area [km$^2$]']
bins_list = [np.linspace(0, 361, num=20),
             np.linspace(0, 51, num=20),
             np.linspace(0, 300, num=20)]
# color used for the completed-site histograms and their right-hand axis
aois_color = '#b35806'

# Set up figures
plt.rcParams.update({'font.sans-serif': 'Arial', 'font.size': 12})
# squeeze=False keeps `ax` two-dimensional even when there is only one
# subregion, so the ax[i,j] indexing below always works
fig, ax = plt.subplots(len(subregions), len(columns),
                       figsize=(10, len(subregions)*2), squeeze=False)

for i, subregion in enumerate(subregions):
    # Subset glaciers
    aois_subregion = aois.loc[aois['Subregion']==subregion]
    RGI_subregion = RGI.loc[RGI['Subregion']==subregion]

    for j, (column, xlabel, bin_edges) in enumerate(zip(columns, xlabels, bins_list)):
        # Plot all glaciers in subregion (left axis)
        ax[i,j].hist(RGI_subregion[column].values, bins=bin_edges, facecolor='k', alpha=0.6)
        if j==0:
            # row label: subregion name on the first panel of each row
            ax[i,j].set_ylabel(subregion)
        # Plot completed sites in subregion on a twin right axis
        ax2 = ax[i,j].twinx()
        ax2.hist(aois_subregion[column].values, bins=bin_edges, facecolor=aois_color, alpha=0.6)
        ax2.set_yticks(ax2.get_yticks())
        ax2.set_yticklabels(ax2.get_yticklabels(), color=aois_color)
        ax2.spines['right'].set_color(aois_color)
        ax2.tick_params(axis='y', color=aois_color)
        if i==0:
            # column title: parameter name on the first row only
            ax[i,j].set_title(xlabel)

fig.tight_layout()
plt.show()

## Pick some sites based on distributions (done)

In [None]:
subregion = 'N. Cascades'
# completed sites in subregion
aois_subregion = aois.loc[aois['Subregion']==subregion]
if aois_subregion.empty:
    # the region codes below are taken from the first completed site, so fail
    # with a clear message instead of an IndexError when there is none
    raise ValueError('No completed sites found in subregion: ' + subregion)
o1region = aois_subregion['O1Region'].values[0]
o2region = aois_subregion['O2Region'].values[0]
# all RGI glaciers sharing the region codes of the completed sites
RGI[['O1Region', 'O2Region']] = RGI[['O1Region', 'O2Region']].astype(int)
rgi_subregion = RGI.loc[(RGI['O1Region']==o1region) & (RGI['O2Region']==o2region)]
# NOT completed sites
# A single boolean mask replaces the per-ID lookup and row-by-row concat,
# which re-copied the growing table on every iteration.
already_done = rgi_subregion['RGIId'].isin(aois_subregion['RGIId'])
rgi_not_dones = rgi_subregion.loc[~already_done].reset_index(drop=True)
site_names_not_done = rgi_not_dones['RGIId'].tolist()
# notebook display
rgi_not_dones

In [None]:
# Plot distributions for a single region
# subregion = 'N. Cascades'

# all RGI glaciers and completed sites in the subregion chosen in the cell above
RGI_subregion = RGI.loc[RGI['Subregion']==subregion]
AOIs_subregion = aois.loc[aois['Subregion']==subregion]

# Parameters to plot (one panel each) with labels, colors and bin edges
columns = ['Aspect', 'Slope', 'Area']
xlabels = ['Aspect [degrees]', 'Slope [degrees]', 'Area [km$^2$]']
colors = ['#00441b', '#993404', '#4d004b']
bins = [np.linspace(0, 361, num=12),
        np.linspace(0, 51, num=12),
        np.linspace(0, np.nanmax(RGI_subregion['Area']), num=12)]
fig, ax = plt.subplots(1, 3, figsize=(18,6))
# iterate over columns
for i, (column, xlabel, color, bin_edges) in enumerate(zip(columns, xlabels, colors, bins)):
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_width = bin_edges[1] - bin_edges[0]
    # Count glaciers per bin. np.histogram closes the last bin on the right,
    # so a value equal to the final edge (e.g. the largest glacier, whose
    # area defines the last Area edge) is counted instead of being dropped
    # by a half-open [lo, hi) comparison. Missing values are removed first.
    RGI_values = RGI_subregion[column].dropna().to_numpy(dtype=float)
    AOI_values = AOIs_subregion[column].dropna().to_numpy(dtype=float)
    RGI_counts = np.histogram(RGI_values, bins=bin_edges)[0]
    AOI_counts = np.histogram(AOI_values, bins=bin_edges)[0]
    # plot: all RGI glaciers (semi-transparent, left axis) and completed
    # sites (opaque, twin right axis)
    ax[i].bar(bin_centers, RGI_counts, width=bin_width, facecolor=color, edgecolor='white', alpha=0.5)
    ax2 = ax[i].twinx()
    ax2.bar(bin_centers, AOI_counts, width=bin_width, facecolor=color, edgecolor='white')
    ax[i].set_yticks(ax[i].get_yticks())
    ax[i].set_yticklabels(ax[i].get_yticklabels(), color=color, alpha=0.5)
    ax[i].set_xlabel(xlabel)
    # right axis spans three times the tallest AOI bar; the floor of 1 keeps
    # the limits distinct when no completed site falls in any bin
    ax2.set_ylim(0, max(AOI_counts.max(), 1)*3)
    ax2.set_yticks(ax2.get_yticks())
    ax2.set_yticklabels(ax2.get_yticklabels(), color=color)

plt.show()

In [None]:
# Candidate glaciers in the current subregion: aspect strictly between 150
# and 250 degrees, slope above 15 degrees, area strictly between 10 and 50 km^2
meets_criteria = ((RGI_subregion['Aspect'] > 150) & (RGI_subregion['Aspect'] < 250)
                  & (RGI_subregion['Slope'] > 15)
                  & (RGI_subregion['Area'] < 50) & (RGI_subregion['Area'] > 10))
# check if already completed
# (one membership mask instead of a positional loop over the rows)
already_done = RGI_subregion['RGIId'].isin(aois['RGIId'])
# Selecting with the combined mask and re-indexing the result avoids
# modifying a slice of RGI_subregion in place
subset = RGI_subregion.loc[meets_criteria & ~already_done].reset_index(drop=True)
# notebook display
subset[['RGIId', 'Aspect', 'Slope', 'Area', 'Subregion']]


In [None]:
# RGI IDs of the glaciers selected as new study sites
ids = ['RGI60-01.00032', 'RGI60-01.00033','RGI60-01.12347', 'RGI60-01.15719', 'RGI60-01.19783', 
      'RGI60-01.22207', 'RGI60-01.12355', 'RGI60-01.22204', 'RGI60-01.23094']
# For each selected glacier: plot its outline, create its folder structure
# under study_sites_path, and save the outline as a shapefile.
for rgi_id in ids:
    print(rgi_id)
    # re-index a fresh copy rather than resetting a slice of RGI in place
    RGI_site = RGI.loc[RGI['RGIId']==rgi_id].reset_index(drop=True)
    if RGI_site.empty:
        # nothing to plot or save for an ID that is not in the loaded outlines
        print('No RGI outline found for ' + rgi_id + ', skipping')
        print(' ')
        continue
    RGI_site.plot()
    plt.show()
    # check if folder already exists in study_sites_path
    site_path = os.path.join(study_sites_path, rgi_id)
    AOI_path = os.path.join(site_path, 'AOIs')
    # make directories (site folder first, then its AOIs sub-folder)
    for new_dir in (site_path, AOI_path):
        if not os.path.exists(new_dir):
            os.mkdir(new_dir)
            print('Made directory: ' + new_dir)
    imagery_path = os.path.join(site_path, 'imagery')
    if not os.path.exists(imagery_path):
        os.mkdir(imagery_path)
    # save shapefile
    AOI_fn = rgi_id + '_outline.shp'
    RGI_site.to_file(os.path.join(AOI_path, AOI_fn), index=False)
    print('Glacier boundary saved to file')
    print(' ')