# Select study sites using the distribution of RGI glacier areas


In [None]:
!pip install contextily

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import operator
import contextily as cx

In [None]:
# -----Directory layout used throughout the notebook
# Folder holding the RGI region folders
RGI_path = '/Volumes/GoogleDrive/My Drive/Research/PhD/GIS_data/RGI/'
# Shapefiles to load, relative to RGI_path. Each one sits in a folder that
# shares the shapefile's base name, so the list is built from the folder stems.
RGI_fns = [
    f'{stem}/{stem}.shp'
    for stem in ('01_rgi60_Alaska', '02_rgi60_WesternCanadaUS')
]
# Destination folder for the selected glacier shapefiles
out_path = '/Volumes/GoogleDrive/My Drive/Research/PhD/snow_cover_mapping/snow_cover_mapping_application/study-sites/'

In [None]:
# -----Load, format, filter, plot RGI glacier outlines
# Read every RGI shapefile and stack them with a single concat call, instead of
# repeatedly concatenating onto an initially empty GeoDataFrame inside a loop.
RGI = pd.concat([gpd.read_file(RGI_path + RGI_fn) for RGI_fn in RGI_fns])
# subset to glaciers with area > 5 km^2
RGI_gt5 = RGI.loc[RGI['Area'] > 5].reset_index(drop=True)
# change int data types to float for saving
# (column list defined once so the two sides of the assignment cannot drift apart)
float_cols = ['Zmin', 'Zmax', 'Zmed', 'Slope', 'Aspect', 'Lmax',
              'Status', 'Connect', 'Form', 'TermType', 'Surging', 'Linkages']
RGI_gt5[float_cols] = RGI_gt5[float_cols].astype(float)
# grab list of all unique (region, subregion) pairs in dataset, sorted by region then subregion
regions_subregions = sorted(RGI_gt5[['O1Region', 'O2Region']].drop_duplicates().values,
                            key=operator.itemgetter(0, 1))

# -----Plot all sites with color distinguishing subregions
# Names and colors are matched to regions_subregions by position, so they must
# be listed in the same sorted order and be at least as long as that list.
subregions_names = ['Brooks Range', 'Alaska Range', 'Aleutians', 'W. Chugach Mtns.', 'St. Elias Mtns.',
                    'N. Coast Ranges', 'N. Rockies', 'N. Cascades', 'S. Rockies', 'S. Cascades']
subregions_colors = ['c', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
                     '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a']
# set font options before creating the figure so they apply to everything drawn on it
plt.rcParams.update({'font.size':12, 'font.sans-serif':'Arial'})
fig1, ax1 = plt.subplots(1, 1, figsize=(12,10))
# NOTE(review): EPSG:9822 appears to be the EPSG code of the Albers Equal Area
# projection *method* rather than of a projected CRS -- confirm that to_crs()
# and contextily accept it, or switch to a concrete Albers CRS for the study area.
crs = 'EPSG:9822' # Albers Equal Conic projection
for i, (region, subregion) in enumerate(regions_subregions):
    in_subregion = (RGI_gt5['O1Region']==region) & (RGI_gt5['O2Region']==subregion)
    RGI_gt5_subregion = RGI_gt5.loc[in_subregion]
    RGI_gt5_subregion_reproj = RGI_gt5_subregion.to_crs(crs)
    for j, polygon in enumerate(RGI_gt5_subregion_reproj['geometry']):
        # only the first outline of each subregion gets a legend entry
        label = subregions_names[i] if j == 0 else '_nolegend_'
        ax1.plot(*polygon.exterior.xy, label=label, color=subregions_colors[i])
cx.add_basemap(ax=ax1, crs=crs, source=cx.providers.Esri.WorldShadedRelief, attribution=False)
ax1.legend(loc='center right', title='RGI Subregions', bbox_to_anchor=[1.25, 0.5, 0.2, 0.2])
# hide the projected-coordinate tick labels but keep the grid lines
ax1.set_xticklabels([])
ax1.set_yticklabels([])
ax1.grid()
plt.show()

# -----Save figure to file
fig1.savefig(out_path + '../RGI_regions_1+2.png', facecolor='w', dpi=300)
print('RGI subregions figure saved to file')

In [None]:
# Select random glaciers from each subregion that represent the overall area distribution
# and save shapefiles to out_path
# NOTE: the random-selection and shapefile-export steps are currently disabled
# (kept below as commented-out code). As written, this cell only draws the
# per-subregion area histogram and re-saves figure 1.

nsites = 10 # total number of sites from each subregion (used by the disabled selection step)
count = 0 # glacier count for plotting (used by the disabled selection step)
for k, (region, subregion) in enumerate(regions_subregions):

    print('RGI Subregion '+subregions_names[k])
    print('----------')

    # subset dataset to subregion
    RGI_gt5_subregion = RGI_gt5.loc[(RGI_gt5['O1Region']==region) & (RGI_gt5['O2Region']==subregion)]

    # calculate min and max glacier areas in subregion
    area_min = np.nanmin(RGI_gt5_subregion['Area'].values)
    area_max = np.nanmax(RGI_gt5_subregion['Area'].values)

    # set up figure
    fig2, ax2 = plt.subplots(1, 2, figsize=(12, 6))
    # calculate histogram of overall distribution
    # The range is widened outward to whole km^2 (floor/ceil). Rounding to the
    # nearest integer could move a bound inside the data and silently drop the
    # smallest or largest glaciers from the histogram.
    counts, edges, bars = ax2[0].hist(RGI_gt5_subregion['Area'].values, bins=10,
                                      range=[np.floor(area_min), np.ceil(area_max)],
                                      edgecolor='k', facecolor='#2b8cbe')

    # TODO: Plot distributions of slope, area, and aspect for each subregion

    # ----- Disabled: random site selection, shapefile export, and selected-sites histogram
    # NOTE(review): before re-enabling, check that (1) the strict > / < bin test
    # does not drop glaciers whose area equals a bin edge, and (2) sampling with
    # np.random.randint (with replacement) picking the same glacier twice is acceptable.
    # randomly select nsites using the histogram bins
    # counts_selected = np.zeros(len(counts), dtype=int)
    # print('Selected glaciers:')
    # for i in range(0,len(counts)):
    #     # identify number of sites to select from each bin
    #     counts_selected[i] = int(np.round(counts[i] / len(RGI_gt5_subregion) * nsites))
    #     if counts_selected[i] > 0:
    #         # subset sites to those within that area bin
    #         RGI_gt5_subregion_bin = RGI_gt5_subregion.loc[(RGI_gt5_subregion['Area']>edges[i]) &
    #                                                       (RGI_gt5_subregion['Area']<edges[i+1])]
    #         # generate counts_selected random numbers
    #         Iselected = np.random.randint(low=0, high=len(RGI_gt5_subregion_bin), size=counts_selected[i])
    #         RGI_selected = RGI_gt5_subregion_bin.iloc[Iselected]
    #         print(RGI_selected[['RGIId', 'Name']].values)
            # save selected study sites to file
            # for j in range(0,len(RGI_selected)):
            #     RGI_select = gpd.GeoDataFrame(RGI_selected.iloc[j]).transpose().reset_index(drop=True)
            #     RGI_select.crs = RGI_selected.crs # set CRS
            #     RGIId = RGI_select['RGIId'].values[0]
            #     out_fn =  RGIId + '.shp'
            #     # make folder for study site
            #     if os.path.exists(out_path)==False:
            #         os.mkdir(out_path)
            #     if os.path.exists(out_path + RGIId + '/')==False:
            #         os.mkdir(out_path + RGIId + '/')
            #     if os.path.exists(out_path + RGIId+ '/AOIs/')==False:
            #         os.mkdir(out_path + RGIId+ '/AOIs/')
            #     # save file in folder
            #     RGI_select.to_file(out_path + RGIId + '/AOIs/' + out_fn)
            #     # plot RGI outlines on figure 1
            #     RGI_select_reproj = RGI_select.to_crs(crs)
            #     polygon = RGI_select_reproj['geometry'].values[0]
            #     if count==0:
            #         label='Selected sites'
            #     else:
            #         label='_nolegend_'
            #     centroid = polygon.centroid
            #     ax1.plot(*centroid.coords.xy, '*k', markersize=5, label=label)
            #     count+=1
            # print('selected sites saved to shapefiles in out_path.')

    # # adjust axes
    # ax2[0].bar_label(bars)
    # ax2[0].set_xlabel('Glacier area [km$^2$]')
    # ax2[0].set_ylabel('Count')
    # ax2[0].set_title('Overall areas distribution')
    # ax2[0].grid()
    # # plot histogram for selected glaciers
    # bin_centers = np.array([(edges[i+1]-edges[i])/2+edges[i] for i in range(0, len(edges)-1)])
    # ax2[1].bar(bin_centers, counts_selected, width=edges[1]-edges[0], edgecolor='k', facecolor='#feb24c')
    # ax2[1].set_xlabel('Glacier area [km$^2$]')
    # ax2[1].grid()
    fig2.suptitle('RGI Subregion '+subregions_names[k])
    plt.show()

    print(' ')

# -----Save figure 1 with selected study sites plotted
# Rebuild the legend so any 'Selected sites' entry added above is included.
ax1.legend(loc='best', title='RGI Subregions')
fig1.savefig(out_path + '../selected_sites.png', facecolor='w', dpi=300)
print('Selected study sites figure saved to file')