# RGI03 (Arctic Canada North)

F. Maussion & S. Galos

In [None]:
import pandas as pd
import geopandas as gpd
import subprocess
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
from utils import (mkdir, submission_summary, needs_size_filter, size_filter, plot_map, plot_date_hist, 
                   find_duplicates, open_zip_shapefile, correct_geoms, fix_overaps)
import os

## Files and storage paths

In [None]:
# Region of interest (RGI first-order region number; formatted as two digits below)
reg = 3

# Root of the data tree, relative to rgi7_scripts/workflow
data_dir = '../../rgi7_data/'

# Level 2 GLIMS files (one tarball per region, read further below)
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output directories: plain shapefiles and their tarred counterparts.
# mkdir comes from utils; its return value is used as the directory path.
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# RGI v6 file for comparison later (file name is specific to region 03)
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '03_rgi60_ArcticCanadaNorth.zip')

# Supporting shapes and data (selection polygons, reference inventories)
support_dir = os.path.join(data_dir, 'l0_support_data')

### Load the input data

In [None]:
# Read the L2 outlines of this region directly from inside the regional tarball
# (the 'tar://' prefix addresses a file within the .tar.gz archive)
shp = gpd.read_file('tar://' + l2_dir + f'/RGI{reg:02d}.tar.gz/RGI{reg:02d}/RGI{reg:02d}.shp')

### List of submissions 

In [None]:
# Summarise all submissions available for the region; the second return value
# of submission_summary is not needed at this stage and is discarded.
sdf, _ = submission_summary(shp)
sdf

Notes based on individual submission evaluations:

- 590: This is the RGI, without Melville Island
- 635: are seven glaciers on Melville Island, yet not what is used by RGI6 (RGI6 outlines for Melville are not available in GLIMS)
- 660, 661: also Melville Island. A bit further to 2000 than 635.
- 728: Three parts: Ellesmere (Central), Axel Heiberg, Ellesmere (South West). To be used in RGI7?
- 723: Ellesmere North - to be used in RGI7
- 713, 712, 711: Ellesmere North - too late, not for RGI7
- 755: missing outlines added by Will: https://github.com/GLIMS-RGI/rgi7_scripts/issues/34

In [None]:
# # Write out selection in intermediate shape files for manual GIS review
# tmp_output_dir = mkdir(os.path.join(data_dir, 'l0_tmp_data', f'rgi{reg:02d}_inventories'))
# tmp_output_dir_tar = mkdir(os.path.join(data_dir, 'l0_tmp_data'))
# for subid in shp.subm_id.unique():
#     s_loc = shp.loc[shp.subm_id == subid]
#     s_loc.to_file(tmp_output_dir + f'/subm_{int(subid):03d}.shp')
# print('Taring...')
# print(subprocess.run(['tar', '-zcvf', f'{tmp_output_dir_tar}/rgi{reg:02d}_inventories.tar.gz', '-C', 
#                       os.path.join(data_dir, 'l0_tmp_data'), f'rgi{reg:02d}_inventories']))

### Read in the geometry data for sub-inventory selection 

In [None]:
# Read the hand-drawn sub-inventory selection polygons for this region.
# The archive name is built from `reg`, like every other regional path in this notebook.
shp_loc = gpd.read_file('tar://' + support_dir + f'/sub_inventory_sel_RGI{reg:02d}.tar.gz/sub_inventory_sel_RGI{reg:02d}.shp')

In [None]:
# Quick look at the selection polygons (trailing ';' suppresses the Axes repr)
shp_loc.plot(edgecolor='k');

In [None]:
# Show the attribute table of the selection polygons (a 'subm_id' column is used below)
shp_loc

In [None]:
# Test the polygons I drew: selecting by polygon must return the same outlines
# as selecting by submission id.
# (Except RGI6 (590), which will be replaced in parts)
for sub_id in [635, 728, 723]:
    sel = shp.loc[shp['subm_id'] == sub_id].copy()
    # One representative point per outline, tagged with the outline's index in `shp`
    rp = sel.representative_point().to_frame('geometry')
    rp['orig_index'] = sel.index
    # Keep only the points falling inside the polygon(s) drawn for this submission
    intersect = gpd.overlay(rp, shp_loc.loc[shp_loc['subm_id'] == sub_id], how='intersection')
    odf = sel.loc[intersect['orig_index']]
    # Name the offending submission so a failure is actionable
    assert len(sel) == len(odf), (f'subm_id {sub_id}: {len(sel)} outlines selected by id '
                                  f'but {len(odf)} selected by polygon')

### Apply selection criteria to create the RGI7 data subset

In [None]:
# Melville Island: submission 635
rgi7_Melville = shp.loc[shp['subm_id'] == 635].copy()
print(len(rgi7_Melville))
# Apply the size filter and report the count again
rgi7_Melville = size_filter(rgi7_Melville)
print(len(rgi7_Melville))
# These outlines do not come from RGI6
rgi7_Melville['is_rgi6'] = False

In [None]:
# Ellesmere North: submission 723
rgi7_North = shp.loc[shp['subm_id'] == 723].copy()
print(len(rgi7_North))
# Apply the size filter and report the count again
rgi7_North = size_filter(rgi7_North)
print(len(rgi7_North))
# These outlines do not come from RGI6
rgi7_North['is_rgi6'] = False

In [None]:
# Ellesmere Central + Axel Heiberg: submission 728
rgi7_Central = shp.loc[shp['subm_id'] == 728].copy()
print(len(rgi7_Central))
# Apply the size filter and report the count again
rgi7_Central = size_filter(rgi7_Central)
print(len(rgi7_Central))
# These outlines do not come from RGI6
rgi7_Central['is_rgi6'] = False

In [None]:
# Add-ons from Will (submission 755): https://github.com/GLIMS-RGI/rgi7_scripts/issues/34
rgi7_Addons = shp.loc[shp['subm_id'] == 755].copy()
print(len(rgi7_Addons))
# Apply the size filter and report the count again
rgi7_Addons = size_filter(rgi7_Addons)
print(len(rgi7_Addons))
# These outlines do not come from RGI6
rgi7_Addons['is_rgi6'] = False

In [None]:
# Everything else (Devon, East, etc.): taken from submission 590, i.e. RGI6
rgi7_South = shp.loc[shp['subm_id'] == 590].copy()
print(len(rgi7_South))

# Spatial selection: keep outlines whose representative point lies inside
# the polygon(s) drawn for submission 590
rp = rgi7_South.representative_point().to_frame('geometry')
rp['orig_index'] = rgi7_South.index
intersect = gpd.overlay(rp, shp_loc.loc[shp_loc['subm_id'] == 590], how='intersection')
rgi7_South = rgi7_South.loc[intersect['orig_index']]

# Size selection, with counts printed before and after
print(len(rgi7_South))
rgi7_South = size_filter(rgi7_South)
print(len(rgi7_South))

# Flag these outlines as coming from RGI6
rgi7_South['is_rgi6'] = True

In [None]:
# Combine the five regional subsets into one frame.
# The original row index of `shp` is kept (no ignore_index).
rgi7 = pd.concat([rgi7_Melville, rgi7_North, rgi7_Central, rgi7_Addons, rgi7_South])

In [None]:
# Size selection on the combined frame; counts printed before and after.
# NOTE(review): every subset was already size-filtered above, so the two counts
# are presumably identical — confirm.
print(len(rgi7))
rgi7 = size_filter(rgi7)
print(len(rgi7))

In [None]:
# Drop one known bad outline (a triangle), identified by its analysis id
len_bef = len(rgi7)
rgi7 = rgi7.loc[rgi7.anlys_id != 308558].copy()  # G281934E76633N
# Guard: exactly one outline must have been removed
assert len(rgi7) == (len_bef - 1)

### Some sanity checks 

In [None]:
# Collect candidate duplicate outlines (find_duplicates comes from utils);
# the returned rows are indexed positionally in the removal step below.
dupes = find_duplicates(rgi7) 

In [None]:
# for i in np.arange(1, 12, 2):
#     plt.figure();
#     f, axs = plt.subplots(1, 2)
#     dupes.iloc[[i-1]].plot(ax=axs[0]);
#     dupes.iloc[[i]].plot(ax=axs[1]);
# The first 4 pairs are duplicates, the last two pairs are really weird shapes that should be deleted

In [None]:
# Positional selection in `dupes` following the manual review noted above:
# one outline of each of the first four pairs (1, 3, 5, 7) and both outlines of
# the last two pairs (8-11).
# NOTE(review): this relies on the row order returned by find_duplicates.
to_remove = dupes.iloc[[1, 3, 5, 7, 8, 9, 10, 11]]
rgi7 = rgi7.loc[~rgi7.anlys_id.isin(to_remove.anlys_id)]
# Run the duplicate search again on the cleaned frame (result not kept)
find_duplicates(rgi7);

In [None]:
# First geometry correction pass (utils.correct_geoms; presumably repairs invalid geometries — confirm)
rgi7 = correct_geoms(rgi7)

In [None]:
# Presumably resolves overlapping outlines — see utils.fix_overaps
# (the helper name is spelled this way in the import at the top)
rgi7 = fix_overaps(rgi7)

In [None]:
# Second geometry correction pass, run after the overlap fix
rgi7 = correct_geoms(rgi7)

In [None]:
# Final size selection after the geometry and overlap corrections; counts printed before and after
print(len(rgi7))
rgi7 = size_filter(rgi7)
print(len(rgi7))

In [None]:
# Summarise the final selection; `sdf` is reused for the GitHub text below
# and the second return value is displayed here.
sdf, df_class = submission_summary(rgi7)
df_class

In [None]:
# Check the orphaned rock outcrops: report if any submission used in rgi7 also
# appears in the region's orphan-interiors file (only when that file exists).
orphan_f = os.path.join(data_dir, 'l1_orphan_interiors', f'RGI{reg:02d}', f'RGI{reg:02d}.shp')
if os.path.exists(orphan_f):
    # NOTE(review): `orphan_f` is rebound from the file path to the loaded frame
    orphan_f = gpd.read_file(orphan_f)
    # Boolean mask over the rgi7 submission ids that also occur in the orphan file
    check = np.isin(rgi7.subm_id.unique(), orphan_f.subm_id.unique())
    if np.any(check):
        print(f'Orphan rock outcrops detected in subm_id {rgi7.subm_id.unique()[check]}')
        # Area in an equal-area projection.
        # NOTE(review): the column is computed but not displayed or used in this cell.
        orphan_f['area'] = orphan_f.to_crs({'proj':'cea'}).area

### Plots 

In [None]:
# Map of the final selection (plot_map comes from utils); `loc` presumably sets the legend position
plot_map(rgi7, reg, loc='upper left')

In [None]:
# Same map with is_rgi6=True (presumably coloured by the 'is_rgi6' flag set above — see utils.plot_map)
plot_map(rgi7, reg, loc='upper left', is_rgi6=True)

In [None]:
# Histogram of outline dates for the final selection (see utils.plot_date_hist)
plot_date_hist(rgi7, reg)

### Text for github

In [None]:
# Transposed submission summary, used for the GitHub text below
fgh = sdf.T
fgh

In [None]:
# Markdown table for GitHub; 'subm_id' is prepended to the column names,
# presumably so the index column gets a header — confirm in the rendered output
print(fgh.to_markdown(headers=np.append(['subm_id'], fgh.columns)))

## Write out and tar 

In [None]:
# Region output directory; reset=True is passed to utils.mkdir
# (presumably to start from an empty directory — confirm).
# The trailing '/' matters because the file name is appended by plain string concatenation.
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
rgi7.to_file(dd + f'RGI{reg:02d}.shp')

print('Taring...')
# check=True raises CalledProcessError if tar exits with a non-zero status,
# so a failed or partial archive cannot go unnoticed
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}'],
                     check=True))

## New RGI-file created - Check result!

### load reference data (here RGI6 and Frank's) to enable comparison

In [None]:
# Load the reference datasets used in the comparisons below.
# open_zip_shapefile is already imported from utils in the import cell at the top.

# RGI6 for this region
ref_rgi6 = open_zip_shapefile(rgi6_reg_file)
# FP: Frank Paul's inventory (compared with submission 723 below)
ref_fp = open_zip_shapefile(os.path.join(support_dir, 'c3s_gi_rgi03_l7_1999_v1.zip'))
# WK: Will Kochtitzky's inventory (compared with submission 728 below)
ref_wk = open_zip_shapefile(os.path.join(support_dir, 'RGI6_Kochtitzky_final_3Sep2021.zip'))

### Compare new RGI7-file to RGI6

### Number of elements (differences do not depict problems)

In [None]:
# Outline counts: new selection vs. the full RGI6 region file
print('Number of glaciers in new RGI:', len(rgi7))
print('Number of glaciers in RGI6:', len(ref_rgi6))
print('Difference:', len(rgi7)-len(ref_rgi6))

### How many nominal glaciers were there in RGI6?

In [None]:
# Number of RGI6 entries with Status == 2 (the nominal glaciers this section asks about)
len(ref_rgi6.loc[ref_rgi6.Status == 2])

### Total area

In [None]:
# Add an 'area' column to each reference dataset, computed in a cylindrical
# equal-area projection ('cea'). Presumably in m², since the values are scaled
# by 1e-6 and labelled km² below.
ref_rgi6['area'] = ref_rgi6.to_crs({'proj':'cea'}).area
ref_fp['area'] = ref_fp.to_crs({'proj':'cea'}).area
ref_wk['area'] = ref_wk.to_crs({'proj':'cea'}).area

In [None]:
# Total areas of the new selection and of RGI6, scaled by 1e-6 for the km² labels
Area_RGI = rgi7['area'].sum() * 1e-6
Area_ref = ref_rgi6['area'].sum() * 1e-6
d = Area_RGI - Area_ref

# Report both totals and their difference
print('Area RGI7 [km²]:', Area_RGI)
print('Area RGI6 [km²]:', Area_ref)
print('Area difference [km²]:', d)

### Northern Ellesmere (723, Frank Paul, no problem there):  

In [None]:
# Outline counts: Ellesmere North subset (as selected above, before the
# duplicate and overlap corrections) vs. the FP reference data
print('Number of glaciers in RGI7 subset:', len(rgi7_North))
print('Number of glaciers in reference data (FP):', len(ref_fp))
print('Difference:', len(rgi7_North)-len(ref_fp))

In [None]:
# Print and compare total areas: Ellesmere North subset vs. the FP reference data.
# The variable name Area_6 is kept for compatibility, but it holds the FP total here,
# so the printed label names FP rather than RGI6.
Area_7 = rgi7_North['area'].sum() * 1e-6
print('Area RGI7 [km²]:', Area_7)
Area_6 = ref_fp['area'].sum() * 1e-6
print('Area reference (FP) [km²]:', Area_6)
d = (Area_7 - Area_6)
print('Area difference [km²]:', d)

This is brilliant! No issue there.

### Central Ellesmere (728, Copland, no problem there):  

In [None]:
# Apply a minimum-size threshold to the WK reference data so the counts are comparable:
# keep rows whose area, scaled by 1e-6 and rounded to 3 decimals, is at least 0.01.
# Presumably this mirrors utils.size_filter — confirm.
print(len(ref_wk))
ref_wk = ref_wk.loc[np.round(ref_wk['area'] * 1e-6, 3) >= 0.01].copy()
len(ref_wk)

In [None]:
# Outline counts: Ellesmere Central subset vs. the WK reference data.
# The label names WK because the data compared here is ref_wk, not the FP inventory.
print('Number of glaciers in RGI7 subset:', len(rgi7_Central))
print('Number of glaciers in reference data (WK):', len(ref_wk))
print('Difference:', len(rgi7_Central)-len(ref_wk))

In [None]:
# Print and compare total areas: Ellesmere Central subset vs. the WK reference data.
# The variable name Area_6 is kept for compatibility, but it holds the WK total here,
# so the printed label names WK rather than RGI6.
Area_7 = rgi7_Central['area'].sum() * 1e-6
print('Area RGI7 [km²]:', Area_7)
Area_6 = ref_wk['area'].sum() * 1e-6
print('Area reference (WK) [km²]:', Area_6)
d = (Area_7 - Area_6)
print('Area difference [km²]:', d)

This is brilliant! No issue there.

### Southern Ellesmere (590, RGI6, no problem there):  

In [None]:
# Subset RGI6 to the area covered by the submission-590 selection polygon(s),
# using the same representative-point overlay as for the RGI7 subset above
rp = ref_rgi6.representative_point().to_frame('geometry')
rp['orig_index'] = ref_rgi6.index
intersect = gpd.overlay(rp, shp_loc.loc[shp_loc['subm_id'] == 590], how='intersection')
ref_rgi6_south = ref_rgi6.loc[intersect['orig_index']].copy()

In [None]:
# Outline counts: South subset vs. RGI6 restricted to the same polygon(s)
print('Number of glaciers in RGI7 subset:', len(rgi7_South))
print('Number of glaciers in reference data (RGI6):', len(ref_rgi6_south))
print('Difference:', len(rgi7_South)-len(ref_rgi6_south))

In [None]:
# Total areas of the South subset and of the matching RGI6 subset,
# scaled by 1e-6 for the km² labels
Area_7 = rgi7_South['area'].sum() * 1e-6
Area_6 = ref_rgi6_south['area'].sum() * 1e-6
d = Area_7 - Area_6

# Report both totals and their difference
print('Area RGI7 [km²]:', Area_7)
print('Area RGI6 [km²]:', Area_6)
print('Area difference [km²]:', d)

Almost no issue - some weird polygon outcrops here and there. 

### Melville Island (635) not much to compare here

In [None]:
# Subset RGI6 to the area covered by the submission-635 (Melville Island)
# selection polygon(s), via the representative-point overlay
rp = ref_rgi6.representative_point().to_frame('geometry')
rp['orig_index'] = ref_rgi6.index
intersect = gpd.overlay(rp, shp_loc.loc[shp_loc['subm_id'] == 635], how='intersection')
ref_rgi6_melville = ref_rgi6.loc[intersect['orig_index']].copy()

In [None]:
# Outline counts: Melville subset vs. RGI6 restricted to the same polygon(s)
print('Number of glaciers in RGI7 subset:', len(rgi7_Melville))
print('Number of glaciers in reference data (RGI6):', len(ref_rgi6_melville))
print('Difference:', len(rgi7_Melville)-len(ref_rgi6_melville))

In [None]:
# Total areas of the Melville subset and of the matching RGI6 subset,
# scaled by 1e-6 for the km² labels
Area_7 = rgi7_Melville['area'].sum() * 1e-6
Area_6 = ref_rgi6_melville['area'].sum() * 1e-6
d = Area_7 - Area_6

# Report both totals and their difference
print('Area RGI7 [km²]:', Area_7)
print('Area RGI6 [km²]:', Area_6)
print('Area difference [km²]:', d)

In [None]:
# Overlay the Melville outlines of submission 635 (C0) and RGI6 (C3), edges only
f, ax = plt.subplots(figsize=(14, 14))
rgi7_Melville.plot(ax=ax, facecolor='None', edgecolor='C0');
ref_rgi6_melville.plot(ax=ax, facecolor='None', edgecolor='C3');
# The legend is built from explicit patches whose colours match the edge colours above
ax.legend(handles=[
            mpatches.Patch(facecolor='C0', label='635 - Berthier'),
            mpatches.Patch(facecolor='C3', label='RGI6'),
          ], loc='upper left');