# RGI19 (Antarctic and Subantarctic)

F. Maussion & S. Galos

Several changes to RGI6

In [None]:
import pandas as pd
import geopandas as gpd
import subprocess
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
from utils import mkdir, submission_summary, needs_size_filter, size_filter, plot_map, plot_date_hist, open_zip_shapefile, find_duplicates, find_neighbors, correct_geoms
import os

## Files and storage paths

In [None]:
# Region of interest (RGI first-order region number)
reg = 19

# Root data directory, relative to rgi7_scripts/workflow ('../../' goes up two levels)
data_dir = '../../rgi7_data/'

# Level 2 GLIMS files (the regional tar archive is read from here further below)
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output directories: one for the shapefile folder, one for its tar archive
# NOTE(review): mkdir comes from utils; presumably it creates the directory
# and returns its path -- confirm
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# RGI v6 file of this region, used for the consistency check at the end
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '19_rgi60_AntarcticSubantarctic.zip')

### Load the input data

In [None]:
# Load the regional L2 shapefile straight out of its tar.gz archive
shp = gpd.read_file(f'tar://{l2_dir}/RGI{reg:02d}.tar.gz/RGI{reg:02d}/RGI{reg:02d}.shp')

### List of submissions 

In [None]:
# Summarise the submissions found in the L2 file; the second return value is
# not needed at this stage and is discarded
sdf, _ = submission_summary(shp)
sdf

- 585 is the RGI6
- 700 is a better quality inventory for the South Sandwich Islands in 19-03
- 769 is a better quality Kerguelen inventory
- 798 is a better inventory for subregions 19-12 and 19-13 (we remove its duplicated 19-12 outline, since 19-12 is also mapped by 806)
- 805 is a better inventory for subregions 19-14 and 19-24
- 806 is a better inventory for subregions 19-11 and 19-12
- 807 is a better inventory for subregions 19-01, 19-05, 19-17, and parts of 19-02 (South Orkney Islands)
- 808 is a better quality inventory for Bouvet Island in 19-03
- 811 is a better inventory for subregions 19-15 and 19-16

In [None]:
# # Optional: write out selection in intermediate shape files for manual GIS review
# tmp_output_dir = mkdir(os.path.join(data_dir, 'l0_tmp_data', f'rgi{reg:02d}_inventories'))
# tmp_output_dir_tar = mkdir(os.path.join(data_dir, 'l0_tmp_data'))
# for subid in shp.subm_id.unique():
#     s_loc = shp.loc[shp.subm_id == subid]
#     s_loc.to_file(tmp_output_dir + f'/subm_{int(subid):03d}.shp')
# print('Taring...')
# print(subprocess.run(['tar', '-zcvf', f'{tmp_output_dir_tar}/rgi{reg:02d}_inventories.tar.gz', '-C', 
#                       os.path.join(data_dir, 'l0_tmp_data'), f'rgi{reg:02d}_inventories']))

## Outline selection 

In [None]:
# Submission 585 (the RGI6 outlines), flagged as coming from RGI6
s_585 = shp.loc[shp['subm_id'].eq(585)].assign(is_rgi6=True)

In [None]:
# Submission 700 (South Sandwich Islands inventory), flagged as not RGI6
s_700 = shp.loc[shp['subm_id'].eq(700)].assign(is_rgi6=False)

In [None]:
# Submission 769 (Kerguelen inventory), flagged as not RGI6
s_769 = shp.loc[shp['subm_id'].eq(769)].assign(is_rgi6=False)

In [None]:
# Submission 798 (subregions 19-12 and 19-13), flagged as not RGI6
s_798 = shp.loc[shp['subm_id'].eq(798)].assign(is_rgi6=False)

In [None]:
# Submission 805 (subregions 19-14 and 19-24), flagged as not RGI6
s_805 = shp.loc[shp['subm_id'].eq(805)].assign(is_rgi6=False)

In [None]:
# Submission 806 (subregions 19-11 and 19-12), flagged as not RGI6
s_806 = shp.loc[shp['subm_id'].eq(806)].assign(is_rgi6=False)

In [None]:
# Submission 807 (subregions 19-01, 19-05, 19-17 and the South Orkney
# Islands), flagged as not RGI6
s_807 = shp.loc[shp['subm_id'].eq(807)].assign(is_rgi6=False)

In [None]:
# Submission 808 (Bouvet Island inventory), flagged as not RGI6
s_808 = shp.loc[shp['subm_id'].eq(808)].assign(is_rgi6=False)

In [None]:
# Submission 811 (subregions 19-15 and 19-16), flagged as not RGI6
s_811 = shp.loc[shp['subm_id'].eq(811)].assign(is_rgi6=False)

### Kerguelen 

In [None]:
# Read the box containing the glaciers with subm_id = 769 (Kerguelen).
# The path is assembled with os.path.join, like the other paths in this
# notebook, which avoids the doubled separator that string concatenation
# produced (data_dir already ends with '/').
box = open_zip_shapefile(os.path.join(data_dir, 'l0_support_data', 'kerguelen.zip'))

In [None]:
# Plot the box outline (black edge, no fill) and draw the submission 769
# outlines on the same axes to check that the box encloses them
base = box.plot(color='none', edgecolor='black')
s_769.geometry.plot(ax=base, color='C0', markersize=5);

Now we remove the RGI6 glaciers that lie inside the box (they are replaced by the new inventory):

In [None]:
# One representative point per RGI6 outline (not the centroid), each tagged
# with the row label of the outline it was derived from
rp = s_585.representative_point().to_frame('geometry').assign(orig_index=s_585.index)

# The 'difference' overlay keeps only the points that the box does not cover
difference = gpd.overlay(rp, box, how='difference')

# RGI6 outlines to keep; stored in a temporary so s_585 is still intact here
s_585_sel = s_585.loc[difference['orig_index']]

# Displayed: number of RGI6 outlines dropped, number of submission 769 outlines
(len(s_585) - len(s_585_sel), len(s_769))

In [None]:
# Commit the selection: s_585 no longer holds the outlines inside the Kerguelen box
s_585 = s_585_sel

### South Orkney 

In [None]:
# Read the box delimiting the South Orkney Islands area.
# The path is assembled with os.path.join, like the other paths in this
# notebook, which avoids the doubled separator that string concatenation
# produced (data_dir already ends with '/').
box = open_zip_shapefile(os.path.join(data_dir, 'l0_support_data', 'south-orkney.zip'))

In [None]:
# Plot the box outline (black edge, no fill) and draw the submission 807
# outlines on the same axes for a visual check against the box
base = box.plot(color='none', edgecolor='black')
s_807.geometry.plot(ax=base, color='C0', markersize=5);

Now we remove the RGI6 glaciers that lie inside the box (they are replaced by the new inventory):

In [None]:
# One representative point per RGI6 outline (not the centroid), each tagged
# with the row label of the outline it was derived from
rp = s_585.representative_point().to_frame('geometry').assign(orig_index=s_585.index)

# The 'difference' overlay keeps only the points that the box does not cover
difference = gpd.overlay(rp, box, how='difference')

# RGI6 outlines to keep; stored in a temporary so s_585 is still intact here
s_585_sel = s_585.loc[difference['orig_index']]

# Displayed: number of RGI6 outlines dropped by this selection
len(s_585) - len(s_585_sel)

In [None]:
# Commit the selection: s_585 no longer holds the outlines inside the South Orkney box
s_585 = s_585_sel

### South Sandwich 

In [None]:
# Read the box containing the glaciers with subm_id = 700 (South Sandwich
# Islands); the previous comment cited 769, which is the Kerguelen inventory.
# The path is assembled with os.path.join, like the other paths in this
# notebook, which avoids the doubled separator that string concatenation
# produced (data_dir already ends with '/').
box = open_zip_shapefile(os.path.join(data_dir, 'l0_support_data', 'Sandwich-Islands.zip'))

In [None]:
# Plot the box outline (black edge, no fill) and draw the submission 700
# outlines on the same axes for a visual check against the box
base = box.plot(color='none', edgecolor='black')
s_700.geometry.plot(ax=base, color='C0', markersize=5);

Now we keep the glaciers inside the box and remove the ones from RGI6:

In [None]:
# One representative point per submission 700 outline (not the centroid),
# each tagged with the row label of the outline it was derived from
rp = s_700.representative_point().to_frame('geometry').assign(orig_index=s_700.index)

# The 'intersection' overlay keeps only the points that fall inside the box
intersection = gpd.overlay(rp, box, how='intersection')

# Submission 700 outlines to keep; a temporary, s_700 is replaced later
s_700_sel = s_700.loc[intersection['orig_index']]

# Displayed: change in outline count (zero or negative), number of outlines kept
(len(s_700_sel) - len(s_700), len(s_700_sel))

In [None]:
# One representative point per RGI6 outline (not the centroid), each tagged
# with the row label of the outline it was derived from
rp = s_585.representative_point().to_frame('geometry').assign(orig_index=s_585.index)

# The 'difference' overlay keeps only the points that the box does not cover
difference = gpd.overlay(rp, box, how='difference')

# RGI6 outlines to keep; stored in a temporary so s_585 is still intact here
s_585_sel = s_585.loc[difference['orig_index']]

# Displayed: number of RGI6 outlines dropped, number of submission 700 outlines kept
(len(s_585) - len(s_585_sel), len(s_700_sel))

In [None]:
# Commit both South Sandwich selections at once
s_585, s_700 = s_585_sel, s_700_sel

### Bouvet

In [None]:
# Read the box delimiting the Bouvet Island area.
# The path is assembled with os.path.join, like the other paths in this
# notebook, which avoids the doubled separator that string concatenation
# produced (data_dir already ends with '/').
box = open_zip_shapefile(os.path.join(data_dir, 'l0_support_data', 'bouvet.zip'))

In [None]:
# Plot the box outline (black edge, no fill) and draw the submission 808
# outlines on the same axes for a visual check against the box
base = box.plot(color='none', edgecolor='black')
s_808.geometry.plot(ax=base, color='C0', markersize=5);

Now we remove the RGI6 glaciers that lie inside the box (they are replaced by the new inventory):

In [None]:
# One representative point per RGI6 outline (not the centroid), each tagged
# with the row label of the outline it was derived from
rp = s_585.representative_point().to_frame('geometry').assign(orig_index=s_585.index)

# The 'difference' overlay keeps only the points that the box does not cover
difference = gpd.overlay(rp, box, how='difference')

# RGI6 outlines to keep; stored in a temporary so s_585 is still intact here
s_585_sel = s_585.loc[difference['orig_index']]

# Displayed: number of RGI6 outlines dropped by this selection
len(s_585) - len(s_585_sel)

In [None]:
# Commit the selection: s_585 no longer holds the outlines inside the Bouvet box
s_585 = s_585_sel

### Remove duplicated outline 19-12

In [None]:
# Look at the submission 798 outline that is about to be removed
s_798[s_798['anlys_id'] == 995059].plot();

In [None]:
# Keep every submission 798 outline except the one with anlys_id 995059
s_798 = s_798[s_798['anlys_id'].ne(995059)].copy()

### New subregions outlines 

In [None]:
# Read the RGI7 second-order (O2) region outlines; they are used below to
# locate the RGI6 glaciers sitting in subregions covered by new inventories
sregs = open_zip_shapefile(os.path.join(data_dir, '00_rgi70_regions/00_rgi70_O2Regions.zip'))

In [None]:
# Subregions that are entirely covered by the new inventories
# (798, 805, 806, 807 and 811 in the list of submissions)
sregs_sel = sregs.loc[
    sregs.o2region.isin([
        '19-01', '19-05', '19-11', '19-12', '19-13',
        '19-14', '19-15', '19-16', '19-17', '19-24',
    ])
]
sregs_sel

In [None]:
# One representative point per RGI6 outline (not the centroid), each tagged
# with the row label of the outline it was derived from
rp = s_585.representative_point().to_frame('geometry').assign(orig_index=s_585.index)

# The 'difference' overlay keeps only the points outside the selected subregions
difference = gpd.overlay(rp, sregs_sel, how='difference')

# RGI6 outlines to keep; stored in a temporary so s_585 is still intact here
s_585_sel = s_585.loc[difference['orig_index']]

# Displayed: number of RGI6 outlines dropped by this selection
len(s_585) - len(s_585_sel)

In [None]:
# Commit the selection: s_585 no longer holds the outlines inside the selected subregions
s_585 = s_585_sel

### OK Merge 

In [None]:
# Merge what is left of the original RGI6 (585) with the new inventories.
# The original row labels are kept (no ignore_index).
rgi7 = pd.concat([s_585, s_700, s_769, s_798, s_805, s_806, s_807, s_808, s_811])

In [None]:
# Apply the size filter and print the number of outlines before and after
# NOTE(review): size_filter comes from utils; its threshold is not visible
# here -- confirm it is the intended one for this region
print(len(rgi7))
rgi7 = size_filter(rgi7)
print(len(rgi7))

### Some sanity checks 

In [None]:
# Look for duplicated outlines in the merged inventory
# NOTE(review): find_duplicates comes from utils; the criterion it uses to
# call two outlines duplicates is not visible here
dupes = find_duplicates(rgi7)

In [None]:
# Drop the outline found at position 1 of `dupes` (matched through its
# anlys_id), then search for duplicates again
# NOTE(review): the position is hard-coded -- confirm it still points at the
# intended outline whenever the input data changes
rgi7 = rgi7.loc[rgi7.anlys_id != dupes.iloc[1].anlys_id]
dupes = find_duplicates(rgi7)

In [None]:
# Plot the row at position 1 of the find_neighbors result (black outline)
# together with the remaining duplicates (red outline)
# NOTE(review): find_neighbors comes from utils; presumably it returns the
# outlines of rgi7 closest to `dupes`, with n=2 setting how many -- confirm
nn = find_neighbors(dupes, rgi7, n=2)
f, ax = plt.subplots()
nn.iloc[[1]].plot(ax=ax, edgecolor='k', facecolor='none');
dupes.plot(ax=ax, edgecolor='r', facecolor='none');

OK so this is obviously a misclassified rock outcrop: https://trello.com/c/UGyLbC87

Let's delete it:

In [None]:
# Remove the outline with anlys_id 287466 (the misclassified rock outcrop
# discussed above) and run the duplicate search once more; the trailing
# semicolon suppresses the cell output
rgi7 = rgi7.loc[rgi7.anlys_id != 287466]
find_duplicates(rgi7);

In [None]:
# NOTE(review): correct_geoms comes from utils; presumably it repairs invalid
# geometries and returns the corrected inventory -- confirm
rgi7 = correct_geoms(rgi7)

In [None]:
# NOTE(review): needs_size_filter comes from utils; presumably it reports
# whether outlines below the size threshold remain after the geometry
# correction -- confirm
needs_size_filter(rgi7)

In [None]:
# Summarise the submissions again, now for the merged inventory: sdf is used
# for the GitHub text below, df_class is displayed here
sdf, df_class = submission_summary(rgi7)
df_class

In [None]:
# Check the orphaned rock outcrops of the submissions used in rgi7.
#
# orphan_f always ends up as a table with an 'area' column (possibly empty),
# so the area sum in the following cell works in every case. Previously
# orphan_f stayed a path string when the file was missing, and had no 'area'
# column when none of the selected submissions had orphans.
orphan_path = os.path.join(data_dir, 'l1_orphan_interiors', f'RGI{reg:02d}', f'RGI{reg:02d}.shp')
if os.path.exists(orphan_path):
    orphan_f = gpd.read_file(orphan_path)
    # Boolean mask over the submissions of rgi7: True where orphans exist
    check = np.isin(rgi7.subm_id.unique(), orphan_f.subm_id.unique())
    if np.any(check):
        print(f'Orphan rock outcrops detected in subm_id {rgi7.subm_id.unique()[check]}')
    # Areas are computed in the cylindrical equal-area ('cea') projection
    orphan_f['area'] = orphan_f.to_crs({'proj':'cea'}).area
    # Keep only orphans of submissions present in rgi7 (none if no match)
    orphan_f = orphan_f.loc[orphan_f.subm_id.isin(rgi7.subm_id.unique()[check])]
else:
    # No orphan file for this region: empty table, so the area sum is 0
    orphan_f = pd.DataFrame({'subm_id': pd.Series(dtype='int64'),
                             'area': pd.Series(dtype='float64')})

In [None]:
# Total area of the orphaned rock outcrops, scaled by 1e-6 (presumably m² to
# km² -- confirm). Falls back to 0.0 when orphan_f has no 'area' column,
# which happens when the orphan file does not exist (orphan_f is then still
# the path string) or when no selected submission has orphans.
orphan_f['area'].sum() * 1e-6 if 'area' in getattr(orphan_f, 'columns', ()) else 0.0

### Plots 

In [None]:
# Map of the merged inventory for this region
# NOTE(review): plot_map comes from utils; how it colours the outlines by
# default is not visible here
plot_map(rgi7, reg, linewidth=3, loc='upper left')

In [None]:
# Same map with is_rgi6=True
# NOTE(review): presumably this distinguishes outlines kept from RGI6 from
# the new ones (the is_rgi6 column set during selection) -- confirm in utils
plot_map(rgi7, reg, linewidth=3, is_rgi6=True, loc='upper left')

In [None]:
# Histogram of outline dates for the merged inventory
# NOTE(review): plot_date_hist comes from utils; which date attribute it
# reads is not visible here
plot_date_hist(rgi7, reg, figsize=(16, 7))

### Text for github

In [None]:
# Transpose the submission summary for the GitHub text and display it
fgh = sdf.T
fgh

In [None]:
# Print the table as markdown; 'subm_id' is prepended to the column names so
# that the index column also gets a header
print(fgh.to_markdown(headers=np.append(['subm_id'], fgh.columns)))

## Write out and tar 

In [None]:
# Regional output folder, requested with reset=True
# NOTE(review): presumably reset=True empties an existing folder -- confirm in utils
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
rgi7.to_file(f'{dd}RGI{reg:02d}.shp')

print('Taring...')
# Archive the regional folder relative to output_dir; the CompletedProcess is
# printed so the command and its return code show up in the cell output
print(subprocess.run([
    'tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz',
    '-C', output_dir, f'RGI{reg:02d}',
]))

## Consistency check with RGI6 

Much harder to do now given the complexity

#### load reference data (here RGI6 original) to enable comparison

In [None]:
# Load the reference data (original RGI6 file of this region)
# The import is redundant (open_zip_shapefile is already imported in the
# first cell) but harmless
from utils import open_zip_shapefile
ref_odf = open_zip_shapefile(rgi6_reg_file)

#### Number of elements (differences do not necessarily indicate problems)

In [None]:
# Outline counts of the new inventory and of the RGI6 reference, and their difference
print(f'Number of glaciers in new RGI subset: {len(rgi7)}')
print(f'Number of glaciers in reference data: {len(ref_odf)}')
print(f'Difference: {len(rgi7) - len(ref_odf)}')

#### Check for 'nominal glaciers' in the RGI6 original data and delete them from new RGI subset from GLIMS if they are in there 

In [None]:
# Count the nominal glaciers in RGI6, i.e. the rows whose 'Status' attribute is 2
# (this cell only counts them; nothing is removed from rgi7 here)
nom = ref_odf[ref_odf['Status'] == 2]
len(nom)

#### Total area

In [None]:
# Add an area field to the reference data, computed in the cylindrical
# equal-area ('cea') projection. Only ref_odf gets the field in this cell;
# rgi7 is not modified.
ref_odf['area'] = ref_odf.to_crs({'proj':'cea'}).area

In [None]:
# Compare the total areas of the new inventory and of RGI6.
# NOTE(review): this relies on rgi7 already carrying an 'area' column in the
# same unit as the one computed for ref_odf; no such column is added to rgi7
# in this notebook, so it must come from the input data or a utils helper --
# confirm.
Area_Rep = rgi7['area'].sum() / 1_000_000
print('Area Rep [km²]:', Area_Rep)

Area_RGI6 = ref_odf['area'].sum() / 1_000_000
print('Area RGI6 [km²]:', Area_RGI6)

# Absolute difference and its share of the new inventory's area (not of RGI6's)
d = Area_Rep - Area_RGI6
d_perc = d / Area_Rep * 100
print('Area difference [km²]:', d, '/', 'percentage:', d_perc)