# RGI-07: Region 10 (Asia North)

F. Maussion & S. Galos, June 2021

In [1]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
import os
from utils import mkdir

## Files and storage paths

In [2]:
# Region of interest
reg = 10

# go down from rgi7_scripts/workflow
data_dir = '../../rgi7_data/'

# Level 2 GLIMS files
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output directories
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# RGI v6 file for comparison later 
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '10_rgi60_NorthAsia.zip')

In [3]:
# Specific to this region: boxes where data has to be selected differently
support_dir = os.path.join(data_dir, 'l0_support_data')

# Option 1: selection by S. Galos (exchange allmost all glaciers in Kamtchatka with few exeptions (where RGI seems to be better)...
#... plus keep most of small RGI6 glaciers not covered by Barr)
#box_type = 'RGI07_R10_Barr_sel'

# Option 2: exchange all Kamtchatka glaciers by Barr data
box_type = 'RGI07_R10_Barr_all'

# OK path to file
box_file = os.path.join(support_dir, f'{box_type}.tar.gz')

### Load the input data

In [4]:
# Read L2 files
shp = gpd.read_file('tar://' + l2_dir + f'/RGI{reg:02d}.tar.gz/RGI{reg:02d}/RGI{reg:02d}.shp')

## Apply selection criteria to create the RGI7 data subset

### Step 1: extract RGI6 from GLIMS data and do a check

In [5]:
#...extract RGI06 from GLIMS based on 'geog_area'
RGI_ss = shp.loc[shp['geog_area']=='Randolph Glacier Inventory; Umbrella RC for merging the RGI into GLIMS']

#### load reference data (here RGI6) to enable comparison

In [6]:
# Just to know the name of the file to open from zip
import zipfile
with zipfile.ZipFile(rgi6_reg_file, "r") as z:
    for f in z.filelist:
        if '.shp' in f.filename:
            fname = f.filename

# load reference data
ref_odf = gpd.read_file('zip://' + rgi6_reg_file + '/' + fname)

#### Number of elements (differences do not necessarily depict major problems)

In [7]:
print('Number of glaciers in new RGI subset:', len(RGI_ss))
print('Number of glaciers in reference data:', len(ref_odf))
print('Difference:', len(RGI_ss)-len(ref_odf))

Number of glaciers in new RGI subset: 5142
Number of glaciers in reference data: 5151
Difference: -9


#### check for dublicate glacier IDs

In [8]:
print ('number of glaciers without unique id in RGI06:', len(ref_odf)-len(ref_odf['GLIMSId'].unique()))

number of glaciers without unique id in RGI06: 15


In [9]:
print ('number of glaciers without unique id in RGI06 from GLIMS data base:', len(RGI_ss)-len(RGI_ss['glac_id'].unique()))

number of glaciers without unique id in RGI06 from GLIMS data base: 56


#### Check for 'nominal glaciers' in the RGI6 original data and delete them from new RGI subset from GLIMS if they are in there 


See https://github.com/GLIMS-RGI/glims_issue_tracker/issues/6 for context.

In [10]:
# how many nominals in RGI06 (identifiable via 'Status' attribute in RGI 06)
nom = ref_odf.loc[ref_odf.Status == 2].copy()
len(nom)

116

In [11]:
# drop nominal glaciers from new RGI subset
RGI_ss = (RGI_ss.loc[~RGI_ss['glac_id'].isin(nom['GLIMSId'])]).copy()

#### Total area

In [12]:
# add an area field to RGI_ss and reference data
RGI_ss['area'] = RGI_ss.to_crs({'proj':'cea'}).area
ref_odf['area'] = ref_odf.to_crs({'proj':'cea'}).area
nom['area'] = nom.to_crs({'proj':'cea'}).area

In [13]:
# print and compare area values
Area_RGI = RGI_ss['area'].sum() * 1e-6
print('Area RGI [km²]:', Area_RGI)
Area_ref = ref_odf['area'].sum() * 1e-6
print('Area Ref [km²]:', Area_ref)
Area_nom = nom['area'].sum() * 1e-6
print('Area Nom [km²]:', Area_nom)
d = (Area_RGI + Area_nom - Area_ref) * 1e-6
print('Area difference [km²]:',d)

Area RGI [km²]: 2325.531906416692
Area Ref [km²]: 2410.021788898029
Area Nom [km²]: 84.48835584119824
Area difference [km²]: -1.5266401387634686e-09


### result of check (RGI from Glims global data base vs. RGI06 original):

The number of individual gaciers differs by 9 but areas of both files differ by only 1527 m² for whole Region! The difference in number of individuals results cannot clearly be explained but the fact that total areas are "equal" and an overlay test shows no cases of lost glaciers leads to the assumption that the difference in individual glacier numbers is a merging issue and hence of minor relevance as 'all' glacierized areas are covered. See https://github.com/GLIMS-RGI/glims_issue_tracker/issues/5 for context

TODO: find these glaciers as has been done for RGI01 and 13, 14, 15

# If RGI07 should be equal to RGI06 stop here, else...

## ...start refinement and introduce Barr data for Kamtchatka

In [14]:
# extract data by Barr from GLIMS data which is subm_id 716
Barr = shp.loc[shp['subm_id']== 716]

In [15]:
## load a shapefile containing polygons which define areas uncovered by newly created RGI07 file
# OPTION A
# load a shapefile indicating the areas where glacier outlines of RGI06 shall be replaced by data by Barr (decided by steph)
# OPTION B
# load a shapefile indicating the areas where glacier outlines of RGI06 shall be replaced by data by Barr (replace whole region)
# See above to select the one you want
RA = gpd.read_file('tar://' + box_file + f'/{box_type}/{box_type}.shp')

In [16]:
# do an overlay of Barr data (subm_id 716) and the shapefile above to drop all glaciers outside 
Barr_ov = gpd.overlay(Barr, RA , how='intersection')

In [17]:
# do an overlay of RGI06 (subm_id 636) and the shapefile above to drop all glaciers inside
RGI_ss_ov = gpd.overlay(RGI_ss, RA , how='difference')

In [18]:
# combine the two selections and thereby create RGI07-reg10 
RGI07_reg10 = RGI_ss_ov.append(Barr_ov, sort=True)

In [19]:
# add a column with the geometry area to enable comparison with RGI6
RGI07_reg10['area'] = RGI07_reg10.to_crs({'proj':'cea'}).area

In [20]:
# print and compare area values
Area_RGI07_reg10 = RGI07_reg10['area'].sum() * 1e-6
print('Area RGI07 [km²]:', Area_RGI07_reg10)
Area_ref = ref_odf['area'].sum() * 1e-6
print('Area RGI06 [km²]:', Area_ref)
d = (Area_RGI07_reg10 - Area_ref)
print('Area difference [km²]:',d)
dn = d + Area_nom
print('Area difference considering dropped nominals [km²]:',dn)

Area RGI07 [km²]: 2321.2445544769434
Area RGI06 [km²]: 2410.021788898029
Area difference [km²]: -88.77723442108572
Area difference considering dropped nominals [km²]: -4.288878579887481


## Write out and tar 

In [21]:
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
RGI07_reg10.to_file(dd + f'RGI{reg:02d}.shp')

print('Taring...')
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}']))

Writing...
Taring...
CompletedProcess(args=['tar', '-zcvf', '../../rgi7_data/l3_rgi7a_tar/RGI10.tar.gz', '-C', '../../rgi7_data/l3_rgi7a', 'RGI10'], returncode=0)
