# RGI01 outlines selection

In [1]:
import subprocess
import zipfile

import geopandas as gpd
from oggm import utils

## Read the file 

In [2]:
# Input directory containing the L2 regional tarballs (relative path)
idir = '../l2_sel_reg_tars/'

In [3]:
# Load the L2 outlines for region 01 straight out of the tarball
shp = gpd.read_file(f'tar://{idir}RGI01.tar.gz/RGI01/RGI01.shp')

## Outline selection 

In [4]:
# Distinct analyst strings present in the L2 file
shp['analysts'].unique()

array(['Beedle, Matthew', 'Manley, William', 'Herreid, Samuel',
       'Frey, Holger; Le Bris, Raymond; Paul, Frank', 'Rich, Justin',
       'Bolch, Tobias',
       'Beedle, Matthew; Berthier, Etienne; Bolch, Tobias; Burgess, Evan; Cogley, Graham; Forster, Richard; Giffen, Bruce A.; Hall, Dorothy K.; Kienholz, Christian; LeBris, Raymond; Manley, William',
       'Maraldo, Dean'], dtype=object)

For this region, glaciers can be selected either by analyst or by "proc_desc":

In [5]:
# Exact analysts string to select on, assembled from its individual names
sel_analysts = '; '.join([
    'Beedle, Matthew', 'Berthier, Etienne', 'Bolch, Tobias', 'Burgess, Evan',
    'Cogley, Graham', 'Forster, Richard', 'Giffen, Bruce A.', 'Hall, Dorothy K.',
    'Kienholz, Christian', 'LeBris, Raymond', 'Manley, William',
])
# Number of outlines carrying exactly this analysts string
len(shp[shp['analysts'] == sel_analysts])

27106

In [6]:
# Alternative selector: the exact "proc_desc" text (the double spaces are part of the value)
sel_proc_desc = ('Semi-automated glacier classification.; From Randolph Glacier Inventory Version 5.0. Source file: 01_rgi50_Alaska.zip.  '
                 'See http://www.glims.org/RGI/.  GLIMS IDs were adjusted and metadata was added at NSIDC.')
# Number of outlines carrying exactly this processing description
len(shp[shp['proc_desc'] == sel_proc_desc])

27106

In [7]:
# Keep the outlines matched by the analyst selection
rgi7 = shp[shp['analysts'] == sel_analysts]

## Consistency check with RGI6 

In [8]:
# List the archive members to find the name of the shapefile to open from the zip
with zipfile.ZipFile('../l0_RGIv6/01_rgi60_Alaska.zip', 'r') as z:
    # namelist() yields the member names directly, in archive order
    for member_name in z.namelist():
        print(member_name)

01_rgi60_Alaska.dbf
01_rgi60_Alaska.prj
01_rgi60_Alaska.shp
01_rgi60_Alaska.shx
01_rgi60_Alaska_hypso.csv


In [9]:
# Read the RGI6 Alaska shapefile directly from the zip archive (member name as listed above)
rgi6 = gpd.read_file('zip://../l0_RGIv6/01_rgi60_Alaska.zip/01_rgi60_Alaska.shp')

In [10]:
# Number of outlines in the RGI7 selection vs. RGI6
len(rgi7), len(rgi6)

(27106, 27108)

There are two missing glaciers! One was wrongly attributed to Region 1 in RGI6 (`RGI60-01.05285`); the other has not been identified yet.

In [11]:
# Drop the outline that RGI6 wrongly attributes to Region 1
# NOTE(review): rgi6_cor is not referenced again in the cells visible here; the
# area comparison runs on the uncorrected rgi6 — confirm this is intended.
rgi6_cor = rgi6[rgi6['RGIId'] != 'RGI60-01.05285']

Test the areas:

In [12]:
# Recompute the RGI6 areas ourselves: reproject to a cylindrical equal-area CRS
# and scale by 1e-6 (presumably m² -> km², to match the RGI6 'Area' column — confirm units).
# Adds the 'Area_us' column to rgi6 in place.
rgi6['Area_us'] = rgi6.to_crs({'proj':'cea'}).area * 1e-6

In [15]:
# Total areas, computed once each and reused for the difference
area_rgi7 = rgi7['area'].sum() * 1e-6  # same 1e-6 scaling as applied to 'Area_us'
area_rgi6_us = rgi6['Area_us'].sum()

print('Area RGI7a', area_rgi7)
print('Area RGI6', rgi6['Area'].sum())
print('Area RGI6 computed by us', area_rgi6_us)
print('diff areas RGI6 - RGI7 computed by us', area_rgi6_us - area_rgi7)

Area RGI7a 86724.5510826829
Area RGI6 86725.053
Area RGI6 computed by us 86724.97380315595
diff areas RGI6 - RGI7 computed by us 0.4227204730559606


So there is practically no difference in terms of area. The remaining difference in the number of glaciers must be a matter of outline definition...

## Write out and tar 

In [66]:
# Region number, used to build the zero-padded 'RGIxx' names below
reg = 1
# Output locations for the selected outlines and for their tarball.
# utils.mkdir's return value is used as the directory path further down.
odir = utils.mkdir('../l3_rgi7b')
odir_tar = utils.mkdir('../l3_rgi7b_tar')

In [69]:
# Per-region output folder; reset=True presumably empties an existing folder first
# so that stale files cannot end up in the tarball — confirm against oggm.utils.mkdir
dd = utils.mkdir(f'{odir}/RGI{reg:02d}/', reset=True)

print('Writing...')
# dd ends with '/', so plain concatenation yields a valid file path
rgi7.to_file(dd + f'RGI{reg:02d}.shp')

print('Taring...')
# -C switches to odir so the archive contains the bare 'RGIxx/' folder.
# check=True raises CalledProcessError if tar exits non-zero, instead of only
# printing a CompletedProcess with a failing returncode.
print(subprocess.run(['tar', '-zcvf', f'{odir_tar}/RGI{reg:02d}.tar.gz', '-C', odir, f'RGI{reg:02d}'],
                     check=True))

Writing...
Taring...
CompletedProcess(args=['tar', '-zcvf', '../l3_rgi7b_tar/RGI01.tar.gz', '-C', '../l3_rgi7b', 'RGI01'], returncode=0)
