# RGI-07: Region 12 (Caucasus and Middle East)

F. Maussion & S. Galos, June 2021

In [13]:
import os
import subprocess
import zipfile

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

from utils import mkdir

## Files and storage paths

In [14]:
# Region of interest (RGI region number; formatted as two digits in file names below)
reg = 12

# Root of the data tree: go down from rgi7_scripts/workflow
data_dir = '../../rgi7_data/'

# Input: level 2 GLIMS files (one tarball per region)
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output directories: level 3 shapefiles and their tarred counterparts.
# Both paths go through the `mkdir` helper imported from utils, whose return
# value is used as the directory path in the write-out cell.
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# RGI v6 file of the same region, used for comparison later 
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '12_rgi60_CaucasusMiddleEast.zip')

In [15]:
# Specific to this region: boxes where data has to be selected differently.
# `box_file` is the tarball holding the mask shapefile (RGI07_R12_UCA) that is
# read further down to pick outlines for areas the main selection leaves out.
support_dir = os.path.join(data_dir, 'l0_support_data')
box_file = os.path.join(support_dir, 'RGI07_R12_UCA.tar.gz')

### Load the input data

In [16]:
# Open the level 2 shapefile of this region straight from its tarball
l2_shp_path = 'tar://' + l2_dir + f'/RGI{reg:02d}.tar.gz/RGI{reg:02d}/RGI{reg:02d}.shp'
shp = gpd.read_file(l2_shp_path)

### List of submissions 

In [35]:
# Print one summary line per GLIMS submission: the submission id, the analysts
# (first distinct value only) and the year of every distinct source date.
# groupby(sort=False) visits the submissions in order of first appearance, as
# iterating over `shp.subm_id.unique()` would, but splits the table in a single
# pass and skips rows without a submission id instead of failing on them.
for _, subm_rows in shp.groupby('subm_id', sort=False):
    subm_label = str(subm_rows['subm_id'].unique()[0])
    analysts_label = str(subm_rows['analysts'].unique()[0])
    # src_date values are timestamp strings such as '2011-09-01T00:00:00':
    # the first four characters are the year
    years = [src_date[:4] for src_date in subm_rows['src_date'].unique()]
    # the leading blank keeps the printed listing identical to the previous one
    print(' ' + ' '.join([subm_label, analysts_label] + years))

 76.0 Nosenko, Gennadi (Gennady) 2004
 79.0 Nosenko, Gennadi (Gennady) 2004
 81.0 Nosenko, Gennadi (Gennady) 2004
 83.0 Nosenko, Gennadi (Gennady) 2004
 87.0 Nosenko, Gennadi (Gennady) 2004
 327.0 Khromova, Tatyana 2001
 341.0 Seliverstov, Yriy 2000
 423.0 Nosenko, Gennadi (Gennady) 2004
 424.0 Nosenko, Gennadi (Gennady) 1999
 425.0 Khromova, Tatyana 1999
 426.0 Nosenko, Gennadi (Gennady) 1999
 427.0 Nosenko, Gennadi (Gennady) 2001
 428.0 Nosenko, Gennadi (Gennady) 2004
 429.0 Nosenko, Gennadi (Gennady) 2004
 588.0 Ashkan, Farokhnia; Karimi, Neamat; Sara, Sheshangosht 2011
 642.0 Tielidze, Levan 2014 2013 2014 2014 2011 2016
 643.0 Tielidze, Levan 1987 1985 1986 1986 1986 1987
 644.0 Tielidze, Levan 1960
 662.0 Sarikaya, Mehmet Akif 2008 2006 2006 2004 2008 2009 2011


### Apply selection criteria to create the RGI7 data subset

In [5]:
# Caucasus: three inventories by analyst 'Tielidze, Levan' are available
# (around 1960, around 1985 and around 2013). For RGI07 we select the most
# recent one, which is 'subm_id' 642.
is_caucasus = shp['subm_id'] == 642
RGI_ss_caucasus = shp[is_caucasus]
len(RGI_ss_caucasus)

2021

In [6]:
# Middle East: for RGI07 we select 'subm_id' 588 (source year 2011 in the
# list of submissions printed earlier)
is_middle_east = shp['subm_id'] == 588
RGI_ss_middle_east = shp[is_middle_east]
len(RGI_ss_middle_east)

484

In [7]:
# Show the attributes of the first Middle East outline as a quick sanity check
RGI_ss_middle_east.iloc[0]

line_type                                            glac_bound
anlys_id                                               297051.0
glac_id                                          G049337E33320N
anlys_time                                  2014-07-05T00:00:00
area                                                4651.577985
db_area                                                0.004652
width                                                       0.0
length                                                      0.0
primeclass                                                  0.0
min_elev                                                    0.0
mean_elev                                                   0.0
max_elev                                                    0.0
src_date                                    2011-09-01T00:00:00
rec_status                                                 okay
glac_name                                                  None
wgms_id                                 

In [8]:
# Combine the Caucasus and Middle East selections into one GeoDataFrame.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` stacks the rows in the same order and keeps the original index.
RGI_ss = pd.concat([RGI_ss_caucasus, RGI_ss_middle_east])

### Should we add some 1985 glaciers for completeness?

Hugonnet et al. made a subjective choice to add some 1985 glaciers. Should we do the same?

In [9]:
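# We decided not to for now.
# If we change our mind: in the list of submissions above, the ca. 1985
# inventory is 'subm_id' 643 (644 is the 1960 one), e.g.:
# RGI_ss_caucasus_1985 = shp.loc[shp['subm_id'] == 643]
# len(RGI_ss_caucasus_1985)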

### Should we add missing glaciers (only a few in peripheral areas) based on RGI06?

This is done with a manually created shapefile mask that outlines the few glacierized areas not covered by the data selected above. Note that many of the outlines added by this procedure are not glaciers by the common definition: they are rock glaciers, or even areas that hold no ice in reality. In addition, some of these outlines suffer from incorrect spatial positioning and similar problems.

In [10]:
# Load the mask: polygons delimiting the areas that the RGI07 selection built
# so far leaves uncovered
uca_shp_path = 'tar://' + box_file + '/RGI07_R12_UCA/RGI07_R12_UCA.shp'
UCA = gpd.read_file(uca_shp_path)

In [11]:
# Extract the GLIMS glacier outlines falling in the mask areas loaded above.
# The intersection is computed against the full level 2 table `shp` (all
# submissions); the result is narrowed down to one submission in the next cell.
df_UCA = gpd.overlay(shp, UCA, how='intersection')

In [12]:
# Keep the submission of interest only (for these areas only 'subm_id' 662
# is available)
from_subm_662 = df_UCA['subm_id'] == 662
df_UCA = df_UCA[from_subm_662]

In [13]:
# Add the outlines of the uncovered areas (df_UCA) to the RGI07 selection.
# `pd.concat` replaces `DataFrame.append`, which pandas 2.0 removed; sort=False
# keeps the existing column order (columns found only in df_UCA come last).
# NOTE: this cell extends RGI_ss, so it must be run only once per kernel session.
RGI_ss = pd.concat([RGI_ss, df_UCA], sort=False)

## Write out and tar 

In [14]:
# Folder receiving the shapefile of this region.
# NOTE(review): `reset=True` presumably makes the `mkdir` helper from utils
# empty an already existing folder first - confirm in utils.
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
# os.path.join gives a valid path whether or not `dd` ends with a separator
RGI_ss.to_file(os.path.join(dd, f'RGI{reg:02d}.shp'))

print('Taring...')
# '-C output_dir' makes tar store the region folder relative to output_dir.
# check=True raises CalledProcessError when tar exits with an error, so that a
# missing or incomplete archive cannot go unnoticed.
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}'], check=True))

Writing...
Taring...
CompletedProcess(args=['tar', '-zcvf', '../../rgi7_data/l3_rgi7a_tar/RGI12.tar.gz', '-C', '../../rgi7_data/l3_rgi7a', 'RGI12'], returncode=0)


## New RGI-file created - Check result!

### load reference data (here RGI6) to enable comparison

In [15]:
# Just to know the name of the file to open from zip
import zipfile
with zipfile.ZipFile(rgi6_reg_file, "r") as z:
    for f in z.filelist:
        if '.shp' in f.filename:
            fname = f.filename

# load reference data
ref_odf = gpd.read_file('zip://' + rgi6_reg_file + '/' + fname)

## Compare new RGI-file and reference data set 

### Number of elements (differences do not necessarily depict major problems)

In [16]:
# Number of outlines in the new selection and in the reference inventory
n_new, n_ref = len(RGI_ss), len(ref_odf)
print('Number of glaciers in new RGI subset:', n_new)
print('Number of glaciers in reference data:', n_ref)
print('Difference:', n_new - n_ref)

Number of glaciers in new RGI subset: 2556
Number of glaciers in reference data: 1888
Difference: 668


### How many nominal glaciers were there in RGI06-R12?

In [17]:
# Count the reference entries whose 'Status' attribute equals 2
len(ref_odf[ref_odf['Status'] == 2])

339

### Total area

In [18]:
# Set the 'area' column of the new selection and of the reference data to the
# polygon areas obtained after reprojecting to the 'cea' (cylindrical equal
# area) projection. RGI_ss already carries an 'area' attribute, which is
# overwritten here.
for gdf in (RGI_ss, ref_odf):
    gdf['area'] = gdf.to_crs({'proj':'cea'}).area

In [19]:
# Totals of the 'area' columns; the 1e-6 factor scales the sums to the km²
# announced in the printed labels
Area_RGI = 1e-6 * RGI_ss['area'].sum()
Area_ref = 1e-6 * ref_odf['area'].sum()
d = Area_RGI - Area_ref

print('Area RGI [km²]:', Area_RGI)
print('Area Ref [km²]:', Area_ref)
print('Area difference [km²]:', d)

Area RGI [km²]: 1246.2573226628851
Area Ref [km²]: 1306.999710204219
Area difference [km²]: -60.742387541333756


# End of revised notebook