# RGI11 (Central Europe)

F. Roura-Adseiras & Fabien Maussion

Goal:
- Alps: updates of the Paul 2003 dataset
- Pyrenees: new inventory by Izagirre

In [None]:
import pandas as pd
import geopandas as gpd
import subprocess
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
from utils import (mkdir, submission_summary, needs_size_filter, size_filter, plot_map, plot_date_hist, 
                   find_duplicates, open_zip_shapefile, correct_geoms, fix_overaps)
import os

## Files and storage paths

In [None]:
# RGI region number handled by this notebook
reg = 11

# Data root, relative to rgi7_scripts/workflow
data_dir = '../../rgi7_data/'

# Where the level-2 GLIMS tarballs live
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output locations (created through the mkdir helper): shapefiles and tarballs
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# Reference inventories used to cross-check the GLIMS outlines
l0_support_dir = os.path.join(data_dir, 'l0_support_data')
# ... Pyrenees (Izagirre)
ref_reg_file_p = os.path.join(l0_support_dir, 'pyrenees2000.zip')
# ... Alps (Frank)
ref_reg_file_a = os.path.join(l0_support_dir, 'C3S_GI_RGI11_L5_2003.zip')

# RGI v6 regional file, compared against at the end of the notebook
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '11_rgi60_CentralEurope.zip')

In [None]:
# Directory with the l0 support data; the Alps and Pyrenees reference
# inventories are opened from here in the comparison section further down.
support_dir = os.path.join(data_dir, 'l0_support_data')

### Load the GLIMS input data

In [None]:
# Read the regional L2 shapefile directly from its tarball via the tar:// scheme
l2_shp_uri = f'tar://{l2_dir}/RGI{reg:02d}.tar.gz/RGI{reg:02d}/RGI{reg:02d}.shp'
shp = gpd.read_file(l2_shp_uri)

### List of submissions 

In [None]:
# Summarise the submissions found in the L2 file. Only the first return value
# is kept here; the table is shown as the cell output.
sdf, _ = submission_summary(shp)
sdf

In [None]:
# # Optional: write out selection in intermediate shape files for manual GIS review
# tmp_output_dir = mkdir(os.path.join(data_dir, 'l0_tmp_data', f'rgi{reg:02d}_inventories'))
# tmp_output_dir_tar = mkdir(os.path.join(data_dir, 'l0_tmp_data'))
# for subid in shp.subm_id.unique():
#     s_loc = shp.loc[shp.subm_id == subid]
#     s_loc.to_file(tmp_output_dir + f'/subm_{int(subid):03d}.shp')
# print('Taring...')
# print(subprocess.run(['tar', '-zcvf', f'{tmp_output_dir_tar}/rgi{reg:02d}_inventories.tar.gz', '-C', 
#                       os.path.join(data_dir, 'l0_tmp_data'), f'rgi{reg:02d}_inventories']))

## Outline selection 

In [None]:
# Alps: submission 731 is our main dataset
from_subm_731 = shp['subm_id'] == 731
RGI_a = shp.loc[from_subm_731].copy()

# Apply the size filter; outline count is printed before and shown after
print(len(RGI_a))
RGI_a = size_filter(RGI_a)
len(RGI_a)

In [None]:
# Pyrenees: submission 715 is our main dataset
from_subm_715 = shp['subm_id'] == 715
RGI_p = shp.loc[from_subm_715].copy()

# Apply the size filter; outline count is printed before and shown after
print(len(RGI_p))
RGI_p = size_filter(RGI_p)
len(RGI_p)

In [None]:
# Stack the Alps and Pyrenees selections into the regional inventory and
# flag every outline as not taken from RGI6
rgi7 = pd.concat([RGI_a, RGI_p]).assign(is_rgi6=False)

### Some sanity checks 

In [None]:
# Run the geometry-correction helper from utils on the combined selection
# (presumably repairs invalid geometries; see utils.correct_geoms to confirm).
rgi7 = correct_geoms(rgi7)

In [None]:
# Look for duplicated outlines. The result is kept in `dupes` for inspection
# only; nothing further down in this notebook reads it.
dupes = find_duplicates(rgi7)

In [None]:
# Presumably resolves overlapping outlines; confirm in utils. The helper is
# imported from utils under the name `fix_overaps` (sic).
rgi7 = fix_overaps(rgi7)

In [None]:
# Re-check after the geometry fixes; presumably reports whether any outline
# now falls under the size threshold (result shown as cell output).
needs_size_filter(rgi7)

In [None]:
# Recompute the submission summary on the final selection. `sdf` is reused for
# the GitHub text below; the second table is shown as the cell output.
sdf, df_class = submission_summary(rgi7)
df_class

In [None]:
# Orphaned rock outcrops: warn if any selected submission shows up in the
# region's orphan-interiors file (only checked when that file exists)
reg_name = f'RGI{reg:02d}'
orphan_f = os.path.join(data_dir, 'l1_orphan_interiors', reg_name, f'{reg_name}.shp')
if os.path.exists(orphan_f):
    # From here on orphan_f is the loaded GeoDataFrame, no longer the path
    orphan_f = gpd.read_file(orphan_f)
    selected_ids = rgi7.subm_id.unique()
    check = np.isin(selected_ids, orphan_f.subm_id.unique())
    if check.any():
        print(f'Orphan rock outcrops detected in subm_id {selected_ids[check]}')
        orphan_f['area'] = orphan_f.to_crs({'proj':'cea'}).area

### Plots 

In [None]:
# Overview map of the selected outlines for this region
plot_map(rgi7, reg, loc='upper left', linewidth=2)

In [None]:
# Same map with is_rgi6=True (presumably distinguishes outlines by the
# 'is_rgi6' flag set above; confirm in utils.plot_map)
plot_map(rgi7, reg, loc='upper left', linewidth=2, is_rgi6=True)

In [None]:
# Histogram of the outline dates (see utils.plot_date_hist for the exact field used)
plot_date_hist(rgi7, reg)

### Text for github

In [None]:
# Transposed submission summary, printed as a markdown table in the next cell
fgh = sdf.T
fgh

In [None]:
# Markdown version of the table; the header row is 'subm_id' followed by the
# column labels of fgh
print(fgh.to_markdown(headers=np.append(['subm_id'], fgh.columns)))

## Write out and tar 

In [None]:
# Start from a fresh regional output directory (reset=True is passed to mkdir)
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
# os.path.join works whether or not mkdir returns the path with a trailing slash
rgi7.to_file(os.path.join(dd, f'RGI{reg:02d}.shp'))

print('Taring...')
# check=True: raise CalledProcessError if tar fails, rather than carrying on
# with a missing or truncated archive
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}'],
                     check=True))

## New RGI-file created - Check result!

Load reference data (here RGI6 and the original contributions) to enable comparison

In [None]:
# Load reference data. open_zip_shapefile is already imported from utils at
# the top of the notebook, and the reference paths were defined with the
# other storage paths, so neither is repeated here.
ref_rgi6 = open_zip_shapefile(rgi6_reg_file)

# Alps reference inventory
ref_a = open_zip_shapefile(ref_reg_file_a)

# Pyrenees reference inventory: skip the '__MACOSX' entries of the archive and
# pick the member matching 'glaciers_'
ref_p = open_zip_shapefile(ref_reg_file_p, exclude_pattern='__MACOSX', include_pattern='glaciers_')

### Compare new RGI7-file to RGI6

#### Number of elements (differences do not necessarily depict problems)

In [None]:
# Outline counts: new inventory vs. RGI6
n_rgi7, n_rgi6 = len(rgi7), len(ref_rgi6)
print('Number of glaciers in new RGI:', n_rgi7)
print('Number of glaciers in RGI6:', n_rgi6)
print('Difference:', n_rgi7 - n_rgi6)

### How many nominal glaciers were there in RGI6?

In [None]:
# Count the RGI6 outlines whose Status attribute equals 2 (the value this
# notebook uses to identify nominal glaciers)
len(ref_rgi6.loc[ref_rgi6.Status == 2])

### Total area

In [None]:
# Add an equal-area ('cea' projection) area field, in m², to the reference
# data. rgi7 is not touched here: its existing 'area' column is used as-is in
# the next cell. ref_p's area is computed again in the Pyrenees comparison.
ref_rgi6['area'] = ref_rgi6.to_crs({'proj':'cea'}).area
ref_p['area'] = ref_p.to_crs({'proj':'cea'}).area

In [None]:
# Total areas converted from m² to km², and their difference
Area_RGI = 1e-6 * rgi7['area'].sum()
Area_ref = 1e-6 * ref_rgi6['area'].sum()
d = Area_RGI - Area_ref
print('Area RGI7 [km²]:', Area_RGI)
print('Area RGI6 [km²]:', Area_ref)
print('Area difference [km²]:', d)

## Comparison to reference products

### Pyrenees (no problem)

In [None]:
# Equal-area ('cea') areas in m² for the Pyrenees selection and its reference
for gdf in (RGI_p, ref_p):
    gdf['area'] = gdf.to_crs({'proj':'cea'}).area

# Keep reference outlines of at least 0.01 km² (area rounded to 3 decimals);
# the count is printed before and shown after the filter
print(len(ref_p))
ref_area_km2 = np.round(ref_p['area'] * 1e-6, 3)
ref_p = ref_p.loc[ref_area_km2 >= 0.01].copy()
len(ref_p)

In [None]:
# Outline counts: Pyrenees selection vs. reference inventory
n_sel, n_ref = len(RGI_p), len(ref_p)
print('Number of glaciers in new RGI subset:', n_sel)
print('Number of glaciers in reference data (izaguirre):', n_ref)
print('Difference:', n_sel - n_ref)

In [None]:
# Total areas in km² (columns are in m²), absolute difference, and the
# difference as a percentage of the RGI area
Area_rgi = RGI_p['area'].sum() / 1e6
Area_ref = ref_p['area'].sum() / 1e6
d = Area_rgi - Area_ref
d_perc = d / Area_rgi * 100
print('Area RGI [km²]:', Area_rgi)
print('Area ref:', Area_ref)
print('Area difference [km²]:', d, '/', 'percentage:', d_perc)

### Alps (no problem)

In [None]:
# Equal-area ('cea') areas in m² for the Alps selection and its reference
for gdf in (RGI_a, ref_a):
    gdf['area'] = gdf.to_crs({'proj':'cea'}).area

# Keep reference outlines of at least 0.01 km² (area rounded to 3 decimals);
# the count is printed before and shown after the filter
print(len(ref_a))
ref_area_km2 = np.round(ref_a['area'] * 1e-6, 3)
ref_a = ref_a.loc[ref_area_km2 >= 0.01].copy()
len(ref_a)

In [None]:
# Outline counts: Alps selection vs. reference inventory
n_sel, n_ref = len(RGI_a), len(ref_a)
print('Number of glaciers in new RGI subset:', n_sel)
print('Number of glaciers in reference data (Franck):', n_ref)
print('Difference:', n_sel - n_ref)

In [None]:
# Total areas in km² (columns are in m²), absolute difference, and the
# difference as a percentage of the RGI area
Area_rgi = RGI_a['area'].sum() / 1e6
Area_ref = ref_a['area'].sum() / 1e6
d = Area_rgi - Area_ref
d_perc = d / Area_rgi * 100
print('Area RGI [km²]:', Area_rgi)
print('Area ref:', Area_ref)
print('Area difference [km²]:', d, '/', 'percentage:', d_perc)

For the Alps, there are no substantial differences between the original (Frank) and GLIMS inventories, except for one glacier, which we want to find now:

### Find the missing glacier 

In [None]:
# Work on copies. NOTE: `rgi7` is rebound here to the Alps-only selection
# (RGI_a), so from this cell on it no longer holds the combined inventory.
df_ref = ref_a.copy()
rgi7 = RGI_a.copy()
# Bring the reference outlines into the CRS of the RGI7 outlines
df_ref = df_ref.to_crs(rgi7.crs)

In [None]:
import progressbar

In [None]:
def xy_coord(geom):
    """Return the first (x, y) coordinate pair of a geometry.

    Used to compute CenLon / CenLat ourselves. ``geom`` must expose an ``xy``
    attribute that unpacks into two coordinate sequences (x values, y values).
    """
    xs, ys = geom.xy
    first_x = xs[0]
    first_y = ys[0]
    return first_x, first_y

In [None]:
# Derive CenLon / CenLat for the reference outlines from a representative
# point of each geometry
rep_points = df_ref.representative_point()

lonlat = np.array([xy_coord(pt) for pt in rep_points])
df_ref['CenLon'], df_ref['CenLat'] = lonlat[:, 0], lonlat[:, 1]

In [None]:
# Keep a pristine copy: the matching loop below drops rows from df_ref and
# restarts from this copy each time the cell is run.
df_ref_orig = df_ref.copy()

In [None]:
# Loop over all RGI7 glaciers and find their equivalent in ref.
#
# For each RGI7 outline, the 10 nearest reference outlines (by CenLon/CenLat)
# are tested, and the first whose area agrees within 1 % is taken as its match.
# Matched reference rows are removed from df_ref in batches, to avoid paying
# for a DataFrame.drop on every iteration. Rows matched since the last batch
# drop are tracked in `pending` and skipped, so a reference outline is used at
# most once and the result does not depend on where the batch boundaries fall.
#
# Outputs:
#   not_found : {positional index in rgi7 -> its 10 nearest reference candidates}
#   df_ref    : the reference outlines that no RGI7 outline matched
df_ref = df_ref_orig.copy()
not_found = {}
to_drop = []
pending = set()  # same labels as to_drop, for fast membership tests
rgi7_attrs = zip(rgi7['area'].values, rgi7.CenLon.values, rgi7.CenLat.values)
for i, (rgi_area, lon, lat) in progressbar.progressbar(enumerate(rgi7_attrs), max_value=len(rgi7)):
    # Squared coordinate difference: only used to rank candidates, so no
    # square root and no haversine are needed
    dist = (lon - df_ref.CenLon.values)**2 + (lat - df_ref.CenLat.values)**2
    nearest = np.argsort(dist)[:10]
    found = False
    for j in nearest:
        candidate = df_ref.iloc[j]
        if candidate.name in pending:
            # already matched to another RGI7 outline, waiting to be dropped
            continue
        if np.allclose(candidate['area'], rgi_area, rtol=0.01):
            found = True
            to_drop.append(candidate.name)
            pending.add(candidate.name)
            break
    if not found:
        not_found[i] = df_ref.iloc[nearest]
    if len(to_drop) > 1000:
        df_ref.drop(labels=to_drop, inplace=True)
        to_drop = []
        pending = set()
# Drop whatever is left from the last (incomplete) batch
df_ref.drop(labels=to_drop, inplace=True)

In [None]:
# Number of RGI7 outlines without a match, and number of reference outlines
# left unmatched after the loop
print(len(not_found), len(df_ref))

In [None]:
# Plot the reference outlines that were left unmatched, coloured by area
df_ref.plot(edgecolor='k', column='area');

In [None]:
# The keys of not_found are positional indices into rgi7 (they come from
# enumerate in the matching loop), hence iloc. Plot them coloured by area.
pb_rgi7 = rgi7.iloc[list(not_found.keys())]
pb_rgi7.plot(edgecolor='k', column='area');

**Conclusion: there is no problem in GLIMS!!!**