# RGI01 (Alaska)

F. Maussion & S. Galos

Goal: strictly equivalent to RGI6

In [None]:
import pandas as pd
import geopandas as gpd
import subprocess
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
from utils import mkdir, submission_summary, needs_size_filter, size_filter, plot_map, plot_date_hist, find_duplicates
import os

In [None]:
# Region of interest (RGI region number; this notebook handles RGI01, Alaska)
reg = 1

# Root of the data tree: go down from rgi7_scripts/workflow
data_dir = '../../rgi7_data/'

# Level 2 GLIMS files (the region tarball is read from this directory below)
l2_dir = os.path.join(data_dir, 'l2_sel_reg_tars')

# Output directories: the region shapefile goes under `output_dir`, its
# tarball under `output_dir_tar` (see the "Write out and tar" section)
output_dir = mkdir(os.path.join(data_dir, 'l3_rgi7a'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_rgi7a_tar'))

# RGI v6 file, used for the consistency check at the end of the notebook
rgi6_reg_file = os.path.join(data_dir, 'l0_RGIv6', '01_rgi60_Alaska.zip')

In [None]:
# Read L2 files: the region shapefile is opened straight from the tarball
reg_tag = f'RGI{reg:02d}'
l2_uri = 'tar://' + os.path.join(l2_dir, f'{reg_tag}.tar.gz/{reg_tag}/{reg_tag}.shp')
shp = gpd.read_file(l2_uri)

### List of submissions 

In [None]:
# Summarise the submissions found in the L2 file
sdf, df_class = submission_summary(shp)
# There are loads of small submissions in this region: only list those with N > 1
has_several = sdf['N'] > 1
sdf.loc[has_several]

In [None]:
# # Optional: write out selection in intermediate shape files for manual GIS review
# tmp_output_dir = mkdir(os.path.join(data_dir, 'l0_tmp_data', f'rgi{reg:02d}_inventories'))
# tmp_output_dir_tar = mkdir(os.path.join(data_dir, 'l0_tmp_data'))
# for subid in shp.subm_id.unique():
#     s_loc = shp.loc[shp.subm_id == subid]
#     s_loc.to_file(tmp_output_dir + f'/subm_{int(subid):03d}.shp')
# print('Taring...')
# print(subprocess.run(['tar', '-zcvf', f'{tmp_output_dir_tar}/rgi{reg:02d}_inventories.tar.gz', '-C', 
#                       os.path.join(data_dir, 'l0_tmp_data'), f'rgi{reg:02d}_inventories']))

## Outline selection 

In [None]:
# Submission 766 is a small group of glaciers (McNabb); flag it as not coming from RGI6
mcnabb = shp.loc[shp['subm_id'].isin([766])].assign(is_rgi6=False)
mcnabb.plot(edgecolor='k');

In [None]:
# This is the corrected outline in GLIMS (submission 756), plotted for a visual check
shp.loc[shp.subm_id.isin([756])].plot(edgecolor='k');

# This was the outline Bruce thought was ok - but it was not OK, see
# https://trello.com/c/9yUMSONP/186-new-missing-geometry-in-alaska
# shp.loc[shp.anlys_id.isin([373863])].plot(edgecolor='k');

In [None]:
# Submissions 624 and 756 (756 is for the corrected outline).
# Nothing should change here, so every outline is flagged as RGI6.
rgi6_like = shp.loc[shp['subm_id'].isin([624, 756])].assign(is_rgi6=True)

# Append the McNabb glaciers to obtain the full selection
rgi7 = pd.concat([rgi6_like, mcnabb])

In [None]:
# Size filter? Call utils.needs_size_filter on the selection; its return
# value is shown as the cell output.
needs_size_filter(rgi7)

### Some sanity checks 

In [None]:
# Submission summary again, this time for the selected outlines only.
# NOTE: this rebinds `sdf` and `df_class`, which previously held the summary
# of the full L2 file (`shp`). `sdf` is reused for the GitHub table below.
sdf, df_class = submission_summary(rgi7)
df_class

In [None]:
# Check the orphaned rock outcrops: if an orphan-interiors file exists for this
# region, report when any selected submission id also appears in it.
orphan_path = os.path.join(data_dir, 'l1_orphan_interiors', f'RGI{reg:02d}', f'RGI{reg:02d}.shp')
if os.path.exists(orphan_path):
    orphans = gpd.read_file(orphan_path)
    selected_ids = rgi7.subm_id.unique()
    if np.isin(selected_ids, orphans.subm_id.unique()).any():
        print('Orphan rock outcrops detected.')

In [None]:
# Run utils.find_duplicates on the selection (trailing ';' hides the return value)
find_duplicates(rgi7);

### Plots 

In [None]:
# Map of the selection, drawn by utils.plot_map with its default options
plot_map(rgi7, reg)

In [None]:
# Same map, with the `is_rgi6` option of utils.plot_map switched on
plot_map(rgi7, reg, is_rgi6=True)

In [None]:
# Date histogram of the selection, drawn by utils.plot_date_hist
plot_date_hist(rgi7, reg)

### Text for github

In [None]:
# Transposed submission summary, used for the GitHub table
fgh = sdf.transpose()
fgh

In [None]:
# Markdown table for GitHub: the first header labels the index column
md_headers = np.append(['subm_id'], fgh.columns)
print(fgh.to_markdown(headers=md_headers))

## Write out and tar 

In [None]:
# Region sub-directory for the shapefile; `reset=True` is forwarded to
# utils.mkdir (presumably clears an existing directory - confirm in utils).
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
# os.path.join gives the right path whether or not `dd` ends with a separator
rgi7.to_file(os.path.join(dd, f'RGI{reg:02d}.shp'))

print('Taring...')
# check=True: raise CalledProcessError if tar exits with a non-zero status,
# instead of carrying on with a missing or incomplete archive.
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}'],
                     check=True))

## Consistency check with RGI6 

In [None]:
# load reference data
# Load the RGI6 region file as reference data, using utils.open_zip_shapefile
from utils import open_zip_shapefile
rgi6 = open_zip_shapefile(rgi6_reg_file)

In [None]:
# Keep only the outlines flagged as RGI6. Take an explicit copy: an 'area'
# column is assigned to this frame further down, and assigning to a bare
# slice of `rgi7` raises pandas' SettingWithCopyWarning.
rgi7_ = rgi7.loc[rgi7['is_rgi6']].copy()

In [None]:
# Number of outlines on each side: RGI7 subset flagged as RGI6, and RGI6 itself
len(rgi7_), len(rgi6)

One glacier has been wrongly attributed to Region 1 in RGI6 (`RGI60-01.05285`):

In [None]:
# Remove wrongly attributed glacier from RGI6
# Remove the wrongly attributed glacier from RGI6
misattributed = rgi6['RGIId'] == 'RGI60-01.05285'
rgi6 = rgi6.loc[~misattributed].copy()

Test the areas:

In [None]:
# Compute the areas ourselves on both sides, after reprojecting with {'proj': 'cea'}
area_proj = {'proj':'cea'}
rgi6['Area_us'] = rgi6.to_crs(area_proj).area
rgi7_['area'] = rgi7_.to_crs(area_proj).area

In [None]:
# Totals of the areas computed by us; the 1e-6 factor is applied when printing them as km2
total_rgi7 = rgi7_['area'].sum()
total_rgi6_us = rgi6['Area_us'].sum()

print('Area RGI7a (km2)', total_rgi7 * 1e-6)
# Area as stored in the RGI6 'Area' column (printed as is)
print('Area RGI6 (km2)', rgi6['Area'].sum())
print('Area RGI6 computed by us (km2)', total_rgi6_us * 1e-6)
print('diff areas RGI6 - RGI7 computed by us (km2)', (total_rgi6_us - total_rgi7) * 1e-6)

No issue anymore!

### The code below is now obsolete! The GLIMS issue has been fixed.

### Find the missing glacier 

In [None]:
# Reference frame: a copy of RGI6 whose 'area' column holds the area computed by us
df_ref = rgi6.assign(area=rgi6['Area_us'])

In [None]:
import progressbar

In [None]:
def xy_coord(geom):
    """Return the first (x, y) pair of ``geom.xy``.

    Helper used to compute CenLon / CenLat ourselves: ``geom.xy`` must unpack
    into two indexable coordinate sequences (x first, then y).
    """
    xs, ys = geom.xy
    first_x = xs[0]
    first_y = ys[0]
    return first_x, first_y

In [None]:
# compute CenLon CenLat ourselves
rp = df_ref.representative_point()

coordinates = np.array(list(rp.apply(xy_coord)))
df_ref['CenLon'] = coordinates[:, 0]
df_ref['CenLat'] = coordinates[:, 1]

In [None]:
# Keep a pristine copy: the matching loop below drops rows from `df_ref` and
# restarts from this copy each time it is run.
df_ref_orig = df_ref.copy()

In [None]:
# Loop over all RGI7 glaciers and find their equivalent in ref
df_ref = df_ref_orig.copy()
not_found = {}
to_drop = []
for i, (ref_area, lon, lat) in progressbar.progressbar(enumerate(zip(rgi7['area'].values, rgi7.CenLon.values, rgi7.CenLat.values)), max_value=len(rgi7)):
#     dist = haversine(lon, lat, df_ref.CenLon.values, df_ref.CenLat.values)
    dist = (lon - df_ref.CenLon.values)**2 + (lat - df_ref.CenLat.values)**2 
    found = False
    for j in np.argsort(dist)[:10]:
        s6 = df_ref.iloc[j]
        if np.allclose(s6['area'], ref_area, rtol=0.001):
            found = True
            to_drop.append(s6.name)
            break
    if not found:
        not_found[i] = df_ref.iloc[np.argsort(dist)[:10]]
    if len(to_drop) > 1000:
        df_ref.drop(labels=to_drop, inplace=True)
        to_drop = []
df_ref.drop(labels=to_drop, inplace=True)

In [None]:
# Number of RGI7 outlines left without a match, and number of reference
# outlines that were never matched (still present in `df_ref`)
print(len(not_found), len(df_ref))

In [None]:
# Reference outlines remaining after the matching loop
df_ref

In [None]:
# Plot three specific RGI6 outlines (the same ids are written to the
# problem-glaciers directory at the end of the notebook)
problem_ids = ['RGI60-01.20630', 'RGI60-01.25912', 'RGI60-01.25913']
rgi6.loc[rgi6['RGIId'].isin(problem_ids)].plot(edgecolor='k');

In [None]:
# RGI7 outlines for which no match was found (keys of `not_found` are positions in `rgi7`)
unmatched_positions = list(not_found)
pb_rgi7 = rgi7.iloc[unmatched_positions]

In [None]:
# Attribute table of the unmatched RGI7 outlines
pb_rgi7

In [None]:
# Geometry of the unmatched RGI7 outlines
pb_rgi7.plot(edgecolor='k');

In [None]:
# For comparison: the reference outlines that were never matched
df_ref

In [None]:
# Geometry of the reference outlines that were never matched
df_ref.plot(edgecolor='k');

We have found the problem! Reported here: https://github.com/GLIMS-RGI/glims_issue_tracker/issues/5

In [None]:
# Same unmatched RGI7 outlines, plotted after a zero-width buffer
# (presumably to repair an invalid geometry - confirm against the linked issue)
pb_rgi7.buffer(0).plot(edgecolor='k');

In [None]:
# Output directories
output_dir = mkdir(os.path.join(data_dir, 'l3_problem_glaciers'))
output_dir_tar = mkdir(os.path.join(data_dir, 'l3_problem_glaciers_tar'))

In [None]:
# NOTE: at this point `output_dir` / `output_dir_tar` are the problem-glaciers
# directories (rebound in the cell just above), not the l3_rgi7a ones.
# `reset=True` is forwarded to utils.mkdir (presumably clears an existing
# directory - confirm in utils).
dd = mkdir(f'{output_dir}/RGI{reg:02d}/', reset=True)

print('Writing...')
# The three RGI6 reference outlines to export
problem_ids = ['RGI60-01.20630', 'RGI60-01.25912', 'RGI60-01.25913']
# os.path.join gives the right path whether or not `dd` ends with a separator
rgi6.loc[rgi6.RGIId.isin(problem_ids)].to_file(os.path.join(dd, f'RGI{reg:02d}_ref.shp'))

print('Taring...')
# check=True: raise CalledProcessError if tar exits with a non-zero status,
# instead of carrying on with a missing or incomplete archive.
print(subprocess.run(['tar', '-zcvf', f'{output_dir_tar}/RGI{reg:02d}.tar.gz', '-C', output_dir, f'RGI{reg:02d}'],
                     check=True))