## Check that the RGI entities can be attributed to the correct region and sub-region

This is the third of a series of 3 similar notebooks. This one checks that both problems have been resolved.

In [1]:
# Directory holding the RGI files: per-region glacier shapefiles in sub-directories,
# plus the '00_rgi62_regions' folder with the region outlines.
in_dir = 'RGI62'

In [2]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline

In [3]:
import pandas as pd
import geopandas as gpd
import shapely.geometry as shpg
import progressbar
import os
import glob
import numpy as np
from oggm import utils, cfg
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Make the data directory absolute so the glob and join calls below do not
# depend on the working directory changing later on.
in_dir = os.path.abspath(in_dir)

In [5]:
# Build the RGI "megafile": the glaciers of regions 1 to 19 in a single frame,
# indexed by their RGI id.
region_frames = []
for reg in range(1, 20):
    # Exactly one shapefile per region is expected, one directory level below in_dir.
    pattern = in_dir + "/*/{:02d}_*.shp".format(reg)
    matches = glob.glob(pattern)
    assert len(matches) == 1
    region_frames.append(gpd.read_file(matches[0]).set_index('RGIId'))
mdf = pd.concat(region_frames)

In [6]:
# One shapely Point per glacier, built from its CenLon / CenLat columns.
# Testing a point against the region outlines is faster than testing the full geometry.
mdf['points'] = list(map(shpg.Point, mdf.CenLon, mdf.CenLat))

In [8]:
# Outlines of the first-order (O1) and second-order (O2) RGI regions.
regions_dir = os.path.join(in_dir, '00_rgi62_regions')
rgi_reg = gpd.read_file(os.path.join(regions_dir, '00_rgi62_O1Regions.shp'))
rgi_sreg = gpd.read_file(os.path.join(regions_dir, '00_rgi62_O2Regions.shp'))

In [9]:
# Combined "O1-O2" code of every glacier, each part zero-padded to two digits
# (e.g. '01-02'), so it can be compared with the RGI_CODE of the sub-region outlines.
mdf['RGI_CODE'] = [
    '{:02d}-{:02d}'.format(int(o1), int(o2))
    for o1, o2 in zip(mdf.O1Region, mdf.O2Region)
]

In [10]:
# The glacier files and the region outlines must know exactly the same first-order
# region codes. unique() returns values in order of appearance, so compare them as
# sets: the check then does not depend on the row order of either file.
glacier_reg_codes = set(mdf['O1Region'].unique())
outline_reg_codes = set(rgi_reg['RGI_CODE'].unique())
assert glacier_reg_codes == outline_reg_codes, (
    'O1 region codes differ between the glacier files and the region outlines: '
    '{}'.format(glacier_reg_codes ^ outline_reg_codes)
)

In [11]:
# Flag every glacier that lies outside the outline of its declared first-order region
# (NOT_IN_REG) and, when exactly one region contains its point, store that region's
# code in NEW_REG.
mdf['NOT_IN_REG'] = False
mdf['NEW_REG'] = ''
for reg in mdf['O1Region'].unique():
    region_shape = rgi_reg[rgi_reg.RGI_CODE == reg]
    region_glaciers = mdf.loc[mdf.O1Region == reg]
    entities = zip(region_glaciers.index, region_glaciers.points, region_glaciers.geometry)
    for rid, point, glacier_geom in progressbar.progressbar(entities,
                                                            max_value=len(region_glaciers),
                                                            suffix=' RGI Reg: ' + reg):
        # Point test first; the glacier geometry is only tested when the point is outside.
        if region_shape.contains(point).any() or region_shape.intersects(glacier_geom).any():
            continue
        mdf.loc[rid, 'NOT_IN_REG'] = True

        # Search all regions for the one containing the point. With no match, or
        # more than one, NEW_REG keeps its empty default.
        candidates = rgi_reg.loc[rgi_reg.contains(point)]
        if len(candidates) == 1:
            mdf.loc[rid, 'NEW_REG'] = candidates.iloc[0].RGI_CODE



100% (27108 of 27108) |#######| Elapsed Time: 0:00:34 Time:  0:00:34 RGI Reg: 1
100% (18855 of 18855) |#######| Elapsed Time: 0:00:14 Time:  0:00:14 RGI Reg: 2
100% (4556 of 4556) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 3
100% (7415 of 7415) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 4
100% (20261 of 20261) |#######| Elapsed Time: 0:00:06 Time:  0:00:06 RGI Reg: 5
100% (568 of 568) |###########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 6
100% (1615 of 1615) |#########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 7
100% (3417 of 3417) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 8
100% (1069 of 1069) |#########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 9
100% (5151 of 5151) |########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 10
100% (3927 of 3927) |########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 11
100% (1888 of 1888) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 12
100% (54429 of 54429) |######| Elapsed T

In [12]:
# Glaciers outside their declared region whose point falls in exactly one other region.
# NEW_REG holds strings, so compare it with '' explicitly: and-ing a boolean column
# with a string column only works if pandas coerces the strings to booleans.
misplaced = mdf.loc[mdf.NOT_IN_REG & (mdf.NEW_REG != '')]
assert len(misplaced) == 0, (
    '{} glaciers belong to another O1 region'.format(len(misplaced))
)
# Glaciers outside their declared region for which no single other region was found.
not_ok = mdf.loc[mdf.NOT_IN_REG & (mdf.NEW_REG == '')]
assert len(not_ok) == 0, (
    '{} glaciers are outside their O1 region with no unique replacement'.format(len(not_ok))
)

In [13]:
# Same check at the second-order level: flag every glacier that lies outside the
# outline of its declared sub-region (NOT_IN_SREG) and, when exactly one sub-region
# contains its point, store that sub-region's code in NEW_SREG.
mdf['NOT_IN_SREG'] = False
mdf['NEW_SREG'] = ''
for sreg in mdf['RGI_CODE'].unique():
    subregion_shape = rgi_sreg[rgi_sreg.RGI_CODE == sreg]
    subregion_glaciers = mdf.loc[mdf.RGI_CODE == sreg]
    entities = zip(subregion_glaciers.index, subregion_glaciers.points,
                   subregion_glaciers.geometry)
    for rid, point, glacier_geom in progressbar.progressbar(entities,
                                                            max_value=len(subregion_glaciers),
                                                            suffix=' RGI SREG: ' + sreg):
        # Point test first; the glacier geometry is only tested when the point is outside.
        if subregion_shape.contains(point).any() or subregion_shape.intersects(glacier_geom).any():
            continue
        mdf.loc[rid, 'NOT_IN_SREG'] = True

        # Search all sub-regions for the one containing the point. With no match,
        # or more than one, NEW_SREG keeps its empty default.
        candidates = rgi_sreg.loc[rgi_sreg.contains(point)]
        if len(candidates) == 1:
            mdf.loc[rid, 'NEW_SREG'] = candidates.iloc[0].RGI_CODE

100% (5773 of 5773) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-02
100% (10552 of 10552) |##| Elapsed Time: 0:00:09 Time:  0:00:09 RGI SREG: 01-06
100% (4258 of 4258) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-04
100% (616 of 616) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 01-01
100% (872 of 872) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 01-03
100% (5037 of 5037) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-05
100% (3202 of 3202) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-04
100% (7389 of 7389) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 02-02
100% (5067 of 5067) |####| Elapsed Time: 0:00:03 Time:  0:00:03 RGI SREG: 02-03
100% (1235 of 1235) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-01
100% (1962 of 1962) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-05
100% (880 of 880) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-03
100% (627 of 627) |######| Elapsed Time:

In [14]:
# Glaciers outside their declared sub-region whose point falls in exactly one other
# sub-region. NEW_SREG holds strings, so compare it with '' explicitly: and-ing a
# boolean column with a string column only works if pandas coerces the strings to booleans.
misplaced = mdf.loc[mdf.NOT_IN_SREG & (mdf.NEW_SREG != '')]
assert len(misplaced) == 0, (
    '{} glaciers belong to another O2 sub-region'.format(len(misplaced))
)
# Glaciers outside their declared sub-region for which no single other sub-region was found.
not_ok = mdf.loc[mdf.NOT_IN_SREG & (mdf.NEW_SREG == '')]
assert len(not_ok) == 0, (
    '{} glaciers are outside their O2 sub-region with no unique replacement'.format(len(not_ok))
)