# Modify the RGI6 regions files for RGI7 

**List of changes from RGI6 to RGI7**: 
- The southern boundary of region 12 (Caucasus and Middle East) has been shifted south by 2° (from 32°N to 30°N) to encompass a cluster of glaciers which were previously unassigned.
- The region boxes for region 01 (Alaska) used to encompass some islands in the Bering Sea east of Kamchatka. This subregion contains no glaciers (and probably hasn't for a long time) and has now been removed. 
- The data type of the `rgi_code` attribute in the first-order region file is now `str` (was `int`). The `rgi_code` now has a leading zero, for example `02` instead of `2`. 
- Region 19 has been renamed to "*Subantarctic and Antarctic Islands*" (was: "*Antarctic and Subantarctic*")
- The polygon of RGI region 19 has been updated to remove the Antarctic mainland. A new region (`20`, "*Antarctic Mainland*") might be used in the future if glaciers are inventoried there.
- All abbreviations in the second-order regions file have been replaced by their full name (e.g. "East Central" instead of "EC")
- The first-order and second-order region files now have a field called `long_code` which contains a string representing the full region name, using the lowercase with underscores format (e.g. `02_western_canada_usa`). This field is used to name the corresponding RGI shapefiles.
- The subregion `05-11` (Greenland Ice Sheet) has been removed.
- The `WGMS_CODE` column has been deleted from all files.
- The `RGI_CODE` column is now called `o1region` (first order files) and `o2region` (second-order files)

In [None]:
# Relative path to the RGI7 data directory: three levels up from
# rgi7_scripts/workflow/preprocessing (assumed to be the working directory).
data_dir = '../../../rgi7_data/'

In [None]:
import os
import numpy as np
import pandas as pd
import shapely.geometry as shpg
import geopandas as gpd
from utils import mkdir
import shutil
from shapely.validation import make_valid

## Regions 

In [None]:
out_dir = os.path.abspath(os.path.join(data_dir, '00_rgi70_regions'))
mkdir(out_dir)

In [None]:
# Read the new names
df_ref = pd.read_csv(os.path.join(out_dir, '00_rgi70_rgi_region_names.csv'), dtype=str)
df_ref

In [None]:
# Read the RGI region files
rgi_dir = os.path.join(data_dir, 'l0_RGIv6')
rgi_reg = gpd.read_file('zip://' + os.path.join(data_dir, 'l0_RGIv6', '00_rgi60_regions.zip', '00_rgi60_O1Regions.shp'))

In [None]:
# Create a new region 20 based on the subregion outline
rgi_subreg = gpd.read_file('zip://' + os.path.join(data_dir, 'l0_RGIv6', '00_rgi60_regions.zip', '00_rgi60_O2Regions.shp'))

In [None]:
# Quick look at the outline of subregion 19-31
rgi_subreg.loc[rgi_subreg['RGI_CODE'] == '19-31'].plot();

In [None]:
# Geometry of subregion 19-31; it is appended to the first-order table
# further down.
aa_geom = rgi_subreg.loc[rgi_subreg['RGI_CODE'] == '19-31'].iloc[0].geometry

In [None]:
# Subtract that outline from the geometry of the last first-order row.
# NOTE(review): presumably the last row is region 19 - confirm.
suba_geom = rgi_reg.iloc[-1].geometry.difference(aa_geom)

In [None]:
# Number of parts in the difference
len(suba_geom.geoms)

In [None]:
# Display the second part (it is not used below)
suba_geom.geoms[1]

In [None]:
# Keep only the first part and check that it is a valid geometry
suba_geom = suba_geom.geoms[0]
suba_geom.is_valid

In [None]:
suba_geom

In [None]:
# Store the reduced outline in the row labelled 20.
# NOTE(review): assumes label 20 is the same row as `iloc[-1]` above - confirm.
rgi_reg.loc[20, 'geometry'] = suba_geom

In [None]:
aa_geom

In [None]:
rgi_reg = pd.concat([rgi_reg, gpd.GeoDataFrame([aa_geom], columns=['geometry'], crs=rgi_reg.crs)], ignore_index=True)

In [None]:
rgi_reg

In [None]:
(~ rgi_reg.is_valid).sum()

In [None]:
rgi_reg.loc[~ rgi_reg.is_valid]

In [None]:
# Try to repair every invalid outline with shapely's make_valid
for i, s in rgi_reg.loc[~ rgi_reg.is_valid].iterrows():
    corr = make_valid(s.geometry)
    if isinstance(corr, shpg.Polygon):
        # A single polygon can replace the broken outline directly
        print(f'Success : {s.FULL_NAME}')
        rgi_reg.loc[i, 'geometry'] = corr
    else:
        # Several parts came back: name the row, list the part types and
        # leave the row untouched so the right part can be picked by hand
        print(f'Not good : {s.FULL_NAME}')
        for g in corr.geoms:
            print(type(g))

In [None]:
# Rows that are still invalid after the automatic repair
rgi_reg.loc[~ rgi_reg.is_valid]

In [None]:
# Repair the first remaining invalid outline by hand
corr = make_valid(rgi_reg.loc[~ rgi_reg.is_valid].iloc[0].geometry)

In [None]:
# Inspect the parts returned by make_valid
corr.geoms[0]

In [None]:
corr.geoms[1]

In [None]:
# Keep the first part as outline of the row labelled 21.
# NOTE(review): assumes the first invalid row above is the one labelled 21 - confirm.
rgi_reg.loc[21, 'geometry'] = corr.geoms[0]
rgi_reg.loc[21, 'RGI_CODE'] = 20
# Cast the codes to integers (they are formatted as strings further down)
rgi_reg['RGI_CODE'] = rgi_reg['RGI_CODE'].astype(int)
rgi_reg.loc[21, 'FULL_NAME'] = 'Antarctic Mainland'
rgi_reg.loc[20, 'FULL_NAME'] = 'Subantarctic and Antarctic Islands'

In [None]:
# Remove the useless Alaska box in the Eastern hemisphere
rgi_reg = rgi_reg.drop(1).reset_index()
del rgi_reg['index']

In [None]:
rgi_reg

In [None]:
# Rename things that need renaming
rgi_reg['RGI_CODE'] = [f'{int(i):02d}' for i in rgi_reg['RGI_CODE']]

In [None]:
# The old WGMS_CODE column is recycled as LONG_CODE
rgi_reg = rgi_reg.rename(columns={'WGMS_CODE': 'LONG_CODE'})


def _first_ref_value(code, column):
    """Return `column` from the first row of `df_ref` whose RGI_CODE equals `code`."""
    return df_ref.loc[df_ref.RGI_CODE == code, column].values[0]


# Overwrite the names and long codes with the values of the reference table
rgi_reg['FULL_NAME'] = [_first_ref_value(c, 'FULL_NAME') for c in rgi_reg['RGI_CODE']]
rgi_reg['LONG_CODE'] = [_first_ref_value(c, 'LONG_CODE') for c in rgi_reg['RGI_CODE']]

In [None]:
# Pick the outline of region 12 and show its extent
is_region_12 = rgi_reg['RGI_CODE'] == '12'
poly = rgi_reg.loc[is_region_12].iloc[0].geometry
poly.bounds

Let's move the southern boundary down to 30°N instead:

In [None]:
x, y = poly.exterior.xy
ny = np.where(np.isclose(y, 31), 30, y)
new_poly = shpg.Polygon(np.array((x, ny)).T)
rgi_reg.loc[rgi_reg.RGI_CODE == '12', 'geometry'] = new_poly

In [None]:
rgi_reg

In [None]:
(~ rgi_reg.is_valid).sum()

In [None]:
# Remove RGI 20
# rgi_reg = rgi_reg.iloc[:-1]

In [None]:
# Rename columns: everything goes to lower case and the region code column
# becomes 'o1region'. The code column is addressed by name, so the result
# does not depend on the order of the columns in the input file.
rgi_reg = rgi_reg.rename(columns=str.lower).rename(columns={'rgi_code': 'o1region'})
# Keep only the columns that belong in the output file, in their final order
rgi_reg = rgi_reg[['o1region', 'full_name', 'long_code', 'geometry']]
rgi_reg

In [None]:
print(rgi_reg.set_index('o1region')[['full_name', 'long_code']].to_markdown())

In [None]:
rgi_reg.crs = 'EPSG:4326'

In [None]:
out_file_dir = os.path.join(out_dir, '00_rgi70_O1Regions')
rgi_reg.to_file(out_file_dir)

In [None]:
shutil.make_archive(f'{out_dir}/00_rgi70_O1Regions', 'zip', out_file_dir)

In [None]:
# Check
rgi_reg = gpd.read_file(out_file_dir)
assert rgi_reg.o1region.dtype == 'O'

In [None]:
gpd.io.file.infer_schema(rgi_reg)

## Subregions 

In [None]:
rgi_reg = gpd.read_file('zip://' + os.path.join(data_dir, 'l0_RGIv6', '00_rgi60_regions.zip', '00_rgi60_O2Regions.shp'))

In [None]:
# Read the new names
df_ref = pd.read_csv(os.path.join(out_dir, '00_rgi70_rgi_subregion_names.csv'), dtype=str)
df_ref

In [None]:
len(rgi_reg.RGI_CODE.unique())

In [None]:
# Build the long codes from the full names: brackets are removed, '/' and
# '-' become underscores, the text is lower-cased and blanks become
# underscores.
_char_map = str.maketrans({'(': None, ')': None, '/': '_', '-': '_'})


def _as_long_code(full_name):
    """Return the lower-case, underscore-separated form of `full_name`."""
    code = full_name.translate(_char_map).lower().replace(' ', '_')
    # Two passes of '__' -> '_' to shorten runs of underscores
    return code.replace('__', '_').replace('__', '_')


long = [_as_long_code(name) for name in df_ref['FULL_NAME']]
df_ref['LONG_CODE'] = long

In [None]:
# Move RGI12 a bit south
poly = rgi_reg.loc[rgi_reg.RGI_CODE == '12-02'].iloc[0].geometry
poly.bounds

In [None]:
x, y = poly.exterior.xy
ny = np.where(np.isclose(y, 32), 30, y)
new_poly = shpg.Polygon(np.array((x, ny)).T)
rgi_reg.loc[rgi_reg.RGI_CODE == '12-02', 'geometry'] = new_poly

In [None]:
# Remove the useless Alaska box in the Eastern hemisphere
rgi_reg = rgi_reg.drop(3).reset_index()
del rgi_reg['index']

In [None]:
# Rename things that need renaming
rgi_reg.loc[91, 'RGI_CODE'] = '20-01'
rgi_reg.loc[91, 'FULL_NAME'] = 'Antarctic Mainland'
rgi_reg = rgi_reg.rename({'WGMS_CODE':'LONG_CODE'}, axis=1)
rgi_reg['FULL_NAME'] = [df_ref.loc[df_ref.RGI_CODE == i, 'FULL_NAME'].values[0] for i in rgi_reg['RGI_CODE']]
rgi_reg['LONG_CODE'] = [i + '_' + df_ref.loc[df_ref.RGI_CODE == i, 'LONG_CODE'].values[0] for i in rgi_reg['RGI_CODE']]

In [None]:
(~ rgi_reg.is_valid).sum()

In [None]:
for i, s in rgi_reg.loc[~ rgi_reg.is_valid].iterrows():
    corr = make_valid(s.geometry)
    if type(corr) == shpg.Polygon:
        print(f'Success : {s.FULL_NAME}')
        rgi_reg.loc[i, 'geometry'] = corr
    else:
        print(f'Not good : {s.FULL_NAME}')
        for g in corr.geoms:
            print(type(g))

In [None]:
# `corr` and `i` still hold the values from the last iteration of the
# repair loop above; inspect the parts of that geometry.
corr.geoms[0]

In [None]:
corr.geoms[1]

In [None]:
# Use the first part as the outline of that row.
# NOTE(review): only correct if the last invalid row of the loop is the one
# that could not be repaired automatically - confirm.
rgi_reg.loc[i, 'geometry'] = corr.geoms[0]

In [None]:
# Count the outlines that are still invalid
(~ rgi_reg.is_valid).sum()

In [None]:
rgi_reg

In [None]:
# Remove RGI 20
# rgi_reg = rgi_reg.iloc[:-1]

In [None]:
# Remove Greenland icesheet
rgi_reg = rgi_reg.loc[rgi_reg.RGI_CODE != '05-11'].copy()

In [None]:
# Add o1region for help
rgi_reg['o1Region'] = [s.split('-')[0] for s in rgi_reg.RGI_CODE]

In [None]:
# order 
rgi_reg = rgi_reg[['o1Region', 'RGI_CODE', 'FULL_NAME', 'LONG_CODE', 'geometry']]

In [None]:
# Remame things
names = [s.lower() for s in rgi_reg.columns]
names[1] = 'o2region'
rgi_reg.columns = names
rgi_reg

In [None]:
len(rgi_reg.o2region.unique())

In [None]:
rgi_reg.crs = 'EPSG:4326'

In [None]:
out_file_dir = os.path.join(out_dir, '00_rgi70_O2Regions')
rgi_reg.to_file(out_file_dir)

In [None]:
shutil.make_archive(f'{out_dir}/00_rgi70_O2Regions', 'zip', out_file_dir)

In [None]:
print(rgi_reg.set_index('o2region')[['full_name', 'long_code']].to_markdown())