In [1]:
%matplotlib inline
import xarray as xr
import cartopy
import cartopy.crs as ccrs
import salem
import pandas as pd
import geopandas as gpd
from oggm import utils
import os, glob

In [10]:
# Lookup tables translating the integer attribute codes into readable labels.
# gtkeys decodes the `Form` column, ttkeys `TermType`, stkeys `Status`.
gtkeys = {
    0: 'Glacier',
    1: 'Ice cap',
    2: 'Perennial snowfield',
    3: 'Seasonal snowfield',
    9: 'Not assigned',
}
ttkeys = {
    0: 'Land-terminating',
    1: 'Marine-terminating',
    2: 'Lake-terminating',
    3: 'Dry calving',
    4: 'Regenerated',
    5: 'Shelf-terminating',
    9: 'Not assigned',
}
stkeys = {
    0: 'Glacier or ice cap',
    1: 'Glacier complex',
    2: 'Nominal glacier',
    9: 'Not assigned',
}

# Locate the RGI files (version 6.2) through OGGM
rgi_dir = utils.get_rgi_dir(version='62')

# Sorted shapefile paths one directory level below rgi_dir; the first two
# sorted matches are skipped.
# NOTE(review): presumably those two are not per-region glacier inventories
# (e.g. region outline files) - confirm against the directory content.
fs = sorted(glob.glob(rgi_dir + "/*/*_rgi6*_*.shp"))[2:]

# One plain attribute table per shapefile: indexed by RGIId, geometry removed
out = [
    pd.DataFrame(gpd.read_file(f).set_index('RGIId').drop(columns='geometry'))
    for f in fs
]
mdf = pd.concat(out)

# Store both region codes as zero-padded, two-character strings
for region_col in ('O1Region', 'O2Region'):
    mdf[region_col] = ['{:02d}'.format(int(code)) for code in mdf[region_col]]

# Add one readable label column per coded attribute column.
# Unknown codes raise a KeyError on purpose (plain dict lookup).
for label_col, code_col, lookup in (
        ('GlacierType', 'Form', gtkeys),
        ('TerminusType', 'TermType', ttkeys),
        ('GlacierStatus', 'Status', stkeys),
):
    mdf[label_col] = [lookup[code] for code in mdf[code_col]]

# Boolean convenience flags derived from the labels above.
# Note that lake-terminating glaciers are flagged as tidewater as well.
mdf['IsTidewater'] = mdf['TerminusType'].isin(['Marine-terminating', 'Lake-terminating'])
mdf['IsNominal'] = mdf['GlacierStatus'] == 'Nominal glacier'

In [11]:
# The `check_geom` column is not needed in the stored statistics table
mdf = mdf.drop(columns=['check_geom'])

In [12]:
# Sanity check: characters 6-7 of every RGIId (the index) must equal the
# row's O1Region code. Done in one vectorized comparison instead of
# iterating over the rows, which builds a Series per row and is very slow
# on a table of this size.
region_from_id = mdf.index.str[6:8]
region_mismatch = region_from_id != mdf['O1Region'].to_numpy()
assert not region_mismatch.any(), (
    'RGIId does not match O1Region for: {}'.format(
        list(mdf.index[region_mismatch][:5]))
)

In [13]:
# Persist the merged attribute table to a compressed HDF5 file.
# mode='w' means an existing file of the same name is replaced.
mdf.to_hdf(
    'rgi62_stats.h5',
    mode='w',
    key='df',
    complevel=5,
)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['GLIMSId', 'BgnDate', 'EndDate', 'O1Region', 'O2Region', 'Name',
       'GlacierType', 'TerminusType', 'GlacierStatus'],
      dtype='object')]

  pytables.to_hdf(


In [2]:
# Reload the attribute table from the HDF5 file (stored under key 'df')
df = pd.read_hdf('rgi62_stats.h5', key='df')

In [3]:
# Display the available columns of the reloaded table
df.columns

Index(['GLIMSId', 'BgnDate', 'EndDate', 'CenLon', 'CenLat', 'O1Region',
       'O2Region', 'Area', 'Zmin', 'Zmax', 'Zmed', 'Slope', 'Aspect', 'Lmax',
       'Status', 'Connect', 'Form', 'TermType', 'Surging', 'Linkages', 'Name',
       'GlacierType', 'TerminusType', 'GlacierStatus', 'IsTidewater',
       'IsNominal'],
      dtype='object')

In [4]:
# Drop the rows with connectivity level 2.
# NOTE(review): presumably these are glaciers strongly connected to an ice
# sheet (RGI `Connect` attribute) - confirm against the RGI documentation.
# The explicit copy makes `df` an independent frame, so adding columns to it
# later does not raise a SettingWithCopyWarning.
df = df.loc[df['Connect'] != 2].copy()

In [5]:
# Year of the outline: the first four characters of the BgnDate string,
# converted to an integer.
# NOTE(review): negative values presumably come from a missing-date sentinel
# in BgnDate - confirm.
# `assign` returns a new frame rather than writing into a possibly sliced
# one, which avoids the SettingWithCopyWarning and keeps the cell re-runnable.
df = df.assign(rgi_year=df['BgnDate'].str[:4].astype('int64'))

In [7]:
# Number of rows whose extracted year is negative
int((df['rgi_year'] < 0).sum())

633

In [8]:
# Keep only the rows with a strictly positive year
df = df[df['rgi_year'].gt(0)]

In [9]:
# Earliest year remaining after the filtering above
df['rgi_year'].min()

1943

In [10]:
# Median outline year per first-order region.
# The column is selected *before* aggregating: taking the median of the whole
# frame would also try to aggregate the string columns, which raises a
# TypeError on pandas >= 2.0 and is wasted work on older versions.
df.groupby('O1Region')['rgi_year'].median()

O1Region
01    2009.0
02    2004.0
03    1999.0
04    2001.0
05    2001.0
06    2000.0
07    2008.0
08    2002.0
09    2001.0
10    2011.0
11    2003.0
12    2001.0
13    2006.0
14    2001.0
15    2001.0
16    2000.0
17    2000.0
18    1978.0
19    1989.0
Name: rgi_year, dtype: float64