<img src="https://raw.githubusercontent.com/OGGM/oggm/master/docs/_static/logo.png" width="40%"  align="left">

In [None]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import salem
import seaborn as sns
%matplotlib inline

In [None]:
from __init__ import DATA_DIR, PLOT_DIR

# Reference table: RGI glacier attributes

In [None]:
# Build (once) and load a flat table with the attributes of all RGI v6 glaciers
import geopandas as gpd
import glob, os
import oggm
from oggm.utils import get_rgi_dir
# NOTE(review): machine-specific absolute path - adapt it on other systems
frgi = '/home/mowglie/Documents/OGGM_Experiments/rgi60_allglaciers.csv'
rgi_dir = get_rgi_dir(version='6')
if not os.path.exists(frgi):
    # One-time action: read every regional shapefile, drop its geometries and
    # cache the concatenated attribute table as csv.
    # NOTE(review): the first two sorted matches are skipped - presumably they
    # are not glacier outline files; confirm against the RGI directory content
    fs = sorted(glob.glob(rgi_dir + "/*/*_rgi60_*.shp"))[2:]
    out = []
    for f in fs:
        sh = gpd.read_file(f).set_index('RGIId')
        del sh['geometry']
        out.append(sh)
    mdf = pd.concat(out)
    mdf.to_csv(frgi)

# These columns are read as plain strings (no numeric parsing of the codes)
str_cols = ['Form', 'TermType', 'RGIFlag', 'BgnDate',
            'EndDate', 'O1Region', 'O2Region', 'Name']
mdf = pd.read_csv(frgi, index_col=0, converters=dict.fromkeys(str_cols, str))
# Region code: the part of the id between the '-' and the following '.'
mdf['RGI_REG'] = [rid.split('-')[1].split('.')[0] for rid in mdf.index]

# Human-readable labels for the `Form` and `TermType` codes
gtkeys = {
    '0': 'Glacier',
    '1': 'Ice cap',
    '2': 'Perennial snowfield',
    '3': 'Seasonal snowfield',
    '9': 'Not assigned',
}
ttkeys = {
    '0': 'Land-terminating',
    '1': 'Marine-terminating',
    '2': 'Lake-terminating',
    '3': 'Dry calving',
    '4': 'Regenerated',
    '5': 'Shelf-terminating',
    '9': 'Not assigned',
}
# Plain dict lookups: an unknown code raises a KeyError
mdf['GlacierType'] = [gtkeys[code] for code in mdf['Form']]
mdf['TerminusType'] = [ttkeys[code] for code in mdf['TermType']]
# Marine- and lake-terminating glaciers are both flagged as tidewater here
mdf['IsTidewater'] = mdf['TerminusType'].isin(['Marine-terminating', 'Lake-terminating'])
mdf['RGIId'] = mdf.index.values

In [None]:
# Number of glaciers in the reference table
mdf.shape[0]

In [None]:
# Number of glaciers per glacier type
sns.countplot(data=mdf, x="GlacierType");

In [None]:
# Number of glaciers per RGI region
sns.countplot(data=mdf, x="RGI_REG");

In [None]:
# Drop the glaciers of region '19' and report how many remain
keep = mdf['RGI_REG'] != '19'
mdf = mdf[keep]
print(mdf.shape[0])

# Read in errors

In [None]:
# Read the per-region glacier statistics (one csv per region code 01..18)
# and stack them into a single table sorted by index.
# os.path.join works whether or not DATA_DIR ends with a path separator,
# whereas plain string concatenation silently builds a wrong path without it.
dd = os.path.join(DATA_DIR, 'standard_prepro')
rgi_regs = ['{:02}'.format(p) for p in range(1, 19)]
df = pd.concat(
    [pd.read_csv(os.path.join(dd, 'glacier_statistics_{}.csv'.format(r)),
                 index_col=0, low_memory=False)
     for r in rgi_regs],
    sort=False).sort_index()

In [None]:
# The statistics table must have as many rows as the reference table
assert df.shape[0] == mdf.shape[0]

In [None]:
# List the columns available in the statistics table
df.columns

In [None]:
# Number of glaciers per failing task
ax = sns.countplot(data=df, y="error_task")

In [None]:
# Normalise the error messages and sort every failed glacier into a category.
# `Series.items()` is used because `Series.iteritems()` was removed in
# pandas 2.0.
for rid, m in df.loc[~df.error_msg.isnull()].error_msg.items():
    # Hide the glacier id so that identical errors compare equal
    m = m.replace(rid, 'glacier')
    if 'mu* out of specified bounds' in m:
        # Replace the number after the last ':' by a sign: ': +' when it is
        # larger than 9000, ': -' otherwise.
        # NOTE(review): 9000 presumably separates the upper-bound from the
        # lower-bound violations - confirm
        head, _, tail = m.rpartition(':')
        try:
            sig = float(tail)
        except ValueError:
            # No trailing number: keep the message as it is
            pass
        else:
            m = head + (': +' if sig > 9000 else ': -')

    # Errors raised by the two calibration tasks count as 'climate' errors
    task = df.loc[rid, 'error_task']
    if task in ('local_t_star', 'mu_star_calibration'):
        cat = 'climate'
    else:
        cat = 'others'

    df.loc[rid, 'error_msg'] = m
    df.loc[rid, 'error_cat'] = cat

In [None]:
# Number of glaciers per (normalised) error message
ax = sns.countplot(data=df, y="error_msg")

In [None]:
# Index labels of all rows that carry an error message
_err = df.index[df['error_msg'].notnull().values]

## Add errors from the dynamical runs

In [None]:
def _read_task_logs(fname):
    """Read `fname` from every regional `dyn_exps` folder into one table.

    Parameters
    ----------
    fname : str
        name of the csv file located in each `rgi_reg_XX` folder

    Returns
    -------
    pd.DataFrame
        the regional tables stacked and sorted by index
    """
    # os.path.join copes with DATA_DIR with or without a trailing separator
    pattern = os.path.join(DATA_DIR, 'dyn_exps', 'rgi_reg_{}', fname)
    logs = [pd.read_csv(pattern.format(r), index_col=0, low_memory=False)
            for r in rgi_regs]
    return pd.concat(logs, sort=False).sort_index()


# Take only non-2000 because of bug in script
ddf = _read_task_logs('task_log.csv')
ddf = ddf[['run_random_climate_rdn_tstar', 'run_random_climate_rect_rdn_tstar']]
# Add corrected
_ddf = _read_task_logs('task_log_2000bf.csv')
assert len(_ddf) == len(ddf)
ddf = pd.concat([ddf, _ddf], axis=1, sort=False)
# Add bf: the 'noseed' run replaces the original rdn_tstar column
_ddf = _read_task_logs('task_log_noseed_bf.csv')
assert len(_ddf) == len(ddf)
ddf['run_random_climate_rdn_tstar'] = _ddf['run_random_climate_rdn_tstar_noseed']

In [None]:
# Blank the dynamics log of every glacier listed in `_err`, so that those
# rows hold no entry at all in `ddf`
ddf.loc[_err] = None

In [None]:
# Keep only the entries that differ from 'SUCCESS'; every 'SUCCESS' cell is
# replaced by `other=None`
ddf = ddf.where(ddf != 'SUCCESS', other=None)

In [None]:
# Number of non-null entries per column
ddf.count()

In [None]:
# For each column: share (in %) of the most frequent entry among all non-null
# entries, followed by the full table of counts
for c in ddf.columns:
    cc = ddf[c].value_counts()
    if cc.empty:
        # Nothing to rank for this column (and no division by zero)
        print(c, 'no entries')
        continue
    # value_counts() sorts by decreasing frequency, so position 0 is the most
    # frequent entry. `.iloc` makes the positional access explicit: `cc[0]` on
    # a label-indexed Series relies on a deprecated positional fallback.
    print(c, cc.iloc[0] / cc.sum()*100)
    print(cc)

In [None]:
# NOTE(review): scratch arithmetic - the origin of both numbers is not shown
# in this notebook; presumably read off the counts printed above (confirm)
772 - 21

In [None]:
# Number of glaciers per entry of the 'rdn_2000' run log
ax = sns.countplot(data=ddf, y="run_random_climate_rdn_2000")

In [None]:
# Number of glaciers per entry of the 'rect_rdn_tstar' run log
ax = sns.countplot(data=ddf, y="run_random_climate_rect_rdn_tstar")

In [None]:
# Keep only the rows that have at least one non-null entry
has_entry = ddf.notnull().any(axis=1)
ddf = ddf.loc[has_entry]

In [None]:
# Flag every glacier left in `ddf` as a 'dynamics' error in the main table
for col, value in [('error_msg', 'Nums'),
                   ('error_cat', 'dynamics'),
                   ('error_task', 'dynamics')]:
    df.loc[ddf.index, col] = value

## Aggregate

In [None]:
# Region and area of every glacier that has an error task
dfe = df.loc[df['error_task'].notnull(), ['rgi_region', 'rgi_area_km2']].copy()

In [None]:
# Per-region number and area of the failed glaciers vs. all glaciers.
# The area column is selected *before* aggregating: summing the whole of `df`
# would also try to add up its text columns.
by_region_err = dfe.groupby('rgi_region')['rgi_area_km2']
by_region_all = df.groupby('rgi_region')['rgi_area_km2']
n_glaciers = by_region_all.count()
summary = by_region_err.sum().to_frame('AREA_ERR')
# Any region without a failed glacier (not only region 6) gets a row with a
# failed area of zero, so that no region is lost from the table
summary = summary.reindex(n_glaciers.index, fill_value=0)
summary = summary.sort_index()
summary['N_ERR'] = by_region_err.count()
summary['N_GLACIERS'] = n_glaciers
summary['TOTAL_AREA'] = by_region_all.sum()

In [None]:
# Attach the name of each region, looked up in the RGI v6 metadata
from oggm.utils import parse_rgi_meta
reg_names, subreg_names = parse_rgi_meta(version='6')
region_labels = []
for reg_id in summary.index:
    # first value of the metadata row of this region
    region_labels.append(reg_names.loc[int(reg_id)].values[0])
summary['REG_NAME'] = region_labels

In [None]:
# Fix the column order of the summary table
column_order = ['REG_NAME', 'N_GLACIERS', 'TOTAL_AREA', 'N_ERR', 'AREA_ERR']
summary = summary.loc[:, column_order].copy()

In [None]:
# Number and area of the failed glaciers per region, for each error category.
# The needed column is selected before aggregating so that the text columns
# of `df` are never summed.
for cat, suffix in [('climate', 'CLIMATE'),
                    ('dynamics', 'DYNAMS'),
                    ('others', 'OTHERS')]:
    dfpre = df.loc[df.error_cat == cat]
    by_region = dfpre.groupby('rgi_region')
    # Regions without an error of this category are left as NaN
    summary['N_ERR_' + suffix] = by_region['error_cat'].count()
    summary['AREA_ERR_' + suffix] = by_region['rgi_area_km2'].sum()

In [None]:
# Use zero-padded, two-digit strings as region labels
summary.index = [format(i, '02d') for i in summary.index]

In [None]:
# Add a 'TOTAL' row holding the column sums, then express each failed area
# as a percentage of the total area (per region and overall)
tmp = summary.sum()
tmp.name = 'TOTAL'
# `DataFrame.append` was removed in pandas 2.0: turn the sums into a one-row
# frame and concatenate it instead. `infer_objects` restores numeric dtypes
# in case the sums are stored in a single object-dtype Series.
summary = pd.concat([summary, tmp.to_frame().T.infer_objects()])
summary['PERC_ERR_AREA_TOTAL'] = summary['AREA_ERR'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_CLIMATE'] = summary['AREA_ERR_CLIMATE'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_DYNAMS'] = summary['AREA_ERR_DYNAMS'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_OTHERS'] = summary['AREA_ERR_OTHERS'] / summary['TOTAL_AREA'] * 100

In [None]:
# The 'TOTAL' row has no region name
summary.at['TOTAL', 'REG_NAME'] = ''

In [None]:
# Missing counts, areas and percentages mean "none": set them to zero
summary = summary.fillna(value=0)

In [None]:
# Display the complete summary table
summary

In [None]:
def _n_and_perc(counts, percs):
    """Format counts and percentages as 'N (P.P%)' strings.

    Parameters
    ----------
    counts : iterable of numbers
        number of glaciers (cast to int)
    percs : iterable of numbers
        percentages, printed with one decimal

    Returns
    -------
    list of str
        one label per pair; a label that would read '0 (0.0%)' is replaced
        by an empty string to keep the table readable
    """
    labels = ['{} ({:.1f}%)'.format(int(n), p) for n, p in zip(counts, percs)]
    return ['' if label == '0 (0.0%)' else label for label in labels]


# Table for the publication: one row per region, labelled 'code: name'.
# The labels are blanked while formatting, which avoids the cell-by-cell
# `Series.iteritems()` loop (`iteritems` was removed in pandas 2.0).
for_latex = pd.DataFrame(index=[i + ': ' + n for i, n in zip(summary.index, summary.REG_NAME)])
for_latex['N'] = summary.N_GLACIERS.values
for_latex['Area (km2)'] = ['{:.0f}'.format(n) for n in summary.TOTAL_AREA.values]
for_latex['Climate'] = _n_and_perc(summary.N_ERR_CLIMATE, summary.PERC_ERR_AREA_CLIMATE)
for_latex['Dynamics'] = _n_and_perc(summary.N_ERR_DYNAMS, summary.PERC_ERR_AREA_DYNAMS)
for_latex['Others'] = _n_and_perc(summary.N_ERR_OTHERS, summary.PERC_ERR_AREA_OTHERS)
for_latex['All'] = _n_and_perc(summary.N_ERR, summary.PERC_ERR_AREA_TOTAL)
for_latex

In [None]:
# Print the table as LaTeX, renaming the rule macros to their *hline variants
# and removing the colon after 'TOTAL'
latex = for_latex.to_latex()
for old, new in [('TOTAL:', 'TOTAL '),
                 ('toprule', 'tophline '),
                 ('midrule', 'middlehline '),
                 ('bottomrule', 'bottomhline ')]:
    latex = latex.replace(old, new)
print(latex)

In [None]:
# Export the descriptive and error columns of the failed glaciers:
# dropna() removes every row with a missing value in any selected column
edf = df[['rgi_region', 'rgi_subregion',
          'cenlon', 'cenlat', 'rgi_area_km2', 'glacier_type',
          'terminus_type', 'status',
          'error_task', 'error_msg', 'error_cat']].dropna()
# Every glacier that has an error task must have survived the dropna()
assert len(edf) == len(dfe)
# os.path.join works whether or not DATA_DIR ends with a path separator
edf.to_csv(os.path.join(DATA_DIR, 'error_summary.csv'))