<img src="https://raw.githubusercontent.com/OGGM/oggm/master/docs/_static/logo.png" width="40%"  align="left">

In [None]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import salem
import seaborn as sns
%matplotlib inline

# Ref Table 

In [None]:
# Load the attribute table of all RGI v6 glaciers (cached as one CSV file)
import geopandas as gpd
import glob, os
import oggm
from oggm.utils import get_rgi_dir
frgi = '/home/mowglie/Documents/OGGM_Experiments/rgi60_allglaciers.csv'
rgi_dir = get_rgi_dir(version='6')
if not os.path.exists(frgi):
    # One-off step: stack the attributes of every regional shapefile.
    # The first two sorted matches are skipped (presumably they are not
    # per-region glacier outlines -- TODO confirm).
    shapefiles = sorted(glob.glob(rgi_dir + "/*/*_rgi60_*.shp"))[2:]
    region_tables = []
    for shp_path in shapefiles:
        table = gpd.read_file(shp_path).set_index('RGIId')
        del table['geometry']  # only the attributes are needed
        region_tables.append(table)
    mdf = pd.concat(region_tables)
    mdf.to_csv(frgi)

# Always read back from the CSV, forcing the code-like columns to stay strings
str_cols = ['Form', 'TermType', 'RGIFlag', 'BgnDate',
            'EndDate', 'O1Region', 'O2Region', 'Name']
mdf = pd.read_csv(frgi, index_col=0, converters=dict.fromkeys(str_cols, str))

# Region code: the text between the '-' and the first following '.' of the id
mdf['RGI_REG'] = [rgi_id.split('-')[1].split('.')[0] for rgi_id in mdf.index]

# Human-readable labels for the `Form` and `TermType` codes
gtkeys = {
    '0': 'Glacier',
    '1': 'Ice cap',
    '2': 'Perennial snowfield',
    '3': 'Seasonal snowfield',
    '9': 'Not assigned',
}
ttkeys = {
    '0': 'Land-terminating',
    '1': 'Marine-terminating',
    '2': 'Lake-terminating',
    '3': 'Dry calving',
    '4': 'Regenerated',
    '5': 'Shelf-terminating',
    '9': 'Not assigned',
}
# An unknown code raises a KeyError instead of silently becoming NaN
mdf['GlacierType'] = mdf.Form.map(lambda code: gtkeys[code])
mdf['TerminusType'] = mdf.TermType.map(lambda code: ttkeys[code])
mdf['IsTidewater'] = mdf.TerminusType.isin(['Marine-terminating', 'Lake-terminating'])
mdf['RGIId'] = mdf.index.values

In [None]:
# Number of glaciers in the reference table
mdf.shape[0]

In [None]:
# Number of glaciers per glacier type
sns.countplot(data=mdf, x="GlacierType");

In [None]:
# Number of glaciers per RGI region
sns.countplot(data=mdf, x="RGI_REG");

In [None]:
# The ten glaciers with the largest `Area`, in ascending order
mdf.sort_values('Area').tail(10)

In [None]:
# Keep every region except '19' and report how many glaciers remain
not_reg_19 = mdf['RGI_REG'] != '19'
mdf = mdf[not_reg_19]
print(len(mdf))

# Read in errors

In [None]:
# Build the error table from the `<RGIId>.ERROR` log files of every region.
# Only the first line of each file is read; it is expected to be
# ';'-separated with the task in field 1, the error type in field 2 and the
# message in the remaining fields (field 0 is ignored).
dd = '/home/mowglie/disk/OGGM_Output/run_output_summary/'
rgi_regs = ['rgi_reg_{:02}'.format(p) for p in np.arange(1, 19)]
err_columns = ['RGI_REG', 'TASK', 'TYPE', 'MESSAGE']
err_ids, err_rows = [], []
for r in rgi_regs:
    ldir = os.path.join(dd, r, 'log/*.ERROR')
    paths = glob.glob(ldir)
    for p in sorted(paths):
        fname = os.path.basename(p)
        with open(p, 'r') as f:
            first_line = f.readline().replace('\n', '')
        # Split once instead of once per column
        fields = first_line.split(';')
        err_ids.append(fname.replace('.ERROR', ''))
        err_rows.append([fname.split('-')[1].split('.')[0],
                         fields[1].strip(),
                         fields[2].strip(),
                         ' '.join(fields[3:]).strip()])
# Build the frame in one go: much cheaper than enlarging it cell by cell, and
# the four columns exist even when no log file was found (later cells index
# with `df.columns`). A glacier id found twice is kept twice, so that the
# uniqueness check that follows can actually detect it.
df = pd.DataFrame(err_rows, index=err_ids, columns=err_columns, dtype=object)

In [None]:
# Every glacier may appear only once in the error table.
# (`Index.get_duplicates()` no longer exists in pandas >= 1.0.)
assert df.index.is_unique

## Add manually detected errors

In [None]:
# Stack the `glacier_characteristics.csv` of every region into one table,
# tagging each row with the two-digit region code
dd = '/home/mowglie/disk/OGGM_Output/run_output_summary'
rgi_regs = ['rgi_reg_%02d' % reg_no for reg_no in range(1, 19)]
region_tables = []
for reg_dir in rgi_regs:
    csv_path = os.path.join(dd, reg_dir, 'glacier_characteristics.csv')
    table = pd.read_csv(csv_path, index_col=0, low_memory=False)
    table['RGI_REG'] = reg_dir[-2:]
    region_tables.append(table)
dfi = pd.concat(region_tables)

In [None]:
# Discard the glaciers that already have a logged error; every remaining
# glacier must then have an inversion thickness
already_failed = dfi.index.isin(df.index)
dfi = dfi[~already_failed]
assert dfi['inv_thickness_m'].notnull().all()

In [None]:
# Glaciers whose flowline elevation range is at most 1 are reported as a
# `glacier_masks` error
too_flat = (dfi.flowline_max_elev - dfi.flowline_min_elev) <= 1
dfi_dem_err = dfi.loc[too_flat, ['RGI_REG']].assign(
    TASK='glacier_masks',
    TYPE='RuntimeError',
    MESSAGE='DEM in glacier too flat',
)

In [None]:
# Remove the flat-DEM glaciers before looking for the next error family
in_dem_err = dfi.index.isin(dfi_dem_err.index)
dfi = dfi[~in_dem_err]

In [None]:
# Glaciers with `tstar_avg_prcpsol_max_elev` of 10 or less are reported as a
# `local_mustar` error (the test is `<= 10` although the message reads '< 10')
too_dry = dfi.tstar_avg_prcpsol_max_elev <= 10
dfi_pcp_err = dfi.loc[too_dry, ['RGI_REG']].assign(
    TASK='local_mustar',
    TYPE='RuntimeError',
    MESSAGE='Prcp < 10',
)

In [None]:
# Remove the low-precipitation glaciers before looking for the next error family
in_pcp_err = dfi.index.isin(dfi_pcp_err.index)
dfi = dfi[~in_pcp_err]

In [None]:
# Glaciers with `mu_star` below 1 are reported as a `local_mustar` error
mu_too_small = dfi.mu_star < 1
dfi_mu_err = dfi.loc[mu_too_small, ['RGI_REG']].assign(
    TASK='local_mustar',
    TYPE='RuntimeError',
    MESSAGE='mu < 1',
)

In [None]:
# Add the three derived error tables to the logged errors; a glacier must
# still appear only once afterwards.
# (`Index.get_duplicates()` no longer exists in pandas >= 1.0.)
df = pd.concat([df, dfi_dem_err, dfi_pcp_err, dfi_mu_err])
assert df.index.is_unique

In [None]:
# Numerics: stack the `task_log.csv` of every region into one table,
# tagging each row with the two-digit region code
dd = '/home/mowglie/disk/OGGM_Output/run_output_summary'
task_tables = []
for reg_dir in rgi_regs:
    csv_path = os.path.join(dd, reg_dir, 'task_log.csv')
    table = pd.read_csv(csv_path, index_col=0)
    table['RGI_REG'] = reg_dir[-2:]
    task_tables.append(table)
dfn = pd.concat(task_tables)

In [None]:
# Report every glacier for which one of the runs below did not log 'SUCCESS'.
# Glaciers already present in `df` are removed from `dfn` at the start of each
# pass, so a glacier is only reported for the first task that failed.
tasks = [
    'random_glacier_evolution_rdn_tstar',
    'random_glacier_evolution_rdn_2000',
    'random_glacier_evolution_rdn_2000_tbias_p05',
    'random_glacier_evolution_rdn_2000_tbias_m05',
]
for t in tasks:
    dfn = dfn[~dfn.index.isin(df.index)]
    not_ok = dfn[t] != 'SUCCESS'
    # The task status itself is stored as the error message
    dfn_err = dfn.loc[not_ok, ['RGI_REG', t]].rename(columns={t: 'MESSAGE'})
    dfn_err = dfn_err.assign(TASK='random_glacier_evolution', TYPE='RuntimeError')
    # Same column order as the main error table
    dfn_err = dfn_err[df.columns]
    df = pd.concat([df, dfn_err])

In [None]:
# Final error table, ordered by glacier id; each glacier must appear only once.
# (`Index.get_duplicates()` no longer exists in pandas >= 1.0.)
df = df.sort_index()
assert df.index.is_unique

# Analyse Errors 

In [None]:
# Look up the area of every failed glacier in the reference table
df['AREA'] = mdf.loc[df.index, 'Area']

In [None]:
# Total number of glaciers with an error
df.shape[0]

In [None]:
# Number of errors per RGI region
ax = sns.countplot(data=df, y="RGI_REG")

In [None]:
# Number of errors per failing task
sns.countplot(data=df, y="TASK");

In [None]:
# Area of the failed glaciers as a percentage of the total area in `mdf`
failed_area = df['AREA'].sum()
total_area = mdf['Area'].sum()
failed_area / total_area * 100

### Summary 

In [None]:
# Per-region summary: failed area and count, total glacier count and area.
# The columns are selected *before* aggregating: summing the whole frame also
# aggregates the string columns under pandas >= 2.0, which yields several
# columns and makes the one-name `summary.columns = [...]` assignment fail.
errors_by_reg = df.groupby('RGI_REG')
glaciers_by_reg = mdf.groupby('RGI_REG')
summary = errors_by_reg[['AREA']].sum()
summary.columns = ['AREA_ERR']
summary['N_ERR'] = errors_by_reg['TASK'].count()
summary['N_GLACIERS'] = glaciers_by_reg['GLIMSId'].count()
summary['TOTAL_AREA'] = glaciers_by_reg['Area'].sum()

In [None]:
# Attach the region names, looked up by integer region number
from oggm.utils import parse_rgi_meta
reg_names, subreg_names = parse_rgi_meta(version='6')
region_labels = []
for reg_code in summary.index:
    region_labels.append(reg_names.loc[int(reg_code)].values[0])
summary['REG_NAME'] = region_labels

In [None]:
# Put the columns in the order used by the final table
ordered_cols = ['REG_NAME', 'N_GLACIERS', 'TOTAL_AREA', 'N_ERR', 'AREA_ERR']
summary = summary[ordered_cols]

In [None]:
# Split the errors into three families and store, per region, the number of
# failed glaciers and their area:
#   CLIMATE -> task 'local_mustar'
#   DYNAMS  -> task 'random_glacier_evolution'
#   OTHERS  -> every other task
is_climate = df.TASK.isin(['local_mustar'])
is_dynams = df.TASK.isin(['random_glacier_evolution'])
error_families = [('CLIMATE', is_climate),
                  ('DYNAMS', is_dynams),
                  ('OTHERS', ~(is_climate | is_dynams))]
for suffix, in_family in error_families:
    dfpre = df.loc[in_family]
    by_reg = dfpre.groupby('RGI_REG')
    # Aggregate only the column that is needed: summing the whole frame would
    # also try to "sum" the string columns (MESSAGE may mix NaN and text).
    summary['N_ERR_' + suffix] = by_reg['TASK'].count()
    summary['AREA_ERR_' + suffix] = by_reg['AREA'].sum()

In [None]:
# Append a 'TOTAL' row holding the column sums, then express the failed areas
# as percentages of the total area (for the regions and for the TOTAL row).
tmp = summary.sum()
tmp.name = 'TOTAL'
# `DataFrame.append` was removed in pandas 2.0. The one-row frame built from
# the Series has object columns, so `infer_objects` restores numeric dtypes
# before concatenating (this keeps e.g. N_GLACIERS an integer column).
summary = pd.concat([summary, tmp.to_frame().T.infer_objects()])
summary['PERC_ERR_AREA_TOTAL'] = summary['AREA_ERR'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_CLIMATE'] = summary['AREA_ERR_CLIMATE'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_DYNAMS'] = summary['AREA_ERR_DYNAMS'] / summary['TOTAL_AREA'] * 100
summary['PERC_ERR_AREA_OTHERS'] = summary['AREA_ERR_OTHERS'] / summary['TOTAL_AREA'] * 100

In [None]:
# The TOTAL row has no region name
summary.at['TOTAL', 'REG_NAME'] = ''

In [None]:
# Regions without any error in a family have NaN there: show them as 0
summary = summary.fillna(value=0)

In [None]:
def _count_and_share(counts, shares):
    """Format (count, percentage) pairs as 'N (P.P%)' strings.

    A pair that renders as '0 (0.0%)' becomes an empty string, so the table
    only shows the cells where something actually failed.
    """
    cells = []
    for n, p in zip(counts, shares):
        cell = '{} ({:.1f}%)'.format(int(n), p)
        cells.append('' if cell == '0 (0.0%)' else cell)
    return cells


# Table for the paper: one row per region ('<code>: <name>') plus the TOTAL row.
# The empty cells are produced while formatting, which replaces the former
# cell-by-cell pass based on `Series.iteritems()` (removed in pandas 2.0).
# Only the four error columns are affected; 'N' and 'Area (km2)' can never
# hold the '0 (0.0%)' string.
for_latex = pd.DataFrame(index=[i + ': ' + n for i, n in zip(summary.index, summary.REG_NAME)])
for_latex['N'] = summary.N_GLACIERS.values
for_latex['Area (km2)'] = ['{:.0f}'.format(n) for n in summary.TOTAL_AREA.values]
for_latex['Climate'] = _count_and_share(summary.N_ERR_CLIMATE, summary.PERC_ERR_AREA_CLIMATE)
for_latex['Dynamics'] = _count_and_share(summary.N_ERR_DYNAMS, summary.PERC_ERR_AREA_DYNAMS)
for_latex['Others'] = _count_and_share(summary.N_ERR_OTHERS, summary.PERC_ERR_AREA_OTHERS)
for_latex['All'] = _count_and_share(summary.N_ERR, summary.PERC_ERR_AREA_TOTAL)
for_latex

In [None]:
# Print the LaTeX source of the table, with the rule commands and the TOTAL
# label rewritten through plain text substitutions
latex_src = for_latex.to_latex()
substitutions = (
    ('TOTAL:', 'TOTAL '),
    ('toprule', 'tophline '),
    ('midrule', 'middlehline '),
    ('bottomrule', 'bottomhline '),
)
for old, new in substitutions:
    latex_src = latex_src.replace(old, new)
print(latex_src)

## Write out 

In [None]:
# Save the complete error table
errors_csv = '/home/mowglie/disk/OGGM_Output/list_errors.csv'
df.to_csv(errors_csv)

## Per Task analysis for text 

In [None]:
# Dynamics errors: number of glaciers per error message
is_dynamics = df['TASK'] == 'random_glacier_evolution'
dfs = df[is_dynamics]
dfs.groupby('MESSAGE').count()

In [None]:
# Climate errors: list the glaciers that failed in `local_mustar`
is_climate = df['TASK'] == 'local_mustar'
dfs = df[is_climate]
dfs