In [1]:
import pointCollection as pc
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import h5py
%matplotlib notebook
import scipy.interpolate as si
import scipy.stats as ss
import h5py
import os
import re
import matplotlib.gridspec as gridspec
import pandas as pd
import seaborn as sns

# change this data root to match your own
data_root = '/Volumes/ice2/ben/MAR/ATL11_with_corrections/'



In [2]:
drainage_basins=pc.grid.data().from_geotif(data_root+'/drainage_basins.tif')
gimp_mask=pc.grid.data().from_geotif(data_root+'/GimpIceMask_1km.tif')
v_mask=pc.grid.data().from_geotif(data_root+'/vel_mask_annual_sigma_lt_20.tif')
v_mask.index(np.arange(0, v_mask.shape[0], 5), np.arange(0, v_mask.shape[1], 5));
dbi=si.RegularGridInterpolator((drainage_basins.y, drainage_basins.x), drainage_basins.z, method='nearest')

In [3]:
def RDE(x):
    xs=x.copy()
    xs=np.isfinite(xs)   # this changes xs from values to a boolean
    N=len(xs.nonzero()[0])
    if N<2 :
        return np.nan
    ind=np.arange(0.5, N)
    LH=np.interp(np.array([0.16, 0.84])*N, ind, np.sort(x[xs]))
    #print('LH =',LH)
    return (LH[1]-LH[0])/2.  # trying to get some kind of a width of the data ~variance


## read in the data file.  
The data were written by the 'reduce_firn_dh' notebook.  It contains one top-level group for each model, and beneath that, one group for each epoch.  Within each epoch, the data represent a 2.5-km blockmedian of the corrected elevation-change values.  This gives a smaller number of difference values for each region, and makes a file of reasonable size.

Applying the blockmedian to the drainage basin variable didn't work well (cells containing borders aren't represented well), so let's repeat the interpolation for the cell locations.

In [4]:
data_file=os.path.join(data_root+'/combined_xover_at_differences.h5')


strings_xover = ['2018.Q4-2019.Q2','2019.Q1-2019.Q2']
strings_at=['2019.Q2-2019.Q3', '2019.Q3-2019.Q4', '2019.Q4-2020.Q1']
epochs=strings_xover+strings_at

D={}
with h5py.File(data_file,'r') as h5f:
    models=list(h5f.keys())
    print(models)
for model in models:
    D[model]={}
    for epoch in epochs:
        this_group='/'.join(['',model, epoch])
        D[model][epoch]=pc.data().from_h5(data_file, group=this_group)
        D[model][epoch].assign({'basin':np.floor(dbi.__call__((D[model][epoch].y, D[model][epoch].x))*10)/10})

['GSFC_fdm_v1_zsurf', 'MARv3_11_2_ERA_10km_zsurf', 'MARv3_11_2_ERA_20km_zsurf', 'MARv3_11_2_NCEP_20km_zsurf', 'MARv3_11_ERA_zsurf']


In [5]:
D[model][epoch]

<class 'pointCollection.data.data'> with shape (15683,),
with fields:
['basin', 'dh', 'dh_corr', 'h0', 't0', 't1', 'x', 'y']

### Calculate the percentiles for all models and for all epochs
We'll save the percentiles of the elevation differences for all the models to look at collectively

In [6]:
h_ranges=[(-10, 2000), (2000, 5000)]
h_range_cols={h_ranges[0]:0, h_ranges[1]:1}
h_range_colors={h_ranges[0]:'r', h_ranges[1]:'g'}
h_range_names=['< 2000', '> 2000']

basins=np.arange(1,9)

percentiles=np.array([5, 16, 50, 84, 95])
pct={epoch :\
     {model:\
      {h_range:\
       {rc:np.zeros((basins.size, percentiles.size)) for rc in ['raw', 'corr']}\
       for h_range in h_ranges}\
      for model in models}
     for epoch in epochs + ['melt', 'non_melt', 'all']}

for count, model in enumerate(models):
    for epoch in epochs +['melt', 'non_melt','all']:
        for h_range in h_ranges:
            pct[epoch][model][h_range]['count']=np.zeros(basins.size)

for count, model in enumerate(models): 
    for epoch in epochs:
        for basin in basins:
            for h_range in h_ranges: 
                Dsub=D[model][epoch]
                els = np.floor(Dsub.basin)==basin
                els &= (Dsub.h0 > h_range[0])
                els &= (Dsub.h0 < h_range[1])
                Dsub=Dsub[els]
                pct[epoch][model][h_range]['raw'][basin-1,:]=ss.scoreatpercentile(Dsub.dh, percentiles)
                pct[epoch][model][h_range]['corr'][basin-1,:]=ss.scoreatpercentile(Dsub.dh_corr, percentiles)
                pct[epoch][model][h_range]['count'][basin-1]=Dsub.size

In [7]:
# melt season runs May 15 -> August 1  (May begins 4*30 days into the new year, 2019 starts ~day 365.25)
melt_season=np.array([365.25+4*30+15, 365.25+7*30])*3600*24

for count, model in enumerate(models): 
    for basin in basins:
        for h_range in h_ranges: 
            melt_data=[]
            non_melt_data=[]
            all_data=[]
            for epoch in epochs:
                Dsub=D[model][epoch]
                els = np.floor(Dsub.basin)==basin
                els &= (Dsub.h0 > h_range[0])
                els &= (Dsub.h0 < h_range[1])
                Dsub=Dsub[els]
                melt_days = ( ((Dsub.t0 < melt_season[0] ) & (Dsub.t1 > melt_season[0]) ) *(Dsub.t1-melt_season[0]) + \
                ((Dsub.t0 > melt_season[0] ) & (Dsub.t1 < melt_season[1]) )*(Dsub.t1-Dsub.t0) + \
                ((Dsub.t1 > melt_season[0] ) & (Dsub.t1 < melt_season[1]) ) *(melt_season[1]-Dsub.t1) )/24/3600.
                melt_data += [Dsub[melt_days>40]]
                non_melt_data += [Dsub[melt_days < 7 ]]
                all_data += [Dsub]
            temp=pc.data().from_list(melt_data)
            pct['melt'][model][h_range]['raw'][basin-1,:] = ss.scoreatpercentile(temp.dh, percentiles)
            pct['melt'][model][h_range]['corr'][basin-1,:] = ss.scoreatpercentile(temp.dh_corr, percentiles)
            pct['melt'][model][h_range]['count'][basin-1]=temp.size
            temp=pc.data().from_list(non_melt_data)
            pct['non_melt'][model][h_range]['raw'][basin-1,:] = ss.scoreatpercentile(temp.dh, percentiles)
            pct['non_melt'][model][h_range]['corr'][basin-1,:] = ss.scoreatpercentile(temp.dh_corr, percentiles)
            pct['non_melt'][model][h_range]['count'][basin-1]=temp.size
            temp=pc.data().from_list(all_data)
            pct['all'][model][h_range]['raw'][basin-1,:] = ss.scoreatpercentile(temp.dh, percentiles)
            pct['all'][model][h_range]['corr'][basin-1,:] = ss.scoreatpercentile(temp.dh_corr, percentiles)
            pct['all'][model][h_range]['count'][basin-1]=temp.size

In [8]:
{basin:pct['melt'][model][h_ranges[1]]['count'][basin-1]/pct['non_melt'][model][h_ranges[1]]['count'][basin-1] for basin in basins}

{1: 0.3870482390149538,
 2: 0.3145705454703757,
 3: 0.2680590768240171,
 4: 0.19734710409072498,
 5: 0.16221628838451269,
 6: 0.32421182455869885,
 7: 0.2914984472049689,
 8: 0.3209974364949895}

### Tables of statistics:
This function reports a summary of a particular statistic for the different models.

In [22]:
#sn={'raw':'raw',
#        'GSFC_fdm_v1_zsurf':'GSFC_v1',
#        'MARv3_11_ERA_zsurf':'M3.11ERA.15', 
#        'MARv3_11_2_ERA_10km_zsurf':'M3.11.2ERA10',
#        'MARv3_11_2_ERA_20km_zsurf':'M3.11.2ERA20',
#        'MARv3_11_2_NCEP_20km_zsurf':'M311.2NC20'
#        ''}
sn={'raw':'raw',
        'GSFC_fdm_v1_zsurf':'GSFC_v1',
#        'MARv3_11_ERA_zsurf':'M3.11ERA.15', 
        'MARv3_11_2_ERA_10km_zsurf':'M3.11.2ERA10',
#        'MARv3_11_2_ERA_20km_zsurf':'M3.11.2ERA20',
       'MARv3_11_2_NCEP_20km_zsurf':'M311.2NC20'
        ''}


sigma_sn={model:f'$\sigma_{{{sn[model]}}}$' for model in sn.keys()}


In [32]:
def all_basin_stats(pct, epoch, h_range, var):
    if var=='lt_tail':
        p1=1
        p0=0
    elif var == 'ctr':
        p1=3
        p0=1
    elif var == 'rt_tail':
        p1=4
        p0=3

    #models=['GSFC_fdm_v1_zsurf', 'MARv3_11_ERA_zsurf', 'MARv3_11_2_ERA_10km_zsurf',
    # 'MARv3_11_2_ERA_20km_zsurf',
    # 'MARv3_11_2_NCEP_20km_zsurf']
    models=['GSFC_fdm_v1_zsurf',  'MARv3_11_2_ERA_10km_zsurf',
       'MARv3_11_2_NCEP_20km_zsurf']
    
    
    #short names

    sigma_sn={model:f'$\sigma_{{{sn[model]}}}$' for model in sn.keys()}
        
    column_list=['basin','raw', sigma_sn['raw']]
    for model in models:
        column_list += [sn[model]]
        column_list += [sigma_sn[model]]

    df={var:np.zeros(len(basins)) for var in column_list}
    df['basin']=np.arange(len(basins), dtype=int)+1
    
    # first two lines give  model, and resolution
    str1='basin\t raw'
    str2='\t \t'
    for model in models:
        m=re.compile('(.*)_(..km)_zsurf').search(model)
        if m is not None:
            str1 += f'\t {m.group(1)}'
            str2 += f'\t {m.group(2)}'
        else:
            str1 += f'\t {model.replace("_zsurf","")}'
            str2 += '\t 15km'              

    print(f'{epoch}\t{h_range[0]} m to {h_range[1]} m, {var}')
    #print(str1)
    #print(str2)
    # remaining lines give basin, range center, and range spread
    for basin in basins:
        raw=pct[epoch][models[1]][h_range]['raw'][basin-1,:]
        thestr=f'{basin}:\t {0.5*(raw[ p1]+raw[p0]):2.3f}({(raw[ p1]-raw[p0]):2.3f})'
        df['raw'][basin-1]=0.5*(raw[ p1]+raw[p0])
        df[sigma_sn['raw']][basin-1]=(raw[ p1]-raw[p0])
        for model in models:        
            corr=pct[epoch][model][h_range]['corr'][basin-1,:]
            thestr += f' \t{0.5*(corr[p1]-corr[p0]):2.3f}({(corr[p1]-corr[p0]):2.3f})'
            df[sn[model]][basin-1]=0.5*(corr[p1]-corr[p0])
            df[sigma_sn[model]][basin-1]=corr[p1]-corr[p0]                  
        #print(thestr)
    #df=pd.DataFrame(df)
    #df=df[columns]
    df=pd.DataFrame(df)[column_list]
    return df
    


In [33]:
def highlight_minabs(data, color='yellow'):
    '''
    highlight the maximum in a Series or DataFrame
    '''
    attr = 'background-color: {}'.format(color)
    if data.ndim == 1:  # Series from .apply(axis=0) or axis=1
        is_min = np.abs(data) == np.abs(data).min()
        return [attr if v else '' for v in is_min]
    else:  # from .apply(axis=None)
        is_min = data == np.abs(data).min().min()
        return pd.DataFrame(np.where(is_max, attr, ''),
                            index=data.index, columns=data.columns)

In [34]:
def bold_abs_lt_first(data):
    '''
    highlight the maximum in a Series or DataFrame
    '''
    attr_good = 'font-weight: bold' 
    attr_bad = 'color : red'
    is_lt = np.abs(data) < np.abs(data[0])
    temp = [attr_good if v else attr_bad for v in is_lt]
    temp[0] = ''
    return temp
    

## Center statistics: Central peak
Look at the width of the central peak for two epochs.  

#### All data, low elevation:

In [35]:
df=all_basin_stats(pct, 'all', h_ranges[0],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

all	-10 m to 2000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.03,0.24,0.12,0.24,0.14,0.28,0.17,0.35
1,2,-0.11,0.44,0.15,0.29,0.17,0.35,0.17,0.34
2,3,-0.32,1.01,0.23,0.46,0.36,0.72,0.32,0.64
3,4,-0.39,1.99,0.3,0.6,0.98,1.95,1.13,2.25
4,5,-0.42,1.75,0.41,0.82,0.46,0.91,0.41,0.81
5,6,-0.44,1.37,0.26,0.52,0.41,0.83,0.38,0.75
6,7,-0.39,0.96,0.2,0.4,0.35,0.7,0.41,0.83
7,8,-0.05,0.79,0.22,0.43,0.24,0.48,0.28,0.56


#### All data, high elevation:

In [36]:
df=all_basin_stats(pct, 'all', h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

all	2000 m to 5000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.02,0.15,0.08,0.17,0.06,0.12,0.06,0.12
1,2,-0.01,0.16,0.1,0.2,0.06,0.12,0.06,0.12
2,3,-0.05,0.23,0.14,0.29,0.12,0.24,0.11,0.21
3,4,-0.03,0.52,0.19,0.37,0.3,0.6,0.22,0.44
4,5,-0.15,0.76,0.4,0.8,0.44,0.88,0.36,0.72
5,6,-0.15,0.35,0.27,0.55,0.26,0.53,0.22,0.45
6,7,-0.1,0.22,0.13,0.26,0.09,0.19,0.08,0.17
7,8,-0.02,0.18,0.11,0.23,0.08,0.16,0.08,0.17


### All non-melt data, at low elevation:

In [37]:
df=all_basin_stats(pct, 'non_melt', h_ranges[0],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

non_melt	-10 m to 2000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.01,0.24,0.12,0.25,0.13,0.27,0.17,0.33
1,2,-0.05,0.39,0.14,0.29,0.16,0.32,0.17,0.33
2,3,-0.21,0.87,0.23,0.46,0.35,0.7,0.31,0.62
3,4,-0.43,2.01,0.26,0.53,0.96,1.91,1.12,2.24
4,5,-0.45,1.83,0.42,0.83,0.47,0.94,0.44,0.87
5,6,-0.38,1.32,0.25,0.49,0.38,0.75,0.33,0.67
6,7,-0.31,0.88,0.19,0.39,0.33,0.65,0.28,0.56
7,8,0.05,0.76,0.21,0.43,0.23,0.46,0.24,0.48


### All melt data, at low elevation:

In [38]:
df=all_basin_stats(pct, 'melt', h_ranges[0],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

melt	-10 m to 2000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.08,0.22,0.12,0.24,0.16,0.31,0.21,0.42
1,2,-0.22,0.41,0.15,0.3,0.2,0.4,0.19,0.38
2,3,-0.52,1.14,0.22,0.44,0.39,0.78,0.37,0.74
3,4,-0.13,1.94,0.49,0.99,1.05,2.1,1.26,2.53
4,5,-0.08,0.79,0.31,0.61,0.34,0.67,0.4,0.8
5,6,-0.69,1.34,0.28,0.56,0.47,0.94,0.56,1.11
6,7,-0.65,0.92,0.21,0.42,0.38,0.76,0.48,0.97
7,8,-0.39,0.66,0.22,0.44,0.22,0.45,0.31,0.62


### All non-melt data, at high elevation:

In [39]:
df=all_basin_stats(pct, 'non_melt', h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

non_melt	2000 m to 5000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.01,0.15,0.08,0.17,0.06,0.12,0.06,0.11
1,2,0.01,0.15,0.1,0.2,0.06,0.12,0.06,0.12
2,3,-0.02,0.19,0.14,0.28,0.11,0.22,0.1,0.21
3,4,-0.03,0.52,0.18,0.36,0.3,0.59,0.2,0.41
4,5,-0.14,0.78,0.4,0.8,0.44,0.88,0.35,0.7
5,6,-0.12,0.31,0.28,0.55,0.26,0.53,0.16,0.32
6,7,-0.08,0.21,0.12,0.24,0.09,0.17,0.07,0.15
7,8,-0.01,0.19,0.11,0.21,0.08,0.15,0.08,0.15


### All melt data, at high elevation:

In [40]:
df=all_basin_stats(pct, 'melt', h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

melt	2000 m to 5000 m, ctr


Unnamed: 0,basin,raw,$\sigma_{raw}$,GSFC_v1,$\sigma_{GSFC_v1}$,M3.11.2ERA10,$\sigma_{M3.11.2ERA10}$,M311.2NC20,$\sigma_{M311.2NC20}$
0,1,-0.07,0.11,0.09,0.17,0.05,0.11,0.08,0.15
1,2,-0.07,0.12,0.1,0.2,0.05,0.11,0.06,0.11
2,3,-0.17,0.22,0.16,0.32,0.15,0.29,0.14,0.28
3,4,-0.05,0.54,0.21,0.41,0.32,0.64,0.38,0.75
4,5,-0.31,0.49,0.33,0.66,0.43,0.86,0.55,1.11
5,6,-0.28,0.32,0.26,0.52,0.26,0.52,0.32,0.64
6,7,-0.17,0.21,0.15,0.31,0.12,0.23,0.19,0.38
7,8,-0.05,0.15,0.13,0.26,0.08,0.17,0.1,0.2


#### Epoch 1 (Q4 of 2018 to Q2 of 2019), low elevation:

In [None]:
df=all_basin_stats(pct, epochs[0], h_ranges[0],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

#### Epoch 1 (Q4 of 2018 to Q2 of 2019), high elevation:

In [None]:
df=all_basin_stats(pct, epochs[0], h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

#### Epoch 2: (Q1 of 2019 to Q2 of 2019), at low elevation:

In [None]:
df=all_basin_stats(pct, epochs[1], h_ranges[0],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

#### Epoch 2: (Q1 of 2019 to Q2 of 2019), at high elevation:

In [None]:
df=all_basin_stats(pct, epochs[1], h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

GSFC seems to have done the best job in reducing the model spread.  There isn't a substantial winner here among the MAR models.  The higher resolution models (MAR_v311_ERA @ 15km and Mar_v311.2 @ 10km) have smaller spreads in a few basins, but the differences are small. The corrections also don't seem to capture a large share of the variance in the data (spreads are comparable between raw and corrected data)

#### Epoch 3: (Q2 of 2019 to Q3 of 2019), at low elevation:

In [None]:
df=all_basin_stats(pct, epochs[2], h_ranges[1],'ctr')
df.style.set_precision(2).apply(highlight_minabs, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(highlight_minabs, color='orange', axis=1, subset=[sigma_sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sn[model] for model in sn.keys()])\
    .apply(bold_abs_lt_first, axis=1, subset=[sigma_sn[model] for model in sn.keys()])

Here the corrections seem to both make large reductions in the spread of the signal, and seem to correct for the large drawdown signals.  The statistics are similar across the models, although the v3.11.2 models seem to have modestly better spread than the V3.11 model, and the 10km v3.11.2 is better than the coarser resolution version.


# deeper dive into individual drainage basins: basin 1

In [19]:
basin=1
h_range=h_ranges[0]
h_bins=np.arange(-2, 2.01, 0.01)
dh_hist={}
dist_data={}
for model in models:
    dh_hist[model]={ver:np.zeros((len(h_bins)-1, len(epochs))) for ver in ['raw','corr']}
    dh_hist[model].update({ver:np.zeros(len(epochs)) for ver in ['raw_med', 'corr_med']})
    dh_hist[model]['raw_data']={}
    dh_hist[model]['corr_data']={}
    for col, epoch in enumerate(epochs):
        Dsub=D[model][epoch]
        els = np.floor(Dsub.basin)==basin
        els &= (Dsub.h0 > h_range[0])
        els &= (Dsub.h0 < h_range[1])
        Dsub=Dsub[els]
        dh_hist[model]['raw_med'][col] = np.median(Dsub.dh)
        dh_hist[model]['corr_med'][col] = np.median(Dsub.dh_corr)
        dh_hist[model]['raw'][:, col] = np.histogram(Dsub.dh, bins=h_bins, density=True)[0]
        dh_hist[model]['corr'][:, col] = np.histogram(Dsub.dh_corr, bins=h_bins, density=True)[0]
        good=np.abs(Dsub.dh_corr)<2.5
        dh_hist[model]['raw_data'][epoch]=Dsub.dh[good]
        dh_hist[model]['corr_data'][epoch]=Dsub.dh_corr[good]

This is a nicer way to compare two different models.  There's another version of violinplot that comes up on Google that puts one distribution on the left of each bar and one on the right.  Might be worth exploring.

In [21]:

basin=6
for count, model in enumerate([models[0], models[1]]):

    fig, hax = plt.subplots(1, len(epochs), gridspec_kw = {'wspace':0, 'hspace':0})

    fig.suptitle(model+' '+str(h_range), fontsize=12)

    for col, epoch in enumerate(epochs):

        V = pd.DataFrame({ "raw_data" : dh_hist[model]['raw_data'][epoch],\
                          "corr_data" : dh_hist[model]['corr_data'][epoch]})
        V = V.stack().reset_index()
        V = V.drop(columns=['level_0'])
        V.columns = ['htype','dh']

        sns.violinplot(x=[epoch[:7]+'\n'+epoch[7:]]*len(V), y='dh', data=V,
                  scale='count', bw=0.05,
                  hue='htype', split=True,
                  palette='RdGy',
                  inner='quartile',
                ax=hax[col])

        hax[col].set_ylim([-2, 2])
        hax[col].legend_.remove()
        if col != 0:
            hax[col].set_ylabel('')
            hax[col].set_yticklabels([])


    handles, labels = hax[0].get_legend_handles_labels()
    fig.legend(handles, labels, 
               loc='lower center', bbox_to_anchor=(0.5, 0.1),
               ncol=2, 
               facecolor='white', framealpha=1)

    fig.tight_layout()
    fig.subplots_adjust(top=0.93)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
[dh_hist[model]['raw_data'][epoch].shape,
dh_hist[model]['corr_data'][epoch].shape]
[model, epoch]