## The Saltenis estimator

We start by importing the relevant libraries and test functions. The analytical values of the total indices are also loaded so that the estimated values can be benchmarked against them.

In [1]:
%matplotlib inline
import datetime
from Functions import A1, A2, A3, B1, B2, B3, C1, C2
import numpy as np
import pandas as pd
import sobol_seq
import matplotlib.pyplot as plt
from pandas import ExcelWriter
plt.style.use('ggplot')

# Number of columns taken per sample-matrix block when evaluating a function.
k = 6

# Coefficient vectors (not referenced elsewhere in the visible notebook).
a2 = np.array([0, 0.5, 3, 9, 99, 99])
b3 = np.full(6, 6.42)

# Test functions the estimator is benchmarked on.
functions = [A1, A2, A3, B1, B2, B3, C1, C2]

# Analytical values; rows are looked up by function name further down.
AE_df = pd.read_excel('AE_df.xlsx', index_col=0)

def scrambler(q,s):
    """Return `q` shifted forward by the offset `s`."""
    shifted = q + s
    return shifted

def scrambler_left(q,s):
    """Return `q` shifted backward by the offset `s`."""
    shifted = q - s
    return shifted

def Saltenis(a,b):
    """Return half of the squared difference between `a` and `b`."""
    difference = a - b
    return 0.5 * difference**2

The low-discrepancy sample matrix is then generated, and a column-scrambling index is imported, so as to produce 50 independent repetitions from which a stable estimate can be computed.

In [2]:
# Sample-size exponent and number of independent repetitions.
p = 14
run = 50

n = 2

# Low-discrepancy base sample: 2**p - 1 points in 6*k dimensions.
n_points = 2**p - 1
df = pd.DataFrame(sobol_seq.i4_sobol_generate(6*k, n_points))

# Each row of the index table is used as one column ordering of the base sample.
df_index = pd.read_csv('2019.10.08_index.csv', index_col=0)

# Build one column-reordered copy of the base sample per repetition and
# stack them vertically (row labels restart within each repetition).
repetitions = []
for r in range(run):
    column_order = df_index.iloc[r]
    shuffled = df.T.reindex(column_order).reset_index(drop=True).T
    repetitions.append(shuffled)

df_r = pd.concat(repetitions)

The mean absolute errors (together with the root-mean-square and relative errors) are finally estimated across functions and exported to Excel.

In [3]:
# Per-function result containers, keyed by the test function's name.
AE = {f.__name__: [] for f in functions}      # absolute error per sample point and factor
AE_f = {f.__name__: [] for f in functions}    # absolute error averaged over repetitions
MAE = {f.__name__: [] for f in functions}     # AE_f averaged over the factors
MAE_var = {f.__name__: pd.DataFrame() for f in functions}

# An explicit dtype avoids the "default dtype for empty Series" deprecation warning.
RMSE = {f.__name__: pd.Series(dtype=float) for f in functions}
SE = {f.__name__: [] for f in functions}      # squared error per sample point and factor
SE_f = {f.__name__: [] for f in functions}    # root of the squared error averaged over repetitions

RE = {f.__name__: [] for f in functions}      # relative error per sample point and factor
RE_f = {f.__name__: [] for f in functions}    # relative error averaged over repetitions


def _renumber_columns(block):
    """Return `block` with its column labels reset to 0..n-1 (values unchanged)."""
    return block.T.reset_index(drop=True).T


# Repetition label of every row of df_r: `len(df)` consecutive rows per repetition.
rep_labels = np.repeat(np.arange(run), len(df))

for f in functions:
    name = f.__name__

    # Function values on the first k columns, used for the variance estimate.
    ea = pd.DataFrame(f(df_r.iloc[:, :k]))
    ea['r'] = rep_labels

    # Evaluations on the first and second column blocks do not depend on the
    # factor j, so compute them once per function (assumes f is deterministic).
    f_A = f(_renumber_columns(df_r.iloc[:, :k]))
    f_B = f(_renumber_columns(df_r.iloc[:, k:2*k]))

    elementary_effect = []
    for j in range(k):
        B_a = df_r.iloc[:, :2*k].copy()
        C_b = df_r.iloc[:, k:3*k].copy()
        # generate the scrambled matrices: overwrite column j of the second
        # (resp. third) block with column j of the block to its left
        B_a[j+k] = B_a[scrambler_left(j+k, k)]
        C_b[j+2*k] = C_b[scrambler_left(j+2*k, k)]

        # compute the elementary effects
        ee = pd.DataFrame(
            (f_B - f(_renumber_columns(C_b.iloc[:, k:2*k])))
            * (f(_renumber_columns(B_a.iloc[:, k:2*k])) - f_A),
            columns=[j])
        ee['r'] = rep_labels

        # average out within individual repetitions (running mean over the sample size)
        elementary_effect.append(ee.groupby('r')[j].expanding(1).mean())

    elementary_effect_df = pd.concat(elementary_effect, axis=1)
    # compute the (running, population) variance for each repetition
    Var_df = ea.groupby('r')[0].expanding(1).var(ddof=0)

    # Index estimate and its deviation from the analytical value of each factor.
    estimate = 1 - elementary_effect_df.div(Var_df, axis=0)
    reference = AE_df.loc[name]
    error = estimate - reference

    AE[name] = np.abs(error)  # assess the absolute error for each factor
    # The squared error uses the same estimate as the absolute error; it was
    # previously computed without the leading "1 -", i.e. on a different quantity.
    SE[name] = error**2
    RE[name] = AE[name].div(reference)

    # Drop the repetition level so rows are labelled by sample position only.
    sample_index = AE[name].index.droplevel(0)
    AE[name].index = sample_index
    SE[name].index = sample_index
    RE[name].index = sample_index

    # Average across repetitions at each sample position.
    AE_f[name] = AE[name].groupby(AE[name].index).mean()
    SE_f[name] = np.sqrt(SE[name].groupby(SE[name].index).mean())
    RE_f[name] = RE[name].groupby(RE[name].index).mean()

    MAE[name] = AE_f[name].mean(axis=1)  # compute the mean absolute error by averaging over the factors
    RMSE[name] = SE_f[name].mean(axis=1)

    MAE[name].index = (MAE[name].index+1)*2*(k+1)  # set the index according to the total cost
    RMSE[name].index = MAE[name].index
    RE_f[name].index = MAE[name].index

# Export one sheet per function. The context manager saves and closes each
# workbook; ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('Owen_T.xlsx', engine='xlsxwriter') as writer:
    for sheet, mae in MAE.items():
        mae.to_excel(writer, sheet_name=sheet)

with pd.ExcelWriter('Owen_T_no_mean.xlsx', engine='xlsxwriter') as writer:
    for sheet, ae_f in AE_f.items():
        ae_f.to_excel(writer, sheet_name=sheet)

with pd.ExcelWriter('Owen_T_rmse.xlsx', engine='xlsxwriter') as writer:
    for f in functions:
        RMSE[f.__name__].to_excel(writer, sheet_name=f.__name__)

# Relative errors are exported only at the costs 2*(k+1)*(2**s - 1), s = 2..p.
re_costs = [2*(k+1)*(-1+2**s) for s in range(2, p+1)]
with pd.ExcelWriter('Owen_T_RE.xlsx') as writer:
    for f in functions:
        RE_f[f.__name__].loc[re_costs].loc[:(k+1)*2**(p+1)].round(3).to_excel(writer, sheet_name=f.__name__)

  


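The exported data are finally used to benchmark the performance against the other estimators: for each function, the MAE of the Saltenis and Owen estimators is plotted against the computational cost.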

In [76]:
# Line styles and markers, consumed in order by the plotted curves.
ls = ['-',':','-.','--',(0, (3, 5, 1, 5, 1, 5))]
mk = ['s','o','v','+','*']

# Costs at which each estimator is compared, and the common upper bound.
saltenis_costs = [(k+1)*(-1+2**s) for s in range(2, p+1)]
owen_costs = [2*(k+1)*(-1+2**s) for s in range(2, p+1)]
max_cost = (k+1)*2**(p+1)

# Date prefix (YYYY.MM.DD) for the saved figures.
date_tag = datetime.datetime.now().strftime('%Y.%m.%d')

for f in functions:
    # Local names deliberately differ from the `Saltenis` function defined in
    # the first cell, which the previous assignment used to overwrite.
    saltenis_mae = pd.read_excel('Saltenis_asym.xlsx', sheet_name=f.__name__,
                                 index_col=0).loc[saltenis_costs].loc[:max_cost]
    # NOTE(review): this reads a date-prefixed workbook, whereas the estimation
    # cell writes 'Owen_T.xlsx' - confirm which file is intended.
    owen_mae = pd.read_excel('2020.04.05_Owen_T.xlsx', sheet_name=f.__name__,
                             index_col=0).loc[owen_costs].loc[:max_cost]

    curves = {'Saltenis': saltenis_mae, 'Owen': owen_mae}

    fig, ax = plt.subplots()
    for position, (label, errors) in enumerate(curves.items()):
        ax.loglog(errors.index, errors.values, c='k', marker=mk[position],
                  linestyle=ls[position], label=label)
    ax.set_xlabel('Cost')
    ax.set_ylabel('MAE')
    ax.set_title(f.__name__)
    ax.legend()
    fig.savefig(date_tag+'_'+f.__name__+'_Saltenis_Owen.png')
    plt.close(fig)  # release the figure; one file is written per function

In [6]:
# Export the relative errors, one sheet per function, at the costs
# 2*(k+1)*(2**s - 1) for s = 2..p. The context manager saves and closes the
# workbook; ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('Owen_T_RE.xlsx') as writer:
    for f in functions:
        RE_f[f.__name__].loc[[2*(k+1)*(-1+2**s) for s in range(2,p+1)]].loc[:(k+1)*2**(p+1)].round(3).to_excel(writer, sheet_name=f.__name__)