## Comparison of the Lamboni and the Saltenis estimators

One starts off by importing the relevant libraries and functions. The analytical values of the total indices are also imported so as to benchmark the values estimated.

In [1]:
%matplotlib inline
from Functions import A1, A2, B1, B2, B3, C1, C2
import numpy as np
import pandas as pd
import sobol_seq
import matplotlib.pyplot as plt
from pandas import ExcelWriter
plt.style.use('ggplot')

The low-discrepancy (Sobol') sample matrix is then generated, and a scrambling rule is imported, so as to produce 50 independent repetitions from which a stable estimate can be obtained.

In [2]:
# Configuration and construction of the scrambled sample used by the cells below.

k = 6  # number of columns (inputs) in each sample matrix

# NOTE(review): a2 and b3 are not referenced in the visible cells; they are kept
# in case other code relies on them.
a2 = np.array([0,0.5,3,9,99,99])
b3 = np.array([6.42,6.42,6.42,6.42,6.42,6.42])

functions = [A1, A2, B1, B2, B3, C1, C2]

AE_df = pd.read_excel('AE_df.xlsx',index_col=0) #the analytical values

p = 13    # each repetition has 2**p - 1 sample rows
run = 50  # number of scrambled repetitions

n = [2,3,4,6]  # numbers of sample matrices for which the estimator is evaluated

# The largest design needs max(n) matrices of k columns each, so the Sobol'
# sequence is generated with max(n)*k dimensions (36 for the values above)
# instead of a hard-coded 6*k that would silently break if `n` were extended.
df_r = pd.DataFrame(sobol_seq.i4_sobol_generate(max(n)*k, 2**p - 1))

# Row r of df_index lists the column order to use for repetition r.
df_index = pd.read_csv('2019.10.08_index.csv', index_col=0)

# For every repetition: reorder the columns of df_r according to df_index,
# relabel them 0..N-1, and stack the repetitions vertically.
df = pd.concat([df_r.T.reindex(df_index.iloc[r]).reset_index(drop=True).T for r in range(run)])

The mean absolute errors are finally estimated across the test functions for the Lamboni estimator.

In [5]:
# Benchmark the Lamboni estimator of the total indices.
#
# For every number of sample matrices n1 in `n`, and for every scrambled
# repetition, the estimate is computed for each test function as a function of
# the number of sample rows used, compared with the analytical values in AE_df,
# and the resulting error measures (MAE, RMSE, relative error) are written to
# Excel workbooks with one sheet per test function.
#
# NOTE(review): this cell writes '<n1>Lamboni_.xlsx' (trailing underscore) while
# the plotting cell reads '<n1>Lamboni.xlsx' -- confirm the intended file name.
for n1 in n:
    # Function evaluations spent per sample row: n1 base matrices plus
    # n1*k*(n1-1) mixed matrices. Used to express the index as a cost.
    cost_per_row = n1*(1+k*(n1-1))

    MAE_runs = {f.__name__: [] for f in functions}
    SE_dic = {f.__name__: [] for f in functions}
    RE_l = {f.__name__: [] for f in functions}

    # Integer division: identical to the original float arithmetic whenever
    # len(df) is a multiple of `run`, which is how df is constructed.
    rows_per_run = len(df)//run

    for r in range(run):
        # Rows belonging to repetition r only (earlier repetitions are not kept).
        run_sample = df.iloc[r*rows_per_run:(r+1)*rows_per_run].reset_index(drop=True)

        # Split the columns into n1 matrices of k columns, each relabelled 0..k-1.
        sample_Matrices = [run_sample.iloc[:,m*k:(m+1)*k].T.reset_index(drop=True).T for m in range(n1)]

        # mixed_Matrices[(n1-1)*m*k + j*(n1-1) + q] is matrix m with column j taken
        # from matrix (m+q+1) % n1, i.e. from each of the other n1-1 matrices in
        # turn (same partner order as np.roll(np.arange(n1), (n1-1)*m)[q+1]).
        mixed_Matrices = []
        for m in range(n1):
            for j in range(k):
                for q in range(n1-1):
                    mixed = sample_Matrices[m].copy()
                    mixed[j] = sample_Matrices[(m+q+1) % n1][j]
                    mixed_Matrices.append(mixed)

        for f in functions:
            name = f.__name__

            # Evaluate each base matrix once per function instead of once per column.
            # Assumes the test functions are deterministic and do not modify their
            # input -- TODO confirm against Functions.py.
            base_outputs = [f(matrix) for matrix in sample_Matrices]

            per_input = []
            for j in range(k):
                ef = []
                for m in range(n1):
                    el = [f(mixed_Matrices[(n1-1)*m*k+j*(n1-1)+q])/(n1-1) for q in range(n1-1)]
                    ef.append(((n1-1)/n1**2)*(base_outputs[m]-pd.concat(el,axis=1).sum(axis=1))**2)
                per_input.append(pd.concat(ef,axis=1).sum(axis=1))
            # One column per input j, one row per sample row.
            effects = pd.concat(per_input, axis=1, keys=list(range(k)))

            # Running mean of the numerator over the sample rows.
            C_T = effects.expanding(1).mean()

            # Running variance of the output over all base-matrix evaluations; only
            # the last entry of each (duplicated) row label is kept so that it
            # includes every matrix up to that row.
            Var = f(pd.concat(sample_Matrices).sort_index()).expanding(1).var(ddof=0)
            T = (C_T.T/Var[~Var.index.duplicated(keep='last')]).T

            AE_r = np.abs(T - AE_df.loc[name])
            SE_r = (T - AE_df.loc[name])**2
            RE_r = AE_r.div(AE_df.loc[name])
            RE_l[name].append(RE_r)

            MAE_r = AE_r.mean(axis=1)
            # Re-express the row index as the cumulative number of function evaluations.
            MAE_r.index = (MAE_r.index+1)*cost_per_row
            SE_r.index = MAE_r.index
            MAE_runs[name].append(MAE_r)
            SE_dic[name].append(SE_r)

    MAE_dic = {}
    RMSE_dic = {}
    RE_dic = {}
    for f in functions:
        name = f.__name__
        # One column per repetition, indexed by cost.
        MAE_dic[name] = pd.concat(MAE_runs[name], axis=1, keys=list(range(run)))

        # Average the squared errors over repetitions for each input (columns with
        # the same label), take the root, then average over inputs. The transpose
        # avoids the deprecated groupby(axis=1).
        SE_all = pd.concat(SE_dic[name], axis=1)
        RMSE_dic[name] = np.sqrt(SE_all.T.groupby(level=0).mean().T).mean(axis=1)

        # Average the relative errors over repetitions, row by row.
        RE_all = pd.concat(RE_l[name])
        RE_dic[name] = RE_all.groupby(RE_all.index).mean()
        RE_dic[name].index = MAE_dic[name].index

    # Context managers save and close each workbook, including on error
    # (ExcelWriter.save() is no longer available in recent pandas).
    with pd.ExcelWriter(str(n1)+'Lamboni_.xlsx', engine='xlsxwriter') as writer:
        for name, mae in MAE_dic.items():
            mae.to_excel(writer, sheet_name=name)

    with pd.ExcelWriter(str(n1)+'Lamboni_RMSE.xlsx', engine='xlsxwriter') as writer:
        for f in functions:
            RMSE_dic[f.__name__].to_excel(writer, sheet_name=f.__name__)

    # Relative errors are reported only at costs cost_per_row*(2**s - 1), s = 2..p,
    # and not beyond the cost (k+1)*2**(p+1).
    checkpoints = [cost_per_row*(-1+2**s) for s in range(2,p+1)]
    with pd.ExcelWriter(str(n1)+'Lamboni_RE.xlsx', engine='xlsxwriter') as writer:
        for f in functions:
            RE_dic[f.__name__].loc[checkpoints].loc[:(k+1)*2**(p+1)].round(3).to_excel(writer,
                                                                                    sheet_name=f.__name__)


The trends for the different estimators are then compared across column length and test functions.

In [None]:
# One line style and one marker per estimator variant, in plotting order.
ls = ['-',':','-.','--',(0, (3, 5, 1, 5, 1, 5))]
mk = ['s','o','v','+','*']


def _read_checkpoints(path, sheet, unit_cost):
    """Read one sheet (first column as index) and keep the rows whose index is
    unit_cost*(2**s - 1) for s = 2..p, up to the index label (k+1)*2**(p+1)."""
    table = pd.read_excel(path, sheet_name=sheet, index_col=0)
    wanted = [unit_cost*(-1+2**s) for s in range(2,p+1)]
    return table.loc[wanted].loc[:(k+1)*2**(p+1)]


# NOTE(review): the cell that computes the Lamboni results writes
# '<n>Lamboni_.xlsx' (trailing underscore), whereas the files read here are
# named '<n>Lamboni.xlsx' -- confirm which name is intended.
for f in functions:
    sheet = f.__name__

    # Label -> table, in the order in which the curves are drawn.
    dS_dic = {
        'Saltenis': _read_checkpoints('Saltenis_asym.xlsx', sheet, (k+1)),
        'Saltenis_sym': _read_checkpoints('2Lamboni.xlsx', sheet, 2*(k+1)),
        'Lamboni_3_matrices': _read_checkpoints('3Lamboni.xlsx', sheet, (3*(1+k*(3-1)))),
        'Lamboni_4_matrices': _read_checkpoints('4Lamboni.xlsx', sheet, (4*(1+k*(4-1)))),
        'Lamboni_6_matrices': _read_checkpoints('6Lamboni.xlsx', sheet, (6*(1+k*(6-1)))),
    }

    # Draw every data set in black on log-log axes, distinguished by marker and line style.
    for (label, table), marker, style in zip(dS_dic.items(), mk, ls):
        plt.loglog(table.index, table.values, c='k', marker = marker,linestyle=style,label = label)

    plt.xlabel('Cost')
    plt.ylabel('MAE')
    plt.title(sheet)
    plt.legend()
    plt.show()