## Comparison of the Glen-and-Isaacs and the Saltenis estimator

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import sobol_seq
import matplotlib.pyplot as plt
from pandas import ExcelWriter
plt.style.use('ggplot')

### Defining the test functions

In [2]:
# Number of input factors used by every test function below.
k = 6

# Per-factor coefficients read by A2/A2b (a2) and by B3 (b3); one entry per factor.
a2 = np.array((0, 0.5, 3, 9, 99, 99))
b3 = np.array([6.42] * 6)

def A1(sm):
    """Evaluate test function A1 row by row.

    For each row the result is ``sum_{i=1..k} (-1)**i * prod(x_1..x_i)``,
    where ``x_1..x_i`` are the first ``i`` columns of *sm* (by position).

    sm: frame with at least ``k`` columns (module-level ``k``).
    Returns one value per row of *sm*.
    """
    terms = []
    for order in range(1, k + 1):
        # Product of the first `order` columns, with alternating sign.
        leading_product = np.prod(sm.iloc[:, :order], axis=1)
        terms.append(leading_product * (-1) ** order)
    return pd.Series(terms).sum()

def A2(sm):
    """Evaluate test function A2 row by row.

    The result is the product over ``j in range(k)`` of
    ``(|4*x_j - 2| + a2[j]) / (1 + a2[j])``, where ``x_j`` is column ``j``
    of *sm* (looked up by label) and ``a2`` is the module-level array.
    """
    factors = []
    for j in range(k):
        coeff = a2[j]
        factors.append((np.abs(4 * sm[j] - 2) + coeff) / (1 + coeff))
    return pd.Series(factors).product()

def A2b(sm,sn):
    """Evaluate the A2 product on the column-wise sum of *sm* and *sn*.

    For each ``j in range(k)`` the columns ``sm[j]`` and ``sn[j]`` are added
    and the integer part of the sum (second output of ``np.modf``) is
    subtracted; the remainder takes the place of ``x_j`` in
    ``(|4*x_j - 2| + a2[j]) / (1 + a2[j])``. The factors are multiplied.
    """
    factors = []
    for j in range(k):
        coeff = a2[j]
        wrapped = sm[j] + sn[j] - np.modf(sm[j] + sn[j])[1]
        factors.append((np.abs(4 * wrapped - 2) + coeff) / (1 + coeff))
    return pd.Series(factors).product()

def B1(sm):
    """Evaluate test function B1 row by row.

    The result is the product over ``j in range(k)`` of
    ``(k - x_j) / (k - 0.5)``, where ``x_j`` is column ``j`` of *sm*.
    """
    factors = []
    for j in range(k):
        factors.append((k - sm[j]) / (k - 0.5))
    return pd.Series(factors).product()
        
def B2(sm):
    """Evaluate test function B2 row by row.

    The result is ``(1 + 1/k)**k * prod_j x_j**(1/k)`` over
    ``j in range(k)``, where ``x_j`` is column ``j`` of *sm*.
    """
    scale = (1 + 1 / k) ** k
    roots = []
    for j in range(k):
        roots.append(sm[j] ** (1 / k))
    return scale * pd.Series(roots).product()
        
def B3(sm):
    """Evaluate test function B3 row by row.

    Same product form as A2, but with the module-level coefficients ``b3``:
    ``prod_j (|4*x_j - 2| + b3[j]) / (1 + b3[j])`` over ``j in range(k)``.
    """
    factors = []
    for j in range(k):
        coeff = b3[j]
        factors.append((np.abs(4 * sm[j] - 2) + coeff) / (1 + coeff))
    return pd.Series(factors).product()
        
def C1(sm):
    """Evaluate test function C1 row by row.

    The result is ``prod_j |4*x_j - 2|`` over ``j in range(k)``, where
    ``x_j`` is column ``j`` of *sm*.
    """
    factors = []
    for j in range(k):
        factors.append(np.abs(4 * sm[j] - 2))
    return pd.Series(factors).product()
        
def C2(sm):
    """Evaluate test function C2 row by row.

    The result is the product of all columns of *sm* in each row,
    multiplied by ``2**k``.
    """
    scale = 2 ** k
    row_products = sm.product(axis=1)
    return row_products * scale

# Test functions evaluated by the estimation loop, in processing order.
# A2b is defined above but is not part of this list.
functions = [
    A1,
    A2,
    B1,
    B2,
    B3,
    C1,
    C2,
]

### And the analytical values

In [4]:
def Ek2(k):
    """Return (1/6)*(1 - 3**-k) + (4/15)*((-1)**(k+1) * 2**-k + 3**-k).

    k: exponent used in the closed form; note that this parameter shadows
       the module-level ``k`` inside the function.
    """
    third_pow = (1/3)**k
    half_pow = (1/2)**k
    sign = (-1)**(k+1)
    first = (1/6)*(1 - third_pow)
    second = (4/15)*(sign*half_pow + third_pow)
    return first + second

def V(k):
    """Return the closed-form value used as the denominator in A1ST.

    Computes ``(1/10)*3**-k + 1/18 - (1/9)*2**(-2k) + (-1)**(k+1)*(2/45)*2**-k``.

    k: exponent used in the closed form; shadows the module-level ``k``.
    """
    third_term = (1/10)*(1/3)**k
    constant_term = (1/18)
    quarter_term = (1/9)*(1/2)**(2*k)
    alternating_term = (-1)**(k+1)*(2/45)*(1/2)**k
    return third_term + constant_term - quarter_term + alternating_term

# Building blocks of the analytical value for A1; they are combined in A1ST.
# They are plain module-level functions: wrapping the definitions in a
# ``for j in range(0, k)`` loop only re-created the same functions k times
# and would have left them undefined for k == 0.

def Ej(j):
    """Return (1/6)*(1 - 3**-j) + (4/15)*((-1)**(j+1) * 2**-j + 3**-j).

    This is the same closed form as ``Ek2`` evaluated at ``j``.
    """
    return((1/6)*(1-(1/3)**(j))+(4/15)*((-1)**(j+1)*(1/2)**(j)+(1/3)**(j)))


def T1(j):
    """Return (1/2) * 3**-(j-1) * (1 - 3**-(k-j)); reads the module-level k."""
    return((1/2)*((1/3)**(j-1)*(1-(1/3)**(k-j))))


def T2(j):
    """Return (1/2) * (3**-j - 3**-k); reads the module-level k."""
    return((1/2)*((1/3)**j-(1/3)**k))


def T3(j):
    """Return (3/5) * (4*3**-(k+1) + (-1)**(j+k+1) * 2**-(k-j-2) * 3**-(j+1)).

    Reads the module-level k.
    """
    return((3/5)*(4*(1/3)**(k+1)+(-1)**(j+k+1)*(1/2)**(k-j-2)*(1/3)**(j+1)))


def T4(j):
    """Return (1/5) * ((-1)**(j+2) * (1/3) * 2**-(j-2) - 4*3**-(j+1))."""
    return((1/5)*((-1)**(j+2)*(1/3)*(1/2)**(j-2)-4*(1/3)**(j+1)))


def T5(j):
    """Return (1/5) * ((-1)**(k+1) * (1/3) * 2**-(k-2)
    + (-1)**(k+j) * 3**-(j+1) * 2**-(k-j-2)).

    Reads the module-level k.
    """
    return((1/5)*((-1)**(k+1)*(1/3)*(1/2)**(k-2)+(-1)**(k+j)*(1/3)**(j+1)*(1/2)**(k-j-2)))
    
def A1ST(j):
    """Return the analytical reference value for test function A1 at index j.

    Computed as
    ``(Ek2(k) - Ej(j) - (1/4)*(T1(j) - 2*T2(j) + T3(j)) - T4(j) - T5(j)) / V(k)``
    with the module-level ``k``. The table-building code calls this with
    ``j`` running over ``range(k)``.
    """
    bracket = T1(j) - 2*T2(j) + T3(j)
    numerator = Ek2(k) - Ej(j) - (1/4)*bracket - T4(j) - T5(j)
    return numerator / V(k)

#AEA2

def VjA2(j):
    """Return (1/3) / (1 + a2[j])**2 for factor index j."""
    denominator = (1 + a2[j]) ** 2
    return (1/3) / denominator

def VA2(j):
    """Return ``prod_i (1 + VjA2(i)) - 1`` over all ``i in range(k)``.

    j: accepted so the function can be called like its siblings, but not
       used; the result is the same for every j.
    """
    # np.prod replaces np.product, which was deprecated and then removed in
    # NumPy 2.0. The loop index is named i so it no longer shadows j.
    return np.prod([1 + VjA2(i) for i in range(k)]) - 1

def VnA2(j):
    """Return (VA2(j) + 1) / (1 + VjA2(j)).

    Since VA2 + 1 is the product of (1 + VjA2(i)) over all i, this divides
    out the factor belonging to index j.
    """
    full_product = VA2(j) + 1
    own_factor = 1 + VjA2(j)
    return full_product / own_factor

def VTjA2(j):
    """Return VjA2(j) * VnA2(j) for factor index j."""
    own_part = VjA2(j)
    others_part = VnA2(j)
    return own_part * others_part

def A2ST(j):
    """Return the analytical reference value for A2 at index j: VTjA2(j) / VA2(j)."""
    numerator = VTjA2(j)
    denominator = VA2(j)
    return numerator / denominator

#AEB1

def q(j):
    """Return 12 * (k - 0.5)**2 using the module-level k.

    j: not used; the value is the same for every index.
    """
    offset = k - 0.5
    return 12 * offset ** 2
        
def B1ST(j):
    """Return the analytical reference value for B1 at index j.

    With ``Q = q(j)`` this is ``(Q+1)**k / ((Q+1) * ((Q+1)**k - Q**k))``.
    """
    base = q(j)
    grown = base + 1
    return grown ** k / (grown * (grown ** k - base ** k))

#AEB2

def B2ST(j):
    """Return the analytical reference value for B2.

    Computes ``(k+1)**(2k-2) / ((k+1)**(2k) - (k**2 + 2k)**k)`` with the
    module-level k.

    j: not used; the value is the same for every index.
    """
    numerator = (k + 1) ** (2 * k - 2)
    denominator = (k + 1) ** (2 * k) - (k ** 2 + 2 * k) ** k
    return numerator / denominator

#AEB3

def VjB3(j):
    """Return (1/3) / (1 + b3[j])**2 for factor index j."""
    denominator = (1 + b3[j]) ** 2
    return (1/3) / denominator

def VB3(j):
    """Return ``prod_i (1 + VjB3(i)) - 1`` over all ``i in range(k)``.

    j: accepted so the function can be called like its siblings, but not
       used; the result is the same for every j.
    """
    # np.prod replaces np.product, which was deprecated and then removed in
    # NumPy 2.0. The loop index is named i so it no longer shadows j.
    return np.prod([1 + VjB3(i) for i in range(k)]) - 1

def VnB3(j):
    """Return (VB3(j) + 1) / (1 + VjB3(j)).

    Since VB3 + 1 is the product of (1 + VjB3(i)) over all i, this divides
    out the factor belonging to index j.
    """
    full_product = VB3(j) + 1
    own_factor = 1 + VjB3(j)
    return full_product / own_factor

def VTjB3(j):
    """Return VjB3(j) * VnB3(j) for factor index j."""
    own_part = VjB3(j)
    others_part = VnB3(j)
    return own_part * others_part

def B3ST(j):
    """Return the analytical reference value for B3 at index j: VTjB3(j) / VB3(j)."""
    numerator = VTjB3(j)
    denominator = VB3(j)
    return numerator / denominator

#AEC1

def C1ST(j):
    """Return the analytical reference value for C1: 4**(k-1) / (4**k - 3**k).

    j: not used; the value is the same for every index.
    """
    numerator = 4 ** (k - 1)
    denominator = 4 ** k - 3 ** k
    return numerator / denominator

#AEC2
        
def C2ST(j):
    """Return the analytical reference value for C2: 4**(k-1) / (4**k - 3**k).

    The expression is the same one used by C1ST.

    j: not used; the value is the same for every index.
    """
    numerator = 4 ** (k - 1)
    denominator = 4 ** k - 3 ** k
    return numerator / denominator

def create_dict(key, values):
    """Pair the items of *key* with the items of *values* and return a dict.

    Pairing stops at the shorter of the two iterables; for a repeated key
    the last paired value is kept.
    """
    return {name: value for name, value in zip(key, values)}

# Analytical reference functions, in the same order as `functions`.
analyticalValues = [A1ST,A2ST,B1ST,B2ST,B3ST,C1ST,C2ST]

AE_l = []

# Row labels: the first two characters of each function name ('A1', 'A2', ...),
# which match the names of the test functions used to look rows up later.
AE_names = [reference.__name__[:2] for reference in analyticalValues]

# One row of k analytical values per test function.
AE_rows = [[reference(j) for j in range(k)] for reference in analyticalValues]

# Flat views of the same data: the values themselves and an 'AE<name><j>' label each.
AE = [value for row in AE_rows for value in row]
AE_namesd = ['AE' + reference.__name__ + str(j)
             for reference in analyticalValues for j in range(k)]

# Table of analytical values: rows indexed by AE_names, columns 0..k-1.
AE_df = pd.DataFrame(AE_rows, AE_names)

### It is then time to define the sample and the scrambled matrices

In [5]:
# p:   exponent of the per-run sample size (each run gets 2**p rows).
# run: number of consecutive row blocks the sample is later cut into.
# n:   number of column blocks (sample matrices) each run is split into.
p, run, n = 13, 50, 2

# Sobol sequence generated with arguments (2*k, run * 2**p); the estimation
# loop slices it into `run` row blocks and `n` column blocks of k columns.
df = pd.DataFrame(sobol_seq.i4_sobol_generate(2 * k, run * 2 ** p))

### Finally, the Sobol sensitivity indices can be estimated with the Glen-and-Isaacs estimator

In [6]:
def _standardize(values):
    """Return *values* centred and scaled by their running statistics.

    Row i becomes ``(values[i] - mean(values[:i+1])) / sqrt(var(values[:i+1]))``
    using the expanding mean and the expanding variance with ``ddof=0``.
    """
    running = values.expanding(1)
    return (values - running.mean()) / np.sqrt(running.var(ddof=0))


run_samples = []

# One table per test function: rows are relabelled sample sizes, columns are runs.
MAE_dic = {f.__name__: pd.DataFrame(columns=list(range(run))) for f in functions}
for r in range(run):
    # Cut the r-th block of consecutive rows out of the full sample.
    rows_per_run = len(df) / run
    run_samples.append(
        df.iloc[int(r * rows_per_run):int((r + 1) * rows_per_run)].reset_index(drop=True))
    current = run_samples[-1]

    # Split the columns into n equally wide matrices, each relabelled 0..width-1.
    width = int(current.shape[1] / n)
    sample_Matrices = [current.iloc[:, m * width:(m + 1) * width].T.reset_index(drop=True).T
                       for m in range(n)]

    # mixed_Matrices[m*k + j] is sample matrix m with column j taken from the
    # other matrix (co maps 0 -> 1 and 1 -> 0, so this assumes n == 2).
    mixed_Matrices = []

    co = [1,0]
    for m in range(n):
        for j in range(k):
            mixed = sample_Matrices[m].copy()
            mixed[j] = sample_Matrices[co[m]][j]
            mixed_Matrices.append(mixed)

    # Per-function results of this run, keyed by function name. Every key is
    # filled in the loop below, so no placeholder values are needed.
    gv_dic, gv1_dic, gv2_dic, gv3_dic = {}, {}, {}, {}
    pj_dic, cd_dic, cdM_dic = {}, {}, {}
    ca_dic, caM_dic, ST_dic = {}, {}, {}

    for f in functions:
        name = f.__name__

        # Standardised outputs. Each matrix is evaluated exactly once; the
        # previous version evaluated f three times per matrix (value, running
        # mean, running variance).
        gv_a = _standardize(f(sample_Matrices[0]))
        gv_b = _standardize(f(sample_Matrices[1]))
        gv_ab = pd.DataFrame([_standardize(f(mixed_Matrices[j])) for j in range(k)]).T
        gv_ba = pd.DataFrame([_standardize(f(mixed_Matrices[j + k])) for j in range(k)]).T
        gv_dic[name] = gv_a
        gv2_dic[name] = gv_b
        gv1_dic[name] = gv_ab
        gv3_dic[name] = gv_ba

        # Running means of products of standardised outputs:
        #   pj  pairs outputs whose inputs share no column,
        #   cd  pairs outputs whose inputs share only column j,
        #   cdM pairs outputs whose inputs share every column except j.
        pj = pd.DataFrame([0.5*(gv_a*gv_b +
                                gv_ab[j]*gv_ba[j]).expanding(1).mean() for j in range(k)]).T
        cd = pd.DataFrame([0.5*(gv_a*gv_ba[j] +
                                gv_b*gv_ab[j]).expanding(1).mean() for j in range(k)]).T
        cdM = pd.DataFrame([0.5*(gv_a*gv_ab[j] +
                                 gv_b*gv_ba[j]).expanding(1).mean() for j in range(k)]).T
        pj_dic[name] = pj
        cd_dic[name] = cd
        cdM_dic[name] = cdM

        # cd and cdM adjusted with pj, then combined into the ST estimate.
        ca = pd.DataFrame([(cd[j] - pj[j]*cdM[j]) /
                           (1 - (pj[j])**2) for j in range(k)]).T
        caM = pd.DataFrame([(cdM[j] - pj[j]*cd[j]) /
                            (1 - (pj[j])**2) for j in range(k)]).T
        ca_dic[name] = ca
        caM_dic[name] = caM
        ST_dic[name] = pd.DataFrame([1 - cdM[j] + pj[j]*ca[j] /
                                     (1 - ca[j]*caM[j]) for j in range(k)]).T

        # Absolute error against the analytical row, averaged over the k factors.
        AE_r = np.abs(ST_dic[name] - AE_df.loc[name])
        MAE_r = AE_r.mean(axis=1)
        # Relabel row i as (i+1) * number of sample matrices * (k+1)
        # (presumably the number of model runs used so far; confirm).
        MAE_r.index = (MAE_r.index + 1) * len(sample_Matrices) * (k + 1)
        MAE_dic[name][r] = MAE_r

In [7]:
# Write one sheet per test function holding its MAE averaged over all runs.
# The writer is used as a context manager so the workbook is saved and closed
# on exit; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('G&I.xlsx', engine='xlsxwriter') as writer:
    for mk, mae_frame in MAE_dic.items():
        mae_frame.mean(axis=1).to_excel(writer, sheet_name=mk)