In [1]:
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
%matplotlib inline



# Fluctuation analysis for fixed-levels algorithm
In this section we analyse the behaviour of $\sqrt N\frac{\hat p - p}{p}$. In the idealized situation, by the central limit theorem (CLT), it converges to a normal distribution, and we are interested in estimating the variance of this limiting distribution.

In [2]:
try:
    from numba import autojit
except ImportError:
    # `autojit` is not available in current numba releases (and numba itself
    # may not be installed). Fall back to a no-op decorator so the cell still
    # runs; the body is a single vectorised NumPy call, so nothing is lost.
    def autojit(func):
        '''Identity decorator used when numba does not provide `autojit`.'''
        return func

@autojit
def S(X):
    '''Score function, treated as a black box by the algorithm.

    Returns the absolute value of X (element-wise when X is an array).
    '''
    return np.abs(X)

## Calculate the idealized levels

In [407]:
from scipy.stats import norm

# Sequence of levels in the idealized situation, for the score |X| with
# X ~ N(0, 1).

q_test = 4
# Two-sided tail probability P(|X| > q_test). The survival function is used
# instead of 1 - cdf to avoid cancellation in the far tail.
p = 2*norm.sf(q_test)

### idealized situation
p_0 = 0.5 # success rate between two consecutive levels
n_0 = int(np.log(p)/np.log(p_0)) # number of stages with success rate p_0
r = p/(p_0**n_0) # success rate of the last stage, so that p = r * p_0**n_0

print ("p_0 = ", p_0, '\t n_0 =',n_0,"\t r = ",r)

# Level k satisfies P(|X| > L_k) = p_0**k, i.e. L_k = isf(p_0**k / 2).
# `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
L = np.append([-np.inf], norm.isf(p_0**np.arange(1, n_0+1)/2))
L_ideal = np.append(L, q_test)
num_lev = len(L_ideal)

## theoretical standard deviation of sqrt(N) * (relative error)
sigma_theoretical = np.sqrt(n_0*(1-p_0)/p_0 + (1-r)/r)
print ("sequence of levels: ", L_ideal)
print ("num_lev: ",num_lev)
print ("level interested, L = ",q_test)
# real value of p
print ("real value of p:" ,p)
print ("theoretical std of relative variation: ", sigma_theoretical)
print ("tested, no bug")

('p_0 = ', 0.5, '\t n_0 =', 13, '\t r = ', 0.51890162619383773)
('sequence of levels: ', array([       -inf,  0.67448975,  1.15034938,  1.53412054,  1.86273187,
        2.15387469,  2.41755902,  2.66006747,  2.88563491,  3.09726908,
        3.29719335,  3.4871041 ,  3.66832929,  3.84193069,  4.        ]))
('num_lev: ', 15)
('level interested, L = ', 4)
('real value of p:', 6.3342483666239957e-05)
('theoretical std of relative variation: ', 3.731909371167121)
tested, no bug


## Algorithm (fixed_levels)

In [416]:
# Tuning parameter of the Gaussian shaker.
# Caution: sigma_1 must not be chosen too large.
sigma_1 = 0.2
# The shaker proposes X' ~ N(X/c, std_tuning**2). For X ~ N(0, 1) the proposal
# has variance 1/c**2 + std_tuning**2, which equals 1 (so that N(0, 1) is
# preserved) only when std_tuning = sigma_1/sqrt(1 + sigma_1**2).
# The previous expression divided by (1 + sigma_1**2) without the square root.
c = np.sqrt(1+sigma_1**2)
std_tuning = sigma_1/c
def mu_0_test(N):
    '''Sampler for the law of X that we are able to simulate.

    Draws N independent values from the normal distribution with mean 0 and
    standard deviation 1 and returns them as a NumPy array.
    '''
    samples = np.random.normal(loc=0, scale=1, size=N)
    return samples

# def shaker_test(X, sigma_1):
#     std_tuning = np.sqrt(sigma_1**2)/(1+sigma_1**2)
#     c = np.sqrt(1+sigma_1**2)
#     return np.random.normal(X/c,std,1)

def shaker_test(X):
    '''Propose a perturbed copy of X.

    Draws one normal sample centred at X/c with standard deviation
    std_tuning (both read from module level) and returns it as an array
    of size 1.
    '''
    proposal_mean = X/c
    return np.random.normal(loc=proposal_mean, scale=std_tuning, size=1)
    

def simulation_fixed_levels(N , L, score_function, mu_0, shaker, status_tracking = False, p_true = None):
    '''Estimate a rare-event probability by multilevel splitting with fixed levels.

    Parameters
    ----------
    N : int
        Number of particles (must be positive).
    L : sequence of float
        Levels. L[0] is never compared against; stage k keeps the particles
        whose score is strictly greater than L[k+1].
    score_function : callable
        Maps one particle to its score.
    mu_0 : callable
        mu_0(N) returns the N initial particles.
    shaker : callable
        shaker(x) returns one proposed move of particle x, either as a scalar
        or as an array of size 1.
    status_tracking : bool
        When True, print the levels, the reference probability, the estimate,
        the relative variation and N.
    p_true : float, optional
        Reference probability used to normalise the error. When omitted, the
        module-level variable `p` is used.

    Returns
    -------
    (p_hat, var_rel) : p_hat is the product of the per-stage survival
        fractions and var_rel = sqrt(N) * (p - p_hat) / p.

    Raises
    ------
    ValueError
        If N is not positive.
    '''
    if N <= 0:
        raise ValueError("N must be a positive number of particles")

    p_ref = p if p_true is None else p_true
    # Derive the number of stages from the levels actually passed in rather
    # than from a notebook-level global.
    n_stages = len(L) - 1

    X = np.array(mu_0(N), dtype = float)
    list_p_hat = []

    for k in range(n_stages):
        threshold = L[k+1]

        # Selection: particles whose score exceeds the current level.
        survivors = np.array([x for x in X if score_function(x) > threshold], dtype = float)
        l = len(survivors)
        list_p_hat.append(l/float(N))

        if l == 0:
            # No particle reached this level: the estimate is 0 and there is
            # nothing left to resample from.
            break

        # Resampling: keep the survivors, then refill the remaining slots by
        # cycling through a random permutation of them.
        X_tilde = np.empty(N)
        X_tilde[:l] = survivors
        shuffled = np.random.permutation(survivors)
        for j in range(l, N):
            X_tilde[j] = shuffled[j % l]

        # Mutation: accept a proposed move only if it stays above the level.
        for j in range(N):
            # .item() accepts a scalar or a size-1 array and yields a scalar,
            # so no array is assigned into a scalar slot.
            X_iter = np.asarray(shaker(X_tilde[j])).item()
            if score_function(X_iter) > threshold:
                X[j] = X_iter
            else:
                X[j] = X_tilde[j]

    p_hat = np.prod(list_p_hat)

    var_rel = np.sqrt(N) * (p_ref - p_hat)/p_ref
    if status_tracking == True:
        print ("levels: ", L)
        print ("real value of p:" ,p_ref)
        print ("estimation of p: ", p_hat)
        print ("relative variation: ", var_rel)
        print ("N: ",N)
    return p_hat, var_rel


#### We remark that the choice of the shaker (here, of sigma_1) greatly influences the quality of the estimation.

In [424]:
# Single test run with status output (sigma_1 = 0.2)
simulation_fixed_levels(
    N = 1000,
    L = L_ideal,
    score_function = S,
    mu_0 = mu_0_test,
    shaker = shaker_test,
    status_tracking = True,
)

('levels: ', array([       -inf,  0.67448975,  1.15034938,  1.53412054,  1.86273187,
        2.15387469,  2.41755902,  2.66006747,  2.88563491,  3.09726908,
        3.29719335,  3.4871041 ,  3.66832929,  3.84193069,  4.        ]))
('real value of p:', 6.3342483666239957e-05)
('estimation of p: ', 7.991231299085596e-05)
('relative variation: ', -8.2722365896055301)
('N: ', 1000)


(7.991231299085596e-05, -8.2722365896055301)

In [None]:
n_choice_N = 3
n_sim = 500
# Particle numbers: N = 100 * 10**index_sim (100, 1000, 10000, 100000, ...)
print ("Total number of simulation: ", n_choice_N)
sim_N = []
for index_sim in range(n_choice_N):
    n_particles = 100*10**index_sim
    # n_sim independent runs for this particle number; each run yields
    # the pair (p_hat, var_rel).
    runs = []
    for i in range(n_sim):
        runs.append(simulation_fixed_levels(N = n_particles, L = L_ideal,
                                            score_function = S, mu_0 = mu_0_test,
                                            shaker = shaker_test))
    sim_N.append(runs)
    print ("simulation completed: ",index_sim+1)
sim_N = np.array(sim_N)

('Total number of simulation: ', 3)
('simulation completed: ', 1)
('simulation completed: ', 2)

In [None]:
# Column 1 of each run is var_rel = sqrt(N) * (p - p_hat) / p.
relative_variation_sim = np.array([sim_N[i][:,1] for i in range(n_choice_N)])
# Empirical std of var_rel for each N; this is sqrt(N) times the std of the
# relative error itself.
std_scaled = [np.std(relative_variation_sim[i]) for i in range(n_choice_N)]
# sqrt(N) for N = 100 * 10**i, matching the particle numbers used in the runs.
sqrt_N = np.sqrt([100*10**i for i in range(n_choice_N)])
# The division has to happen on the values, not on the result of print(...).
print ("std of relative variation (N = 100, 1000, 10000): ", np.array(std_scaled)/sqrt_N)
print ("theoretical std of relative variation: ", sigma_theoretical)
print ("\\sqrt N * std of relative variation (N = 100, 1000, 10000): ", std_scaled)

In [None]:
##fluctuation
# One panel per particle number. Panel i shows the runs stored in
# relative_variation_sim[i], so each histogram matches its title.
plt.figure(figsize = [15,15])
x = np.arange(-15,15,0.1)
for i, n_particles in enumerate([100, 1000, 10000]):
    plt.subplot(3,1,i+1)

    sns.distplot(relative_variation_sim[i] ,bins = 50, label = "empirical")
    plt.title('Histogram of relative variation (N =%d, q_test = 4)' % n_particles)
    # Density of the theoretical limit N(0, sigma_theoretical**2) for comparison.
    plt.plot(x,norm.pdf(x,0,sigma_theoretical), label = "theoretical")
    plt.legend()

In [None]:
# Empirical std of the runs stored at index 2 of relative_variation_sim
relative_variation_sim[2].std()