In [None]:
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
%matplotlib inline

# Fluctuation analysis for adapted-levels algorithm
In this section we analyse the behaviour of $\sqrt N\frac{\hat p - p}{p}$. In the idealized situation, by the central limit theorem (CLT), it converges in distribution to a normal law, and we are interested in estimating the variance of this limiting normal distribution.

In [None]:
try:
    # `autojit` was removed from numba; `jit` without a signature is its
    # direct replacement (types are inferred lazily at the first call).
    from numba import jit
except ImportError:
    # numba is only an accelerator here: fall back to plain Python/NumPy
    # so the notebook still runs when numba is not installed.
    def jit(func):
        return func

@jit
def S(X):
    '''Score function, treated as a black box by the algorithm.

    Returns the absolute value of X. It is called elsewhere in this
    notebook both on whole sample arrays and on single elements.
    '''
    return np.abs(X)

In [None]:
q_test = 4
from scipy.stats import norm
# Two-sided tail probability of a standard normal beyond q_test.
# norm.sf(q) is 1 - norm.cdf(q) computed without the cancellation that
# the explicit subtraction suffers from far in the tail.
p = 2*norm.sf(q_test)

p_0 = 0.75 #success rate
# Decompose p = r * p_0**n_0: n_0 is the integer part of log(p)/log(p_0)
# and r is the remaining factor.
n_0 = int(np.log(p)/np.log(p_0))
r = p/(p_0**n_0)
sigma_theoretical = np.sqrt(n_0*(1-p_0)/p_0 + (1-r)/r)
print ("theoretical std of relative variation: ", sigma_theoretical)
# Was a Python 2 print statement, which is a SyntaxError on Python 3.
print ("real value of p:" ,p)

In [None]:

# Notebook-level parameters:
#   p_0 -- prescribed success rate
#   N   -- size of sample
p_0, N = 0.75, 50000

def mu_0_test(N,q_test):
    '''Draw N standard-normal samples containing at least one exceedance.

    A fresh batch of N draws from N(0, 1) is generated until at least one
    element has a score S(x) strictly above q_test; that batch is returned.

    NOTE(review): redrawing until an exceedance appears conditions the
    sample on that event, so the result is not a plain i.i.d. N(0, 1)
    sample -- confirm this is acceptable for the analysis.
    '''
    while True:
        sample = np.random.normal(0, 1, N)
        # Keep the batch only if some score exceeds the target level.
        if np.any(S(sample) > q_test):
            return sample







def L_empirical(X,alpha, N, score_function):
    '''Empirical level such that roughly a fraction alpha of scores lie above it.

    Parameters
    ----------
    X : array of samples.
    alpha : fraction of the sample that should score above the level.
    N : sample size used to turn the fraction into a rank.
    score_function : callable applied to X to obtain the scores.

    Returns
    -------
    The score of rank int((1 - alpha) * N) among the ascending sorted
    scores (the largest score when that rank would equal N).
    '''
    scores = np.sort(score_function(X))
    # `np.int` was removed in NumPy 1.24; the builtin `int` truncates the same way.
    # Clamp so that alpha == 0 returns the maximum instead of indexing past the end.
    rank = min(int((1-alpha)*N), N-1)
    return scores[rank]


def simulation_adapted_levels(p_0, N, mu_0, q_test, score_function, status_track = False, p_true = None):
    '''Estimate P(score_function(X) > q_test) with adaptively chosen levels.

    Starting from a sample X = mu_0(N, q_test), the level is repeatedly set
    to the empirical quantile returned by L_empirical(X, p_0, N, ...).
    Particles scoring strictly above the current level are kept, and the
    remaining slots are refilled with copies taken by cycling through a
    random permutation of the kept particles. The loop stops once the level
    reaches q_test.

    NOTE(review): the refilled particles are exact copies of survivors; no
    move/mutation step is applied here -- confirm this is intended.

    Parameters
    ----------
    p_0 : prescribed success rate passed to L_empirical as the fraction
        of particles that should lie above each new level.
    N : number of particles.
    mu_0 : callable (N, q_test) -> array of N initial samples.
    q_test : target level.
    score_function : vectorised callable mapping samples to scores.
    status_track : if true, print progress and a final summary.
    p_true : reference probability used for the relative variation.
        Defaults to the notebook-level variable `p`.

    Returns
    -------
    (p_hat, relative_variation) where
    relative_variation = (p_true - p_hat) / p_true * sqrt(N).

    Raises
    ------
    RuntimeError if no particle scores strictly above the current level.
    '''
    if p_true is None:
        # Backward-compatible default: fall back to the notebook global.
        p_true = p

    X = mu_0(N,q_test)
    # np.Inf was removed in NumPy 2.0; np.inf is the supported spelling.
    levels = [-np.inf, L_empirical(X, p_0, N, score_function)]
    k = 1

    while levels[k] < q_test:
        if status_track:
            print("\t")
            print("k = ", k)
            print('current level: ', levels[k])

        # Select survivors with one vectorised mask, using the score function
        # that was passed in rather than the global S.
        survivors = X[score_function(X) > levels[k]]
        n_survivors = len(survivors)
        if n_survivors == 0:
            raise RuntimeError("no particle scores above the current level %r" % (levels[k],))

        X[:n_survivors] = survivors

        # Permutation trick to replace the multinomial distribution: refill the
        # remaining slots by cycling through a shuffled copy of the survivors.
        shuffled = np.random.permutation(survivors)
        X[n_survivors:] = shuffled[np.arange(n_survivors, N) % n_survivors]

        levels.append(L_empirical(X, p_0, N, score_function))
        k += 1

    # Fraction of particles above q_test at the last stage, times p_0 for each
    # of the k - 1 completed stages.
    N_L = np.sum(score_function(X) > q_test)
    p_hat = N_L/float(N)*p_0**(k-1)
    relative_variation = (p_true - p_hat)/p_true*np.sqrt(N)

    if status_track:
        print ("final k = ",k)
        print ("real value of p:" ,p_true)
        print ("estimation of p: ", p_hat)
        print ("relative variation: ",relative_variation)
    return p_hat, relative_variation


In [None]:
# Smoke test: a single verbose run.
# The reference probability `p` used inside the simulation was computed for the
# notebook-level `q_test`; running at a different threshold (previously 5) made
# the printed "real value of p" and relative variation inconsistent with the run.
simulation_adapted_levels(p_0 = p_0, N = 1000, mu_0 = mu_0_test, q_test = q_test, score_function = S, status_track = True)

In [None]:
list_N = [100,500,1000,3000]
n_choice_N = len(list_N)
n_sim = 1000
# sim_N[i, j] holds (p_hat, relative variation) of the j-th run at sample size list_N[i].
sim_N = np.empty((n_choice_N, n_sim, 2))
print ("Total number of simulation: ", n_choice_N)
for index_sim, N_sim in enumerate(list_N):
    for i in range(n_sim):
        # Store the result (it was previously discarded) and use the sample
        # size under study (it was previously hard-coded to 1000).
        sim_N[index_sim, i] = simulation_adapted_levels(p_0 = 0.75, N = N_sim,
                                  mu_0 = mu_0_test, q_test = 4, score_function = S, status_track = False)

    print ("simulation completed: ",index_sim+1)

In [None]:
# Column 1 of each run is the relative variation, which already carries the sqrt(N) factor.
relative_variation_sim = np.array([sim_N[i][:,1] for i in range(n_choice_N)])
print ("theoretical std of relative variation: ", sigma_theoretical)
# One empirical std per sample size in list_N. Taking the std of these stds
# collapsed them to a single number that is not comparable to sigma_theoretical.
std_sqrtN = np.std(relative_variation_sim, axis = 1)
# Raw string: "\s" is an invalid escape sequence in a normal string literal.
print (r"\sqrt N * std of relative variation : ", std_sqrtN)

## Fluctuation Analysis

In [None]:
##fluctuation
# One panel per sample size: empirical distribution of the relative variation
# against the centred normal density with the theoretical standard deviation.
# Each panel now plots its own row of relative_variation_sim; the copy-pasted
# version drew row 0 in all four panels.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept
# here so the cell does not require a newer seaborn than the notebook was
# written for.
plt.figure(figsize = [15,18])
x = np.arange(-15,15,0.1)
for panel, sample_size in enumerate(list_N):
    plt.subplot(len(list_N),1,panel+1)

    sns.distplot(relative_variation_sim[panel] ,bins = 100, label = "empirical")
    plt.title('Histogram of relative variation (N =%d, q_test = 4)' % sample_size)
    plt.plot(x,norm.pdf(x,0,sigma_theoretical), label = "theoretical")
    plt.legend()
    plt.xlim([-50,50])

In [None]:
# Simulated versus theoretical standard deviation, both divided by sqrt(N),
# plotted against the sample sizes in list_N.
sample_sizes = np.array(list_N)
root_sizes = np.sqrt(list_N)
plt.figure(figsize = [10,5])
plt.plot(sample_sizes, std_sqrtN/root_sizes, label = "simulated")
plt.plot(sample_sizes, sigma_theoretical/root_sizes, label = "theoretical")
plt.legend()

