In [1]:
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
%matplotlib inline



# Toy Example (dim = 1)
* $X \sim \mathcal{N}(0,1)$ r.v that we could simulate
* $S:\mathbb{R} \to \mathbb{R}$ score function / blackbox i.e. we could simulate $S(X)$, but we don't know the form of $S$. here we take $S(X) = |X| $
* Goal : estimate $p = \mathbb{P}(S(X)>q) < 10^{-6}$ ( q = 5 rare event)

In [442]:
from numba import autojit
@autojit
def S(X):
    '''score function which is a black box'''
    return np.abs(X)


### remark
(cf. Sequential Monte Carlo for Rare Event Estimation(F.Cérou, P.Del Moral, T.Furon, A.Guyader)):

## Fixed-levels Algorithm:

#### Parameters:

N: the number of particles

$\{L_0,...,L_n\}$: the sequence of levels, where $L_0 = -\infty$

#### Initialization

Draw an i.i.d. N-sample $(X_0^j)_{1\leq j\leq N}$ of law $\mu$

#### Iterations

*for k = 0 to n-1:*

Let $I_k  = \{j : X_k^j \in A_{k+1}\}$ where $A_{k+1} = \{x \in \mathbb{R}^d : S(x) > L_k\}$

Let $\hat p_k = \frac{\#|T_k|}{N}$

if $j \in I_k$, let $\tilde X_{k+1}^j = X_k^j$
if $j \notin I_k$, let $\tilde X_{k+1}^j$ be a copy of $X_k^l$ where $l$ is chosen randomly in $I_k$ with uniform
probabilities.



############# *Question* ################

(I think there would be a problem when $I_k$ is empty, which would happen with a high proba when the level becomes large (i.e the $A_{k+1}$ is rare), so i decide to regenerate an $X_{new} \sim \mathcal{L}( X | S(X)>L_{k+1})$. But it increase the complexibility greatly. )

1. if the size I_k is very large, then the procedure mentioned in the paper gives us a new r.v. who converges in distribution(at least) to the law $\mathcal{L}(X|X \in A_{k+1})$ 

2. This procedure is far more effecient than the generation that I used here. However, to ensure that $I_k$ is not empty for every level k (this could not ensure the convergence of the r.v. constructed), we should draw a big number of particles.

########################################


*for j from 1 to N:*

Draw a new particle $\hat X_{k+1}^j \sim K(\tilde X_{k+1}^j,\cdot )$

if $\hat X_{k+1}^j \in A_{k+1}$, let $X_{k+1}^j = \hat X_{k+1}^j$, else let $X_{k+1}^j = \tilde X_{k+1}^j$

(that is to say, with the condition that $X \in A_{k+1}$, we will only accept the transition in $A_{k+1}$ )

#### Output

Estimate the proba of the rare events by $\hat p = \prod_{k = 0}^{n - 1} \hat p_k $

In [1010]:
# sequence of levels
num_lev = 10
#q = np.array([1.1,1.2,1.25,1.5,1.8])
q_test = 4
q = np.append(-np.Inf,np.sort(np.random.uniform(0.5,q_test,num_lev)))
q[num_lev] = q_test
print q

# real value of p
from scipy.stats import norm
p = (1-norm.cdf(q[num_lev]))*2
print "real value of p:" ,p

[       -inf  0.66908911  0.67488404  1.22825929  1.45406456  1.98015034
  2.5256638   2.71088547  3.00314609  3.59966322  4.        ]
real value of p: 6.33424836662e-05


In [1011]:
#tuning parameter 
sigma_1 = 1
var_tuning = float(sigma_1**2)/(1+sigma_1**2)
print "var_tuning: ",var_tuning
print "levels: ", q


###Estimation of p

N = 10000 # number of samples
X = np.zeros([num_lev,N])
X[0,:] = np.random.normal(0,1,N) 
p_hat = []
#I = [[0] for k in range(num_lev-1)]
for k in range(num_lev-1):
    p_hat = np.append(p_hat,np.sum((S(X[k,]>q[k+1])))/float(N))
    print p_hat[k]
#     for j in range(N):
#         if S(X[k,j])>q[k+1]:
#             I[k] = np.append(I[k],X[k,j])
    for j in range(N):
        if S(X[k,j])>q[k+1]:
            X[k+1,j] = X[k,j]
        else:
#             l = np.random.choice(I[k])
#             X[k+1,j] = X[k,l]
            X_tilde = np.random.normal(0,1,1)
            while(S(X_tilde)<=q[k+1]):
                X_tilde = np.random.normal(0,1,1)
                #thr += 1
            X[k+1,j] = X_tilde
                
    
        X_iter = np.random.normal(X[k+1,j],np.sqrt(var_tuning),1)
        if S(X_iter)>q[k+1]:
            X[k+1,j] = X_iter
    
    


print "real value of p:" ,p
print "estimation of p: ", np.prod(p_hat)
print "relative variation: ",np.abs((p - np.prod(p_hat))/p)*np.sqrt(N)
print "N: ",N






var_tuning:  0.5
levels:  [       -inf  0.66908911  0.67488404  1.22825929  1.45406456  1.98015034
  2.5256638   2.71088547  3.00314609  3.59966322  4.        ]
0.2446
0.4916
0.3042
0.3917
0.3008
0.2749
0.389
0.3573
0.263
real value of p: 6.33424836662e-05
estimation of p:  4.33083906915e-05
relative variation:  31.6282087711
N:  10000


In [1012]:
# # we will estimate p for different N 
# p_list = []
# variation = []
# for N in np.array(np.linspace(500,10000,100),np.int):
#     X = np.zeros([num_lev,N])
#     X[0,:] = np.random.normal(0,1,N) 
#     #p_hat = []
#     #I = [[0] for k in range(num_lev-1)]
#     for k in range(num_lev-1):
#         p_hat = np.append(p_hat,np.sum((S(X[k,]>q[k+1])))/float(N))
#         print p_hat[k]
#     #     for j in range(N):
#     #         if S(X[k,j])>q[k+1]:
#     #             I[k] = np.append(I[k],X[k,j])
#         for j in range(N):
#             if S(X[k,j])>q[k+1]:
#                 X[k+1,j] = X[k,j]
#             else:
#     #             l = np.random.choice(I[k])
#     #             X[k+1,j] = X[k,l]
#                 X_tilde = np.random.normal(0,1,1)
#                 while(S(X_tilde)<=q[k+1]):
#                     X_tilde = np.random.normal(0,1,1)
#                     #thr += 1
#                 X[k+1,j] = X_tilde


#             X_iter = np.random.normal(X[k+1,j],np.sqrt(var_tuning),1)
#             if S(X_iter)>q[k+1]:
#                 X[k+1,j] = X_iter



#     variation = np.append(variation,np.abs((p - np.prod(p_hat))/p)*np.sqrt(N))
#     p_list = np.append(p_list,np.prod(p_hat))
#     print "real value of p:" ,p
#     print "estimation of p: ", np.prod(p_hat)
#     print "relative variation: ",np.abs((p - np.prod(p_hat))/p)*np.sqrt(N)
#     print "N: ",N



In [1013]:
# plt.figure(figsize=[15,5])
# plt.plot(variation*(variation < 100000),"p")
# print np.mean(variation)
# print np.sum(variation < 50000)
# plt.title("relative residus of $\hatp$")

In [1014]:
# plt.figure(figsize = [15,5])
# plt.hist(variation,bins = 20)
# plt.title("Histogram of relative residus")
# a = np.array([(1-norm.cdf(q[i]))*2 for i in range(num_lev)])[1:]
# print 'var: ', np.mean(variation)
# print "'minimum' var(theoretic):",np.sum((1-a)/a)
# print "#{var<var(theoretic)}: ",np.sum(variation < np.sum((1-a)/a))
# print "1-sigma : ",1- (1-norm.cdf(1))*2

In [1015]:
# plt.figure(figsize = [15,5])
# plt.hist(p_list,bins = 50)
# plt.title("Histogramme of $\hatp$")


### remark
(cf. Sequential Monte Carlo for Rare Event Estimation(F.Cérou, P.Del Moral, T.Furon, A.Guyader)):


## Adaptive Multilevel Splitting    

#### parameter:

$N$:  the number of particles

$N_0$: the number of succeeding particles

$p_0 = \frac{N_0}{N}$ : the success rate

#### Initialization

Draw an i.i.d. N-sample $(X_0^j)_{1\leq j\leq N}$ of law $\mu$

Compute the $\hat L_1$, the $(1-p_0)$ quantile of $(S(X_0^j))_{1\leq j \leq N}$

k = 1, index of level

#### Iterations



In [977]:
q_test = 4
from scipy.stats import norm
p = (1-norm.cdf(q_test))*2
print "real value of p:" ,p




p_0 = 0.5 # prescribed success rate
N = 1000 #size of sample
max_iter = 1000

# calculate the empirical quantile of X
@autojit
def L_empirical(X,percentage = p_0):
    
    return np.percentile(S(X),(1-percentage)*100,interpolation="lower")

###Estimation of p

X = np.random.normal(0,1,N)
L = np.array([-np.Inf,L_empirical(X)])
k = 1
while(L[k]<q_test):
    print k
    print L[k]
    X_new = []
    for i in range(N):
        if S(X[i])>L[k]:
            X_new = np.append(X_new, X[i])
        else:
            X_tilde = np.random.normal(0,1,1)
            #thr = 1      #add a threshold control the max_iter? seems that it's not possible !
            #while((S(X_tilde)<=L[k])*(thr< max_iter) == 1 ):
            while(S(X_tilde)<=L[k]):
                X_tilde = np.random.normal(0,1,1)
                #thr += 1
            X_new = np.append(X_new, X_tilde)
                
            
    
    X = X_new
    L = np.append(L, L_empirical(X))
    k += 1
    
    N_L = np.sum((S(X)>=q_test))
    p_hat = N_L/float(N)*p_0**(k-1)
    print p_hat

print "k = ",k
    
N_L = np.sum((S(X)>=q_test))
p_hat = N_L/float(N)*p_0**(k-1)


print "real value of p:" ,p
print "estimation of p: ", p_hat
print "relative variation: ",np.abs((p - p_hat))/p*np.sqrt(N)
print "N: ",N



real value of p: 6.33424836662e-05
1
0.675499383908
0.0
2
1.11286763785
0.0
3
1.49415349472
0.000125
4
1.82124934917
6.25e-05
5
2.14037223275
6.25e-05
6
2.40616451648
6.25e-05
7
2.65810268223
7.8125e-05
8
2.8876049211
8.203125e-05
9
3.0884178865
8.3984375e-05
10
3.29961139548
7.421875e-05
11
3.48474163674
7.32421875e-05
12
3.68250221206
6.73828125e-05
13
3.85692120128
6.82373046875e-05
k =  14
real value of p: 6.33424836662e-05
estimation of p:  6.82373046875e-05
relative variation:  2.44366533646
N:  1000


In [1016]:
print "levels (adapted version): ", L


levels (adapted version):  [       -inf  0.67549938  1.11286764  1.49415349  1.82124935  2.14037223
  2.40616452  2.65810268  2.88760492  3.08841789  3.2996114   3.48474164
  3.68250221  3.8569212   4.03670314]
