In [1]:
import numpy as np
from scipy.stats import qmc


In [2]:
def f(u):
    """
    Integrand: cosine of 2*pi times (mean of the coordinates minus 0.5).

    u is a vector of size d >= 1, given as an np.array.
    """
    dim = len(u)
    total = np.sum(u)
    # Average of the coordinates, shifted by -0.5.
    phase = 1/dim * total - 0.5
    return np.cos(2*np.pi*phase)

f(np.array([1,0.5,0.3,0.4]))

np.float64(0.9510565162951535)

# Question 1

$$\int_{[0,1]^d} f(u)\,du \simeq \frac{1}{N}\sum_{i=1}^{N} f(u_{(i)}) \quad \text{with } u_{(i)} \overset{iid}{\sim} U([0,1]^d)$$

In [3]:
# Monte Carlo standard
def monte_carlo_integration(d, N):
    """
    Plain Monte Carlo estimate of the integral of f over [0,1]^d.

    Draws N points uniformly in [0,1]^d with the global NumPy random state,
    evaluates f on each point (one row at a time) and returns the average.
    """
    draws = np.random.uniform(0, 1, (N, d))
    values = np.array(list(map(f, draws)))
    return np.mean(values)

# Quasi-Monte Carlo (Sobol sequence)
def quasi_monte_carlo_integration(d, N):
    """
    Quasi-Monte Carlo estimate of the integral of f over [0,1]^d.

    Generates N points from a scrambled Sobol sequence of dimension d,
    evaluates f on each point (one row at a time) and returns the average.
    """
    engine = qmc.Sobol(d, scramble=True)
    # NOTE(review): SciPy appears to warn when N is not a power of two
    # (Sobol balance properties) -- confirm against the SciPy docs.
    points = engine.random(N)
    values = np.array(list(map(f, points)))
    return np.mean(values)

# Dimensions and sample sizes to compare
d_values = [1, 2, 3,5, 10, 100, 200]
N_values = [100, 1000, 10000, 20000]

# results[d][N] = (plain Monte Carlo estimate, Sobol quasi-Monte Carlo estimate)
results = {}
for d in d_values:
    per_dim = {}
    for N in N_values:
        # Monte Carlo is evaluated before quasi-Monte Carlo for each (d, N) pair.
        per_dim[N] = (monte_carlo_integration(d, N), quasi_monte_carlo_integration(d, N))
    results[d] = per_dim

# Print one block of estimates per dimension.
# `qmc_res` (not `qmc`) so the imported `qmc` module is not shadowed.
for d, per_dim in results.items():
    print(f"Dimension d = {d}")
    for N, (mc, qmc_res) in per_dim.items():
        print(f"  N = {N}: Monte Carlo = {mc:.6f}, Quasi-Monte Carlo = {qmc_res:.6f}")
    print()

  sample = self._random(n, workers=workers)


Dimension d = 1
  N = 100: Monte Carlo = 0.015085, Quasi-Monte Carlo = -0.015158
  N = 1000: Monte Carlo = 0.019122, Quasi-Monte Carlo = 0.001125
  N = 10000: Monte Carlo = 0.005975, Quasi-Monte Carlo = 0.000141
  N = 20000: Monte Carlo = 0.001760, Quasi-Monte Carlo = 0.000002

Dimension d = 2
  N = 100: Monte Carlo = 0.471487, Quasi-Monte Carlo = 0.414825
  N = 1000: Monte Carlo = 0.416602, Quasi-Monte Carlo = 0.405535
  N = 10000: Monte Carlo = 0.406130, Quasi-Monte Carlo = 0.405363
  N = 20000: Monte Carlo = 0.406469, Quasi-Monte Carlo = 0.405307

Dimension d = 3
  N = 100: Monte Carlo = 0.536279, Quasi-Monte Carlo = 0.580652
  N = 1000: Monte Carlo = 0.551938, Quasi-Monte Carlo = 0.567298
  N = 10000: Monte Carlo = 0.568819, Quasi-Monte Carlo = 0.565590
  N = 20000: Monte Carlo = 0.563573, Quasi-Monte Carlo = 0.565538

Dimension d = 5
  N = 100: Monte Carlo = 0.759878, Quasi-Monte Carlo = 0.712243
  N = 1000: Monte Carlo = 0.698254, Quasi-Monte Carlo = 0.714866
  N = 10000: Monte C

Quand $d \to \infty$, on remarque que $I \to 1$ \
Pour la dimension $d=2$ par exemple, on peut calculer l'intégrale double et on trouve $I = \frac{4}{\pi^2} \simeq 0.40528$ et ça nous permet de remarquer que QMC converge plus vite vers le bon résultat.\
De même pour $d=1$, $I = 0$ par changement de variable, et on voit la vitesse de convergence plus rapide de QMC.

# Question 2

$$
e_{0,k} = \left\{ \left( \frac{2j_1 + 1}{2k}, \dots, \frac{2j_s + 1}{2k} \right) 
\text{ s.t. } (j_1, \dots, j_s) \in \{0, \dots, k-1\}^s \right\}
$$




$$
\hat{I}_{1,k}(f) := \frac{1}{k^s} \sum_{c \in e_{0,k}} f(c + U_c), \quad U_c \sim \mathcal{U} \left( \left[ -\frac{1}{2k}, \frac{1}{2k} \right]^s \right)
$$


In [None]:

def haber_estimator_1(f, k, s, rng=None):
    """
    Compute Haber's first estimator (order 1).

    The unit cube [0,1]^s is split into k**s congruent sub-cubes; one point is
    drawn uniformly in each sub-cube and f is averaged over those points.

    Parameters:
        f: function to integrate; called once with an array of shape
           (k**s, s) and expected to return one value per row
        k: number of subintervals per dimension
        s: dimension of the domain
        rng: optional seed or numpy.random.Generator for reproducible draws;
             when None (default) the global NumPy random state is used,
             exactly as before this parameter existed

    Returns:
        Approximate integral value using Haber's first estimator.
    """
    half_width = 1 / (2 * k)
    # Mid-points of the k subintervals along one axis.
    axis = np.linspace(half_width, 1 - half_width, k)
    # Cartesian product of the axis with itself s times -> (k**s, s) centers.
    centers = np.array(np.meshgrid(*[axis] * s)).T.reshape(-1, s)
    # Both np.random and Generator expose uniform(low, high, size).
    sampler = np.random if rng is None else np.random.default_rng(rng)
    U = sampler.uniform(-half_width, half_width, size=centers.shape)
    return np.mean(f(centers + U))


def haber_estimator_2(f, k, s, rng=None):
    """
    Compute Haber's second estimator (order 2).

    Same stratification as the first estimator (k**s congruent sub-cubes of
    [0,1]^s, one uniform shift U per sub-cube), but each shift is used twice,
    as center + U and center - U, and the two evaluations are averaged.

    Parameters:
        f: function to integrate; called twice, each time with an array of
           shape (k**s, s), and expected to return one value per row
        k: number of subintervals per dimension
        s: dimension of the domain
        rng: optional seed or numpy.random.Generator for reproducible draws;
             when None (default) the global NumPy random state is used,
             exactly as before this parameter existed

    Returns:
        Approximate integral value using Haber's second estimator.
    """
    half_width = 1 / (2 * k)
    # Mid-points of the k subintervals along one axis.
    axis = np.linspace(half_width, 1 - half_width, k)
    # Cartesian product of the axis with itself s times -> (k**s, s) centers.
    centers = np.array(np.meshgrid(*[axis] * s)).T.reshape(-1, s)
    # Both np.random and Generator expose uniform(low, high, size).
    sampler = np.random if rng is None else np.random.default_rng(rng)
    U = sampler.uniform(-half_width, half_width, size=centers.shape)
    # Symmetric pair around each center.
    plus = f(centers + U)
    minus = f(centers - U)
    return np.mean((plus + minus) / 2)


# Test function
def test_function(x):
    return np.cos(2 * np.pi * (np.mean(x, axis=1) - 0.5))



In [6]:
d_values = [1, 2, 3]
k_values = [10, 20, 50, 100]  # Number of subintervals per dimension

# results[d][k] = (Haber order-1 estimate, Haber order-2 estimate)
results = {}
for d in d_values:
    per_dim = {}
    for k in k_values:
        # Order-1 estimator is evaluated before order-2 for each (d, k) pair.
        first_order = haber_estimator_1(test_function, k, d)
        second_order = haber_estimator_2(test_function, k, d)
        per_dim[k] = (first_order, second_order)
    results[d] = per_dim

# Print the estimates; the number of evaluation points is N = k**d.
for d, per_dim in results.items():
    print(f"Dimension d = {d}")
    for k, (haber1, haber2) in per_dim.items():
        print(f"  N = {k**d}: Haber1 = {haber1:.6f}, Haber2 = {haber2:.6f}")
    print()

Dimension d = 1
  N = 10: Haber1 = 0.062587, Haber2 = -0.000516
  N = 20: Haber1 = 0.008122, Haber2 = 0.000835
  N = 50: Haber1 = -0.001822, Haber2 = -0.000022
  N = 100: Haber1 = 0.000455, Haber2 = 0.000000

Dimension d = 2
  N = 100: Haber1 = 0.396897, Haber2 = 0.405147
  N = 400: Haber1 = 0.403108, Haber2 = 0.405288
  N = 2500: Haber1 = 0.405498, Haber2 = 0.405281
  N = 10000: Haber1 = 0.405291, Haber2 = 0.405283

Dimension d = 3
  N = 1000: Haber1 = 0.567892, Haber2 = 0.565688
  N = 8000: Haber1 = 0.566278, Haber2 = 0.565572
  N = 125000: Haber1 = 0.565560, Haber2 = 0.565597
  N = 1000000: Haber1 = 0.565592, Haber2 = 0.565596



La convergence de Haber1 et de Haber2 vers la vraie valeur de l'intégrale est beaucoup plus rapide : par exemple, en dimension 1, dès N=20 on est déjà proche de 0, alors qu'il fallait attendre N=100 dans la Q1. \

On observe aussi que Haber2 converge plus rapidement que Haber1 : la symétrisation $c + U_c$ / $c - U_c$ agit comme une technique de réduction de la variance.

Plus k augmente, plus N augmente et plus l'estimateur d'Haber converge vers la valeur de l'intégrale.

In [None]:
d_values = [5,10]
k_values = [2,3,4]  # Number of subintervals per dimension

# results[d][k] = (Haber order-1 estimate, Haber order-2 estimate)
results = {}
for d in d_values:
    per_dim = {}
    for k in k_values:
        # Order-1 estimator is evaluated before order-2 for each (d, k) pair.
        first_order = haber_estimator_1(test_function, k, d)
        second_order = haber_estimator_2(test_function, k, d)
        per_dim[k] = (first_order, second_order)
    results[d] = per_dim

# Print the estimates; the number of evaluation points is N = k**d.
for d, per_dim in results.items():
    print(f"Dimension d = {d}")
    for k, (haber1, haber2) in per_dim.items():
        print(f"  N = {k**d}: Haber1 = {haber1:.6f}, Haber2 = {haber2:.6f}")
    print()

Dimension d = 5
  N = 32: Haber1 = 0.720534, Haber2 = 0.726921
  N = 243: Haber1 = 0.721927, Haber2 = 0.716624
  N = 1024: Haber1 = 0.713877, Haber2 = 0.717298

Dimension d = 10
  N = 1024: Haber1 = 0.845533, Haber2 = 0.847042
  N = 59049: Haber1 = 0.848460, Haber2 = 0.847936
  N = 1048576: Haber1 = 0.847851, Haber2 = 0.847855



# Question 3

### Step 1: Understanding the Approximate Distribution 

We analyze the distribution of the quantity:

$$
S_d = \frac{1}{d} \sum_{i=1}^{d} u_i
$$

when $( d )$ is large. Given that $( u_i \sim U(0,1) )$ (i.i.d.), we apply the **Central Limit Theorem (CLT)**. Since the expectation and variance of a uniform $( U(0,1) )$ variable are:

$$
\mathbb{E}[u_i] = \frac{1}{2}, \quad \text{Var}[u_i] = \frac{1}{12},
$$

we obtain:

$$
\mathbb{E}[S_d] = \frac{1}{d} \sum \mathbb{E}[u_i] = \frac{1}{2}
$$

$$
\text{Var}[S_d] = \frac{1}{d^2} \sum \text{Var}[u_i] = \frac{1}{12d}
$$
Then we have,

$$ \sqrt{d} \times \frac{S_{d} - \frac{1}{2}}{\sqrt{1/12}} = \sqrt{12d}\,\left(S_d - \tfrac{1}{2}\right) \xrightarrow[d \to \infty]{\mathcal{L}} \mathcal{N} \left( 0, 1 \right)$$

Thus, for large $( d )$, we approximate:

$$
S_d \sim \mathcal{N} \left( \frac{1}{2}, \frac{1}{12d} \right)
$$



### Step  2 : Importance sampling

### Importance Sampling
$$\mathbb{E}_{x \sim P}[f(x)] = \mathbb{E}_{x \sim Q}\Big[f(x)\frac{P(x)}{Q(x)}\Big]$$
This means $\mathbb{E}_{x \sim P}[f(x)] \approx \frac{1}{n}\sum_{i=1}^nf(x_i)\frac{P(x_i)}{Q(x_i)}$, where the $x_i$ are drawn from $Q$. This applies when $P$ and $Q$ are both normalized. In the unnormalized case (which is not ours), the self-normalized estimator is used instead:   
$$\mathbb{E}_{x \sim P}[f(x)] \approx \frac{\sum_{i=1}^nf(x_i)\frac{P(x_i)}{Q(x_i)}}{\sum_{i=1}^n\frac{P(x_i)}{Q(x_i)}}$$  
Let the proposal distribution $Q(x)$ be a normal distribution.   


In [8]:
import numpy as np
from scipy.stats import multivariate_normal, norm

def uniform_pdf(u,d):
    """
    Probability density of the uniform distribution on [0,1]^d.

    The density is constant: 1 when every coordinate of u lies in [0,1],
    0 otherwise. The argument d is not used by the computation.
    """
    inside_cube = np.all((u >= 0) & (u <= 1))
    return 1 if inside_cube else 0

def normal_pdf(u,d):
    """
    Density at u of the d-dimensional normal distribution N(mu, Sigma),
    with mu = (1/2, ..., 1/2) and Sigma = (1/(12 d)) * I_d.
    """
    variance = 1 / (12 * d)            # common variance of every coordinate
    covariance = variance * np.eye(d)  # diagonal covariance matrix
    center = np.full(d, 0.5)           # mean vector [1/2, ..., 1/2]
    return multivariate_normal.pdf(u, mean=center, cov=covariance)


In [9]:
def IS_MC(N,d, mu=1/2, sigma = None):
    """
    Importance-sampling Monte Carlo estimate of the integral of f over [0,1]^d.

    Points are drawn from a normal proposal with independent coordinates,
    and each point is weighted by (uniform density on [0,1]^d) / (proposal
    density). Points outside [0,1]^d therefore contribute 0.

    Parameters:
        N: number of proposal draws (positive integer)
        d: dimension of the domain
        mu: proposal mean, a scalar applied to every coordinate or an
            array of length d (default 1/2)
        sigma: per-coordinate proposal VARIANCE (scalar); defaults to
               1/(12*d) when None

    Returns:
        The importance-sampling estimate of the integral.

    Raises:
        ValueError: if N is not positive or sigma is not positive.
    """
    if N <= 0:
        raise ValueError("N must be a positive integer")
    if sigma is None:
        sigma = 1/(12*d)
    if sigma <= 0:
        raise ValueError("sigma (variance) must be positive")
    scale = np.sqrt(sigma)

    # One vectorized draw: an isotropic normal is d independent 1-D normals.
    X = np.random.normal(loc=mu, scale=scale, size=(N, d))

    # Target density is 1 inside the unit cube and 0 outside.
    inside = np.all((X >= 0) & (X <= 1), axis=1)
    kept = X[inside]

    # Log proposal density = sum of the per-coordinate log densities.
    log_q = norm.logpdf(kept, loc=mu, scale=scale).sum(axis=1)
    values = np.array([f(x) for x in kept])
    # Rejected points count as zeros, hence the division by N (not len(kept)).
    return np.sum(values * np.exp(-log_q)) / N

def IS_QMC(N,d, mu=1/2, sigma = None):
    """
    Importance sampling with quasi-Monte Carlo points from a Sobol sequence.

    Scrambled Sobol points in [0,1]^d are mapped through the normal quantile
    function to follow a normal proposal with independent coordinates; each
    point is weighted by (uniform density on [0,1]^d) / (proposal density).
    Points outside [0,1]^d therefore contribute 0.

    Parameters:
        N: number of Sobol points (positive integer)
        d: dimension of the domain
        mu: proposal mean, a scalar applied to every coordinate or an
            array of length d (default 1/2)
        sigma: per-coordinate proposal VARIANCE (scalar); defaults to
               1/(12*d) when None

    Returns:
        The importance-sampling estimate of the integral.

    Raises:
        ValueError: if N is not positive or sigma is not positive.
    """
    if N <= 0:
        raise ValueError("N must be a positive integer")
    if sigma is None:
        sigma = 1/(12*d)
    if sigma <= 0:
        raise ValueError("sigma (variance) must be positive")
    scale = np.sqrt(sigma)  # standard deviation used by norm.ppf / norm.logpdf

    # NOTE(review): SciPy appears to warn when N is not a power of two
    # (Sobol balance properties) -- confirm against the SciPy docs.
    sampler = qmc.Sobol(d, scramble=True)
    U = sampler.random(N)  # (N, d) points in the unit cube

    # Quantile transform: U ~ U[0,1] becomes X ~ N(mu, sigma) per coordinate.
    X = norm.ppf(U, loc=mu, scale=scale)

    # Target density is 1 inside the unit cube and 0 outside; dropping the
    # outside points also discards any infinite value produced by ppf.
    inside = np.all((X >= 0) & (X <= 1), axis=1)
    kept = X[inside]

    # Log proposal density = sum of the per-coordinate log densities.
    log_q = norm.logpdf(kept, loc=mu, scale=scale).sum(axis=1)
    values = np.array([f(x) for x in kept])
    # Rejected points count as zeros, hence the division by N (not len(kept)).
    return np.sum(values * np.exp(-log_q)) / N


In [10]:
d_values = [1, 2, 3, 5]
N_values = [1000, 5000, 10000, 20000, 50000]  # Number of sample points

# results[d][N] = (importance sampling with MC draws, with Sobol QMC points)
results = {}
for d in d_values:
    per_dim = {}
    for N in N_values:
        # The Monte Carlo variant is evaluated before the QMC variant.
        is_mc = IS_MC( N, d)
        is_qmc = IS_QMC( N, d)
        per_dim[N] = (is_mc, is_qmc)
    results[d] = per_dim

# Print one block of estimates per dimension.
for d, per_dim in results.items():
    print(f"Dimension d = {d}")
    for N, (is_mc, is_qmc) in per_dim.items():
        print(f"  N = {N}: IS MC = {is_mc:.6f}, IS QMC = {is_qmc:.6f}")
    print()

Dimension d = 1
  N = 1000: IS MC = -0.064902, IS QMC = -0.003318
  N = 5000: IS MC = -0.002290, IS QMC = -0.000282
  N = 10000: IS MC = 0.021875, IS QMC = 0.000097
  N = 20000: IS MC = 0.010941, IS QMC = -0.000030
  N = 50000: IS MC = -0.007276, IS QMC = -0.000001

Dimension d = 2
  N = 1000: IS MC = 0.382461, IS QMC = 0.384455
  N = 5000: IS MC = 0.419227, IS QMC = 0.406609
  N = 10000: IS MC = 0.415513, IS QMC = 0.396052
  N = 20000: IS MC = 0.401890, IS QMC = 0.401476
  N = 50000: IS MC = 0.410526, IS QMC = 0.405488

Dimension d = 3
  N = 1000: IS MC = 0.512919, IS QMC = 0.684261
  N = 5000: IS MC = 0.539900, IS QMC = 0.569808
  N = 10000: IS MC = 0.630148, IS QMC = 0.444802
  N = 20000: IS MC = 0.579459, IS QMC = 0.597402
  N = 50000: IS MC = 0.561716, IS QMC = 0.560892

Dimension d = 5
  N = 1000: IS MC = 2.673726, IS QMC = 0.416223
  N = 5000: IS MC = 0.460763, IS QMC = 1.318448
  N = 10000: IS MC = 0.466999, IS QMC = 0.512339
  N = 20000: IS MC = 0.411859, IS QMC = 20.686740
  

# Problèmes et solutions pour l'Importance Sampling

## 1. Mauvais choix de la densité d'importance
L'Importance Sampling (IS) est très sensible au choix de la densité d'importance $g(x)$.  
Si $g(x)$ ne couvre pas bien les régions où $f(x)$ est significatif, les pondérations définies par :

$$ w(x) = \frac{p(x)}{g(x)} $$

peuvent devenir extrêmement variables, entraînant une grande variance de l'estimateur.  
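En grande dimension, une loi normale centrée en $0.5$ peut mal représenter les régions importantes de $f(x)$, ce qui engendre une instabilité des résultats. Ici, $g$ a une variance $\frac{1}{12d}$ par coordonnée (la variance de la moyenne $S_d$), alors que chaque coordonnée de la loi cible uniforme a une variance $\frac{1}{12}$ : $g$ est donc beaucoup plus concentrée que $p$, et le poids $w(x) = 1/g(x)$ croît comme $e^{1.5\,d^2}$ près des coins du cube $[0,1]^d$, ce qui explique les valeurs aberrantes observées pour $d=5$.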

---

## 2. Problème de dégénérescence en grande dimension
Lorsque la dimension $d$ augmente, la plupart des échantillons issus de $g(x)$ se retrouvent éloignés des régions où $f(x)$ est significatif.  
Cela conduit à une situation où très peu d'échantillons contribuent réellement à l'intégrale, ce qui augmente la variance et rend l'estimateur instable.



---

## 3. Inefficacité des méthodes Quasi-Monte Carlo en grande dimension
Les méthodes Quasi-Monte Carlo (QMC) offrent un gain en convergence pour les dimensions faibles à modérées.  
Cependant, lorsque la dimension $d$ augmente, les séquences de faible discrépance comme Sobol ou Halton deviennent moins efficaces, car elles sont optimisées pour des espaces de faible dimension.


