In [32]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  
import pandas as pd
ot.RandomGenerator.SetSeed(42)

In [44]:
import openturns as ot

def dp_by_stick_breaking(alpha=1.0, tau=1e-3, base_measure_sampler=None):
    
    weights = []
    thetas = []
    r = 1.0 

    beta_dist = ot.Beta(1.0, alpha, 0.0, 1.0)       

    while r > tau:
        v_k = beta_dist.getRealization()[0]
        w_k = v_k * r
        weights.append(w_k)
        r *= (1.0 - v_k)

        theta_k = base_measure_sampler.getRealization()[0]
        thetas.append(theta_k)

    return weights, thetas

gaussian = ot.Normal(2)

weights, thetas = dp_by_stick_breaking(alpha=2.0, tau=1e-3, base_measure_sampler=gaussian)

for i, (w, theta) in enumerate(zip(weights, thetas)):
    print(f"Composante {i} : poids = {w}, theta = {theta}")
print("Somme des poid (proche de 1):", sum(weights))

Composante 0 : poids = 0.02578455635940735, theta = -1.4371478714752166
Composante 1 : poids = 0.3338949504276918, theta = 0.058051089880919816
Composante 2 : poids = 0.051487239919291015, theta = -0.7265338989438834
Composante 3 : poids = 0.06639763099309359, theta = 0.9495285051806729
Composante 4 : poids = 0.20286405226856608, theta = -0.583428732607948
Composante 5 : poids = 0.15840838484693595, theta = -0.24074212669737308
Composante 6 : poids = 0.026489565072896185, theta = 1.3677733725036425
Composante 7 : poids = 0.04138750274246584, theta = -1.0946898345629414
Composante 8 : poids = 0.030859856315918296, theta = -1.019087386231946
Composante 9 : poids = 0.0208073295536284, theta = -2.389410607523775
Composante 10 : poids = 0.008714643570950779, theta = -0.2944027266764411
Composante 11 : poids = 0.0014084911957027522, theta = 1.3035049082214143
Composante 12 : poids = 0.017995732869870516, theta = -0.34997200458327565
Composante 13 : poids = 0.0014601337171963188, theta = -0.1

In [29]:
import numpy as np
from scipy.stats import beta

def dp_by_stick_breaking(alpha=1.0, tau=1e-3, base_measure_sampler=None):
    
    weights = []
    thetas = []
    r = 1.0 
    
    while r > tau:
        v_k = beta.rvs(1, alpha)
        w_k = v_k * r
        weights.append(w_k)
        r = r*(1 - v_k)

        theta_k = base_measure_sampler()
        thetas.append(theta_k)

    return weights, thetas


In [30]:
mean_dim = 2
def sample_from_G0():
    return np.random.multivariate_normal(mean=np.zeros(mean_dim), cov=np.eye(mean_dim))

weights, thetas = dp_by_stick_breaking(alpha=1.0, tau=1e-3, base_measure_sampler=sample_from_G0)

for i, (w, theta) in enumerate(zip(weights, thetas)):
    print(f"Composante {i} : poids = {w} ; theta = {theta}")

print("Nombre de composantes :", len(weights))
print("Somme des poid (proche de 1):", sum(weights))

Composante 0 : poids = 0.9577359768945803 ; theta = [0.6274325 0.3526197]
Composante 1 : poids = 0.010900204044801557 ; theta = [ 1.91943799 -0.10042409]
Composante 2 : poids = 0.025855592769359913 ; theta = [ 1.4848903  -2.16905164]
Composante 3 : poids = 0.0014841082638876203 ; theta = [0.65988691 0.10072554]
Composante 4 : poids = 0.00022651518528890412 ; theta = [0.31690259 0.79207493]
Composante 5 : poids = 0.001368373291403901 ; theta = [ 0.88465419 -0.90404695]
Composante 6 : poids = 0.0022674903596009437 ; theta = [ 1.26157449 -0.12019214]
Nombre de composantes : 7
Somme des poid (proche de 1): 0.9998382608089232
