In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
from scipy.stats import multivariate_normal


In [None]:
# Generate a synthetic dataset of 300 points drawn from 3 clusters.
# random_state is fixed so the same points are produced on every run.
X, y_true = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=0)

# Plot the raw, unlabeled points. No `cmap` is passed: without a `c` argument
# matplotlib ignores the colormap and emits a UserWarning.
plt.scatter(X[:, 0], X[:, 1], s=50)
plt.title("Jeu de données synthétique")
plt.show()


In [None]:
def initialize_parameters(X, k, random_state=None):
    """Build the initial parameters of a k-component Gaussian mixture.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data matrix, one sample per row.
    k : int
        Number of mixture components. Must not exceed ``n`` because the
        initial means are sampled without replacement.
    random_state : int, numpy.random.Generator or None, optional
        Seed (or generator) used to pick the initial means. When ``None``
        (the default) the legacy global ``np.random`` state is used, so a
        prior ``np.random.seed(...)`` call still controls the draw.

    Returns
    -------
    mu : ndarray of shape (k, d)
        Initial means: k distinct rows of ``X``.
    cov : list of k ndarrays of shape (d, d)
        Identity covariance matrices, one independent array per component.
    pi : ndarray of shape (k,)
        Uniform mixing weights, each equal to ``1 / k``.
    """
    n, d = X.shape
    if random_state is None:
        idx = np.random.choice(n, size=k, replace=False)
    else:
        idx = np.random.default_rng(random_state).choice(n, size=k, replace=False)
    mu = X[idx]
    # One fresh matrix per component: `[np.eye(d)] * k` would store the same
    # array object k times, so an in-place edit of one covariance would
    # silently change all of them.
    cov = [np.eye(d) for _ in range(k)]
    pi = np.full(k, 1.0 / k)
    return mu, cov, pi


In [None]:
def e_step(X, mu, cov, pi, k):
    """E-step: compute the responsibility of each component for each sample.

    The computation is done in the log domain. Multiplying the raw densities
    and normalising directly yields 0/0 = NaN for any sample whose density
    underflows to zero under every component (e.g. a point far from all
    means); working with log-densities avoids that.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data matrix, one sample per row.
    mu : sequence of k arrays of shape (d,)
        Component means.
    cov : sequence of k arrays of shape (d, d)
        Component covariance matrices.
    pi : array-like of shape (k,)
        Mixing weights.
    k : int
        Number of mixture components.

    Returns
    -------
    r : ndarray of shape (n, k)
        ``r[j, i]`` is the posterior probability that sample ``j`` was
        generated by component ``i``; each row sums to 1.
    """
    n = X.shape[0]
    log_r = np.empty((n, k))
    # A weight of exactly 0 maps to -inf, which correctly gives that
    # component zero responsibility; only the warning is suppressed.
    with np.errstate(divide="ignore"):
        log_pi = np.log(pi)
    for i in range(k):
        log_r[:, i] = log_pi[i] + multivariate_normal.logpdf(X, mean=mu[i], cov=cov[i])
    # Subtract the row-wise maximum before exponentiating so that the largest
    # term of every row becomes exp(0) = 1 and the row sum can never be zero.
    log_r -= log_r.max(axis=1, keepdims=True)
    r = np.exp(log_r)
    r /= r.sum(axis=1, keepdims=True)
    return r


In [None]:
def m_step(X, r, k, reg_covar=1e-6):
    """M-step: re-estimate the mixture parameters from the responsibilities.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data matrix, one sample per row.
    r : ndarray of shape (n, k)
        Responsibilities; ``r[j, i]`` is the weight of sample ``j`` for
        component ``i``.
    k : int
        Number of mixture components.
    reg_covar : float, optional
        Value added to the diagonal of every covariance matrix for numerical
        stability. Defaults to ``1e-6``.

    Returns
    -------
    mu : ndarray of shape (k, d)
        Responsibility-weighted means.
    cov : list of k ndarrays of shape (d, d)
        Responsibility-weighted covariances plus ``reg_covar * I``.
    pi : ndarray of shape (k,)
        Mixing weights: total responsibility of each component divided by n.

    Notes
    -----
    A component whose total responsibility is zero produces NaN parameters,
    because the mean and covariance are divided by that total.
    """
    n, d = X.shape
    mu = np.zeros((k, d))
    cov = []
    pi = np.zeros(k)
    for i in range(k):
        r_i = r[:, [i]]  # column vector (n, 1) so it broadcasts across features
        total_r_i = r_i.sum()
        mu[i] = (X * r_i).sum(axis=0) / total_r_i
        diff = X - mu[i]
        # Weight the rows by broadcasting rather than building an n-by-n
        # diagonal matrix: same result, O(n*d) memory instead of O(n^2).
        cov_i = (r_i * diff).T @ diff / total_r_i
        # Small diagonal term added for numerical stability.
        cov.append(cov_i + reg_covar * np.eye(d))
        pi[i] = total_r_i / n
    return mu, cov, pi
