In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import adjusted_rand_score

# --- Data ---------------------------------------------------------------
# Load the scikit-learn handwritten-digits dataset. The feature matrix X is
# what gets clustered; the ground-truth targets are set aside and consulted
# only when scoring the clustering further down.
digits = load_digits()
X, y_true = digits.data, digits.target

# --- 2-D projection (plotting only) -------------------------------------
# The mixture model below is fitted on the full feature matrix X, not on this
# projection; the two principal components just give the scatter plot axes.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# --- Gaussian mixture clustering (fitted via EM) ------------------------
# Ten full-covariance components with a fixed seed so reruns give the same
# assignment. fit() returns the estimator itself, so fitting and the hard
# cluster assignment are chained in one expression.
gmm = GaussianMixture(
    n_components=10,
    covariance_type="full",
    random_state=42,
)
y_gmm = gmm.fit(X).predict(X)

# --- Evaluation ---------------------------------------------------------
# Score the cluster assignment against the true digit labels.
ari = adjusted_rand_score(y_true, y_gmm)
print(f"Adjusted Rand Index (clustering quality): {ari:.2f}")

# --- Visualization ------------------------------------------------------
# Scatter the PCA coordinates, colouring each sample by its GMM cluster id.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y_gmm, cmap="tab10", s=15)
ax.set_title("GMM Clustering of Handwritten Digits (PCA Projection)")
ax.set_xlabel("PCA 1")
ax.set_ylabel("PCA 2")
fig.colorbar(points, ax=ax, label="Cluster Label")
ax.grid(True)
plt.show()
