# Clustering Playground
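This notebook demonstrates K-means, agglomerative (hierarchical), and DBSCAN clustering on synthetic datasets — 2-D blobs and moons, plus a 10-D blob set standing in for realistic high-dimensional data — with internal evaluation metrics (silhouette, Davies–Bouldin, Calinski–Harabasz) and visualization.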

In [None]:
# Setup
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs, make_moons
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
# Seed NumPy's legacy global RNG. The dataset generators, KMeans and PCA calls
# in the cells below additionally pass random_state=42 themselves.
np.random.seed(42)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1) Create datasets

In [None]:
# Two synthetic datasets of 800 points each: four Gaussian blobs and two
# interleaved half-moons. The generators' ground-truth labels are discarded.
X_blobs, _ = make_blobs(n_samples=800, centers=4, cluster_std=1.2, random_state=42)
X_moons, _ = make_moons(n_samples=800, noise=0.06, random_state=42)

# Standardize each dataset independently. fit_transform refits on every call,
# so after this cell `scaler` holds the statistics of the moons data only.
scaler = StandardScaler()
Xb = scaler.fit_transform(X_blobs)
Xm = scaler.fit_transform(X_moons)

# Side-by-side scatter plots of the standardized data.
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
for ax, points, title in zip(axs, (Xb, Xm), ('Blobs (std)', 'Moons (std)')):
    ax.scatter(points[:, 0], points[:, 1], s=5)
    ax.set_title(title)
plt.show()

## 2) K-means on blobs

In [None]:
# K-means with k=4 on the standardized blobs (the data was generated with
# four centers).
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (a single run for k-means++ init) and warns about the
# upcoming change in 1.2/1.3. Pinning keeps results independent of the
# installed version.
km = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=42)
labels_km = km.fit_predict(Xb)

# Internal validity scores (no ground-truth labels required).
sil_km = silhouette_score(Xb, labels_km)         # higher is better
dbi_km = davies_bouldin_score(Xb, labels_km)     # lower is better
chi_km = calinski_harabasz_score(Xb, labels_km)  # higher is better

# Points coloured by assigned cluster; fitted centroids drawn as red crosses.
plt.scatter(Xb[:, 0], Xb[:, 1], c=labels_km, s=5, cmap='tab10')
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], c='red', marker='x', s=100)
plt.title(f'K-means (sil={sil_km:.2f}, DBI={dbi_km:.2f}, CH={chi_km:.0f})')
plt.show()

## 3) Agglomerative (Ward) on blobs

In [None]:
# Ward-linkage hierarchical clustering of the standardized blobs into four groups.
agg = AgglomerativeClustering(linkage='ward', n_clusters=4)
labels_agg = agg.fit_predict(Xb)

# Silhouette, Davies-Bouldin and Calinski-Harabasz scores, evaluated in that
# order on the same data and labels.
sil_agg, dbi_agg, chi_agg = (
    metric(Xb, labels_agg)
    for metric in (silhouette_score, davies_bouldin_score, calinski_harabasz_score)
)

# Xb has two columns, so unpacking its transpose yields the x and y coordinates.
plt.scatter(*Xb.T, c=labels_agg, s=5, cmap='tab10')
plt.title(f'Agglomerative-Ward (sil={sil_agg:.2f}, DBI={dbi_agg:.2f}, CH={chi_agg:.0f})')
plt.show()

## 4) DBSCAN on moons

In [None]:
# Density-based clustering of the standardized moons. DBSCAN labels points it
# considers noise with -1.
dbs = DBSCAN(eps=0.3, min_samples=5)
labels_dbs = dbs.fit_predict(Xm)

# Noise is not a cluster: exclude it both from the cluster count and from the
# silhouette, otherwise all noise points are scored as one extra "cluster".
clustered = labels_dbs != -1
n_clusters = len(set(labels_dbs[clustered]))

# silhouette_score requires 2 <= n_labels <= n_samples - 1; report NaN when the
# non-noise points do not satisfy that.
if 1 < n_clusters < clustered.sum():
    sil = silhouette_score(Xm[clustered], labels_dbs[clustered])
else:
    sil = float('nan')

# Noise points (label -1) are still plotted, coloured by their label.
plt.scatter(Xm[:, 0], Xm[:, 1], c=labels_dbs, s=5, cmap='tab10')
plt.title(f'DBSCAN moons (clusters={n_clusters}, sil={sil:.2f})')
plt.show()

## 5) Realistic high-dimensional data via PCA (using 10-D blobs)

In [None]:
# Build a 10-D blob dataset (5 centers), standardize it, and project it to 2-D
# with PCA. The projection is used for plotting only; every model is fitted and
# scored in the full 10-D space.
X_hd, _ = make_blobs(n_samples=1000, centers=5, n_features=10, cluster_std=2.0, random_state=42)
X_hd = StandardScaler().fit_transform(X_hd)
pca = PCA(n_components=2, random_state=42)
X_hd_2d = pca.fit_transform(X_hd)

models = [
    # n_init pinned so results do not depend on the scikit-learn version
    # (the default changed from 10 to 'auto' in 1.4).
    ('KMeans-5', KMeans(n_clusters=5, n_init=10, random_state=42)),
    ('Agglo-5', AgglomerativeClustering(n_clusters=5, linkage='ward')),
    # NOTE(review): eps=0.7 may be too small for 10-D standardized data and
    # could label most points as noise — confirm against the rendered plot.
    ('DBSCAN', DBSCAN(eps=0.7, min_samples=5)),
]

for name, model in models:
    labels = model.fit_predict(X_hd)

    # DBSCAN marks noise with -1; noise is not a cluster, so drop it before
    # counting clusters and scoring. K-means and agglomerative clustering never
    # emit -1, so the mask keeps every point for them.
    clustered = labels != -1
    n_found = len(set(labels[clustered]))

    # silhouette_score requires 2 <= n_labels <= n_samples - 1; otherwise NaN.
    if 1 < n_found < clustered.sum():
        sil = silhouette_score(X_hd[clustered], labels[clustered])
    else:
        sil = float('nan')

    # One small figure per model, drawn in the 2-D PCA projection.
    plt.figure(figsize=(4, 3))
    plt.scatter(X_hd_2d[:, 0], X_hd_2d[:, 1], c=labels, s=5, cmap='tab10')
    plt.title(f'{name} on high-dim (sil={sil:.2f})')
    plt.tight_layout()
    plt.show()