# 实验九：KMeans与DBSCAN 聚类对比（含可视化）

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score, homogeneity_score, completeness_score

## 生成三种数据集（圆环、团簇、S 曲线）

In [None]:
# Build three toy datasets with different cluster geometry:
# concentric circles, Gaussian blobs, and an S-shaped manifold.
n_samples = 300

# make_circles and make_s_curve are called without random_state, so they draw
# from NumPy's global RNG; seeding it here keeps this cell reproducible.
np.random.seed(42)

X1, y1 = datasets.make_circles(n_samples, noise=0.05, factor=0.5)
X2, y2 = datasets.make_blobs(n_samples, random_state=42, centers=3)
X3, y3 = datasets.make_s_curve(n_samples, noise=0.1)

# The S-curve is generated in 3-D; project it onto its two leading principal
# components so it can be clustered and plotted like the other 2-D datasets.
s_curve_pca = PCA(n_components=2)
X3_2d = s_curve_pca.fit_transform(X3)

## 数据标准化 + 数据集组合

In [None]:
# Standardize every dataset independently (each feature rescaled to zero mean
# and unit variance) before clustering.
X1_scaled = StandardScaler().fit_transform(X1)
X2_scaled = StandardScaler().fit_transform(X2)
X3_scaled = StandardScaler().fit_transform(X3_2d)

# make_s_curve returns a *continuous* position along the curve rather than a
# class label. The clustering metrics expect discrete labels; fed the raw
# floats they treat every sample as its own class and the scores become
# meaningless. The position parameter is centred on 0, so thresholding there
# splits the curve into its two arcs, matching the 2 clusters requested below.
y3_labels = (y3 > 0).astype(int)

# Each entry: (display name, scaled features, reference labels, KMeans k).
datasets_list = [
    ("Circles", X1_scaled, y1, 2),
    ("Blobs", X2_scaled, y2, 3),
    ("S-curve", X3_scaled, y3_labels, 2),
]

## 评估函数定义

In [None]:
def evaluate_clustering(true_labels, pred_labels):
    """Summarize how well predicted cluster labels match reference labels.

    Args:
        true_labels: Reference label for each sample.
        pred_labels: Cluster label assigned to each sample.

    Returns:
        A three-line string with the adjusted Rand index, homogeneity and
        completeness scores, each formatted to two decimals, ready to be
        embedded in a plot title.
    """
    scorers = (
        ("ARI", adjusted_rand_score),
        ("Homo", homogeneity_score),
        ("Comp", completeness_score),
    )
    return "\n".join(
        f"{tag}: {scorer(true_labels, pred_labels):.2f}" for tag, scorer in scorers
    )

## 聚类实验与可视化展示

In [None]:
# Grid layout: one row per dataset, three columns
# (reference labels, KMeans result, DBSCAN result).
n_rows = len(datasets_list)
n_cols = 3

plt.figure(figsize=(15, 10))
for idx, (name, X, true_labels, n_clusters) in enumerate(datasets_list):
    # KMeans. n_init is pinned explicitly because its default changed across
    # scikit-learn releases; 10 restarts keeps results consistent everywhere.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    kmeans_labels = kmeans.fit_predict(X)
    kmeans_metrics = evaluate_clustering(true_labels, kmeans_labels)

    # DBSCAN, with the neighbourhood radius tuned per dataset.
    # Points DBSCAN considers noise receive the label -1.
    eps = 0.5 if name == "Blobs" else 0.3
    dbscan = DBSCAN(eps=eps, min_samples=5)
    dbscan_labels = dbscan.fit_predict(X)
    dbscan_metrics = evaluate_clustering(true_labels, dbscan_labels)

    # (title, point colours) for each column of this row.
    panels = (
        (f"{name}\n(True Labels)", true_labels),
        (f"K-means\n{kmeans_metrics}", kmeans_labels),
        (f"DBSCAN\n{dbscan_metrics}", dbscan_labels),
    )
    for col, (title, point_labels) in enumerate(panels, start=1):
        plt.subplot(n_rows, n_cols, idx * n_cols + col)
        # A sequential colormap is used instead of the diverging 'bwr':
        # 'bwr' is white at its midpoint, which made the middle cluster of a
        # three-cluster result invisible against the white axes background.
        plt.scatter(X[:, 0], X[:, 1], c=point_labels, cmap='viridis', s=10)
        plt.title(title)

plt.tight_layout()
plt.show()