In [1]:
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.metrics import (
    adjusted_rand_score,
    adjusted_mutual_info_score,
    homogeneity_score,
    silhouette_score,
    davies_bouldin_score,
    calinski_harabasz_score,
)

# Cluster the Iris measurements with KMeans and report six clustering-quality
# metrics: three that compare against the known species labels and three that
# use only the feature matrix and the predicted cluster assignments.

# Load the Iris dataset
iris = load_iris()
X = iris.data
y_true = iris.target  # Ground truth labels (used only for evaluation below)

# Perform KMeans clustering.
# random_state fixes the centroid initialisation. n_init is left at the
# library default, which has changed between scikit-learn releases, so the
# exact scores may differ slightly from one version to another.
kmeans = KMeans(n_clusters=3, random_state=42)
y_pred = kmeans.fit_predict(X)

# Evaluation Metrics with Ground Truth
print("Metrics with Ground Truth (Supervised):")
# Adjusted Rand Index (ARI) - chance-corrected pairwise agreement between the
# clusters and the true labels. 1 is a perfect match, ~0 is what random
# labelings score, and the documented lower bound is -0.5 (not -1).
ari = adjusted_rand_score(y_true, y_pred)
print(f"Adjusted Rand Index (ARI): {ari} (Best: 1, Random: ~0, Lower bound: -0.5)")

# Adjusted Mutual Information (AMI) - chance-corrected information overlap
# between the clusters and the true labels. 0 is the *expected* score of random
# partitions, not a floor: worse-than-chance agreement yields negative values.
ami = adjusted_mutual_info_score(y_true, y_pred)
print(f"Adjusted Mutual Information (AMI): {ami} (Best: 1, Random: ~0, can be negative)")

# Homogeneity Score - Checks if clusters contain only points from the same class
homogeneity = homogeneity_score(y_true, y_pred)
print(f"Homogeneity Score: {homogeneity} (Best: 1, Worst: 0)")

# Evaluation Metrics without Ground Truth
print("\nMetrics without Ground Truth (Unsupervised):")
# Silhouette Score - Measures how well-separated and cohesive clusters are
silhouette = silhouette_score(X, y_pred)
print(f"Silhouette Score: {silhouette} (Best: 1, Worst: -1)")

# Davies-Bouldin Index - Measures intra-cluster similarity vs inter-cluster
# separation; lower values indicate better-separated clusters.
dbi = davies_bouldin_score(X, y_pred)
print(f"Davies-Bouldin Index: {dbi} (Best: 0, Worst: Infinity)")

# Calinski-Harabasz Index - Measures ratio of between-cluster to within-cluster dispersion
chi = calinski_harabasz_score(X, y_pred)
print(f"Calinski-Harabasz Index: {chi} (Best: Higher is better, no upper limit)")


Metrics with Ground Truth (Supervised):
Adjusted Rand Index (ARI): 0.7163421126838476 (Best: 1, Worst: -1)
Adjusted Mutual Information (AMI): 0.7386548254402864 (Best: 1, Worst: 0)
Homogeneity Score: 0.7364192881252849 (Best: 1, Worst: 0)

Metrics without Ground Truth (Unsupervised):
Silhouette Score: 0.551191604619592 (Best: 1, Worst: -1)
Davies-Bouldin Index: 0.6660385791628493 (Best: 0, Worst: Infinity)
Calinski-Harabasz Index: 561.5937320156642 (Best: Higher is better, no upper limit)
