In [15]:
# Step 1: Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_blobs
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
import os

In [16]:

# Step 2: Make sure the folder that receives the saved figures is present.
# exist_ok=True turns a re-run of this cell into a no-op instead of an error.
visuals_dir = "visuals"
os.makedirs(visuals_dir, exist_ok=True)

# Step 3: Define reusable plot function
def plot_clusters(X, labels, title, filename, output_dir="visuals"):
    """Save a 2-D scatter plot of ``X`` coloured by ``labels``.

    Only the first two columns of ``X`` are drawn. The figure is written to
    ``{output_dir}/{filename}``, closed, and a confirmation line is printed.

    Parameters
    ----------
    X : array-like, at least two columns
        Points to plot; converted with ``np.asarray`` so nested lists work too.
    labels : sequence
        One value per point, handed to matplotlib as the colour argument ``c``.
    title : str
        Title shown above the axes.
    filename : str
        File name (including extension) of the image inside ``output_dir``.
    output_dir : str, default "visuals"
        Folder that receives the image; created if it does not exist yet.

    Raises
    ------
    Whatever ``os.makedirs`` or ``Figure.savefig`` raise (e.g. ``OSError``);
    the figure is still closed in that case.
    """
    X = np.asarray(X)

    # Create the target folder here so the function does not rely on another
    # cell having been run first.
    os.makedirs(output_dir, exist_ok=True)
    # Plain "/" join keeps the printed message identical to the previous output.
    out_path = f"{output_dir}/{filename}"

    # Explicit figure/axes objects instead of the implicit pyplot "current figure".
    fig, ax = plt.subplots(figsize=(5, 4))
    try:
        ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='Set1', s=30)
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    print(f"Saved plot to {out_path}")

In [17]:
# Dataset 1: Moons (DBSCAN works well)

# The generated ground-truth labels are discarded; the three clusterings below
# are compared only through the saved plots.
X_moons, _ = make_moons(n_samples=300, noise=0.05, random_state=42)
# Standardise the features so DBSCAN's eps is expressed in scaled units.
X_moons = StandardScaler().fit_transform(X_moons)

# DBSCAN on Moons
dbscan = DBSCAN(eps=0.3, min_samples=5)
labels_db = dbscan.fit_predict(X_moons)
plot_clusters(X_moons, labels_db, "DBSCAN on Moons", "moons_dbscan.png")

# k-Means on Moons
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4, so relying on the default gives version-dependent
# results (and a FutureWarning on 1.2-1.3).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
labels_km = kmeans.fit_predict(X_moons)
plot_clusters(X_moons, labels_km, "k-Means on Moons", "moons_kmeans.png")

# Hierarchical Clustering on Moons
agg = AgglomerativeClustering(n_clusters=2)
labels_hc = agg.fit_predict(X_moons)
plot_clusters(X_moons, labels_hc, "Hierarchical on Moons", "moons_hierarchical.png")


Saved plot to visuals/moons_dbscan.png
Saved plot to visuals/moons_kmeans.png
Saved plot to visuals/moons_hierarchical.png


In [18]:
# Dataset 2: Blobs with Varying Density (DBSCAN struggles)

# Three blobs with different spreads (cluster_std); the generated ground-truth
# labels are discarded because the clusterings are compared only via the plots.
X_blobs, _ = make_blobs(n_samples=300, centers=3, cluster_std=[1.0, 2.5, 0.5], random_state=42)
# Standardise the features so DBSCAN's eps is expressed in scaled units.
X_blobs = StandardScaler().fit_transform(X_blobs)

# DBSCAN on Blobs
# Same eps / min_samples as the moons experiment: one global density threshold
# applied to blobs of different spread.
dbscan_b = DBSCAN(eps=0.3, min_samples=5)
labels_db_b = dbscan_b.fit_predict(X_blobs)
plot_clusters(X_blobs, labels_db_b, "DBSCAN on Blobs (Varying Density)", "blobs_dbscan.png")

# k-Means on Blobs
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4, so relying on the default gives version-dependent
# results (and a FutureWarning on 1.2-1.3).
kmeans_b = KMeans(n_clusters=3, n_init=10, random_state=42)
labels_km_b = kmeans_b.fit_predict(X_blobs)
plot_clusters(X_blobs, labels_km_b, "k-Means on Blobs", "blobs_kmeans.png")

# Hierarchical Clustering on Blobs
agg_b = AgglomerativeClustering(n_clusters=3)
labels_hc_b = agg_b.fit_predict(X_blobs)
plot_clusters(X_blobs, labels_hc_b, "Hierarchical on Blobs", "blobs_hierarchical.png")

Saved plot to visuals/blobs_dbscan.png
Saved plot to visuals/blobs_kmeans.png
Saved plot to visuals/blobs_hierarchical.png
