In [17]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import DBSCAN
from ipywidgets import interactive
import matplotlib

# Generate synthetic data (two interleaving half circles). The fixed seed keeps
# the k-distance curve and the clustering reproducible across re-runs.
X, _ = make_moons(n_samples=300, noise=0.1, random_state=42)

# k-NN to get each point's distance to its k-th nearest neighbor (k=4)
k = 4
neighbors = NearestNeighbors(n_neighbors=k)
neighbors.fit(X)
# Query with no argument so a point is never reported as its own neighbor.
# kneighbors(X) on the fitted data would return the zero self-distance in
# column 0, making the last column the (k-1)-th neighbor distance instead of
# the k-th.
distances, indices = neighbors.kneighbors()

# Sort the k-th nearest neighbor distances (last column) in ascending order;
# this is the curve shown in the k-distance plot.
sorted_distances = np.sort(distances[:, -1])

def update_plot(eps):
    """Draw the k-distance curve and the DBSCAN result for one value of eps.

    The left panel plots the module-level ``sorted_distances`` with a dashed
    horizontal line at ``eps``. The right panel scatters the module-level ``X``
    colored by the labels DBSCAN assigns when run with that ``eps``.

    Args:
        eps: Neighborhood radius passed to DBSCAN; also the height of the
            threshold line on the k-distance plot.
    """
    # Create the figure with two side-by-side subplots
    fig, (dist_ax, cluster_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # --- Plot 1: k-NN distance plot ---
    dist_ax.plot(sorted_distances)
    dist_ax.set_xlabel("Sorted Points")
    dist_ax.set_ylabel(f"Distance to k-th Nearest Neighbor (k = {k})")
    dist_ax.set_title("k-NN Distance Plot")

    # Red horizontal line at the selected eps (eps is a distance, i.e. a y-value)
    dist_ax.axhline(y=eps, color='r', linestyle='--', label=f'eps = {eps:.2f}')
    dist_ax.legend()

    # --- Plot 2: DBSCAN Clustering Result ---
    # DBSCAN's min_samples counts the point itself, so k neighbors <-> k + 1.
    dbscan = DBSCAN(eps=eps, min_samples=k + 1)
    labels = dbscan.fit_predict(X)
    colormap = matplotlib.colormaps['tab10']

    # Iterate labels in sorted order and index the palette by the label itself,
    # so a given cluster id keeps its color and legend position as eps changes
    # (iterating a set and scaling by the cluster count does neither).
    for label in sorted(set(labels)):
        cluster_points = X[labels == label]
        if label == -1:
            # DBSCAN marks noise with -1; draw it in black, outside the palette.
            color, name = "k", "Noise"
        else:
            # Integer input selects a palette entry directly; wrap past the end.
            color, name = colormap(int(label) % colormap.N), f"Cluster {label}"
        cluster_ax.scatter(cluster_points[:, 0], cluster_points[:, 1],
                           color=color, label=name)

    cluster_ax.set_xlabel("Feature 1")
    cluster_ax.set_ylabel("Feature 2")
    cluster_ax.set_title("DBSCAN Clustering")
    cluster_ax.legend()

    fig.tight_layout()
    plt.show()

# Slider specification for eps, given as (min, max, step); ipywidgets expands
# the tuple into a float slider.
EPS_SLIDER_SPEC = (0.01, 0.5, 0.01)

# Re-run update_plot whenever the slider moves; leaving the widget as the last
# expression of the cell is what displays it.
interactive_plot = interactive(update_plot, eps=EPS_SLIDER_SPEC)
interactive_plot


interactive(children=(FloatSlider(value=0.25, description='eps', max=0.5, min=0.01, step=0.01), Output()), _do…