In [3]:
import numpy as np

class DBSCAN:
    """
    Density-Based Spatial Clustering of Applications with Noise (DBSCAN) from scratch.

    A point is a *core* point when at least ``min_samples`` points (itself
    included) lie within Euclidean distance ``eps`` of it. Clusters are grown
    outward from core points; points reachable from a core point that are not
    core themselves become border points of that cluster, and everything else
    is labelled ``-1`` (noise).

    Parameters
    ----------
    eps : float, default=0.5
        Neighbourhood radius (inclusive: ``distance <= eps``).
    min_samples : int, default=5
        Minimum neighbourhood size, counting the point itself, for a point to
        be treated as a core point.

    Attributes
    ----------
    labels_ : numpy.ndarray of int, shape (n_samples,), or None
        Cluster index per sample after ``fit``; ``-1`` marks noise.
        ``None`` until ``fit`` has been called.
    """
    def __init__(self, eps=0.5, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples
        self.labels_ = None

    def fit(self, X):
        """
        Cluster the rows of ``X`` and store the result in ``labels_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples to cluster. Nested lists and integer arrays are accepted.

        Returns
        -------
        self : DBSCAN
            The fitted instance, so calls can be chained.
        """
        # Convert to float up front: accepts plain lists, and keeps unsigned
        # integer inputs from wrapping around in the subtraction performed by
        # _region_query (which would yield bogus distances).
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        labels = np.full(n, -1, dtype=int)  # -1 means noise
        visited = np.zeros(n, dtype=bool)
        cluster_id = 0

        for i in range(n):
            if visited[i]:
                continue
            visited[i] = True
            neighbors = self._region_query(X, i)
            # Non-core points keep the -1 label for now; they may still be
            # claimed as border points when a later cluster is expanded.
            if len(neighbors) >= self.min_samples:
                self._expand_cluster(X, labels, i, neighbors, cluster_id, visited)
                cluster_id += 1
        self.labels_ = labels
        return self

    def _region_query(self, X, idx):
        """
        Return the indices (as a list of ints, ascending) of all rows of ``X``
        within ``eps`` of row ``idx``. Row ``idx`` itself is always included
        when ``eps >= 0`` because its distance to itself is zero.
        """
        dists = np.linalg.norm(X - X[idx], axis=1)
        return np.where(dists <= self.eps)[0].tolist()

    def _expand_cluster(self, X, labels, idx, neighbors, cluster_id, visited):
        """
        Grow cluster ``cluster_id`` outward from core point ``idx``.

        ``labels`` and ``visited`` are updated in place. ``neighbors`` is the
        eps-neighbourhood of ``idx``; it is copied, not modified.
        """
        labels[idx] = cluster_id
        # FIFO work list plus a set mirror for O(1) "already queued?" checks;
        # the list preserves the original discovery order.
        queue = list(neighbors)
        queued = set(queue)
        pos = 0
        while pos < len(queue):
            n_idx = queue[pos]
            pos += 1
            if not visited[n_idx]:
                visited[n_idx] = True
                n_neighbors = self._region_query(X, n_idx)
                # Only core points propagate the cluster further.
                if len(n_neighbors) >= self.min_samples:
                    for candidate in n_neighbors:
                        if candidate not in queued:
                            queued.add(candidate)
                            queue.append(candidate)
            # Unlabelled points (including ones previously judged noise)
            # join this cluster; points already in a cluster are left alone.
            if labels[n_idx] == -1:
                labels[n_idx] = cluster_id

> ## Example usage:

In [4]:
# Toy dataset: a dense group near (1, 2), a second group near (8, 8),
# and a single far-away point at (25, 80) that should come out as noise.
X = np.array(
    [
        [1, 2],
        [2, 2],
        [2, 3],
        [8, 7],
        [8, 8],
        [25, 80],
        [8, 9],
        [7, 8],
        [1, 1.5],
        [1.2, 1.8],
    ]
)

# min_samples=2 counts the point itself, so any pair within eps forms a cluster.
dbscan = DBSCAN(min_samples=2, eps=1.5)
dbscan.fit(X)

print("DBSCAN labels:", dbscan.labels_)

DBSCAN labels: [ 0  0  0  1  1 -1  1  1  0  0]
