In [2]:
import numpy as np

class KMeans:
    """
    K-Means clustering from scratch (Lloyd's algorithm).

    Alternates between assigning every sample to its nearest centroid
    (Euclidean distance) and moving each centroid to the mean of the samples
    assigned to it, until the centroids stop moving or ``max_iter`` is hit.

    Parameters
    ----------
    n_clusters : int, default=3
        Number of clusters (and centroids) to form.
    max_iter : int, default=100
        Maximum number of assign/update iterations.
    tol : float, default=1e-4
        Convergence tolerance: iteration stops once the Frobenius norm of the
        centroid shift between two consecutive iterations drops below it.
    random_state : None, int or numpy.random.Generator, default=None
        Seed (or generator) used to pick the initial centroids. ``None`` draws
        fresh entropy on every call to ``fit``; pass an int for reproducible
        results.

    Attributes
    ----------
    centroids_ : ndarray of shape (n_clusters, n_features)
        Final cluster centres.
    labels_ : ndarray of shape (n_samples,)
        Index of the nearest final centroid for every training sample.
    n_iter_ : int
        Number of iterations actually run.
    """

    def __init__(self, n_clusters=3, max_iter=100, tol=1e-4, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol  # tolerance to declare convergence
        self.random_state = random_state

    @staticmethod
    def _as_2d_float(X):
        """Return ``X`` as a 2-D float array, raising ``ValueError`` otherwise."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)"
            )
        return X

    def _distances(self, X):
        """Euclidean distance from every row of ``X`` to every centroid."""
        # Broadcasting (n, 1, d) - (k, d) -> (n, k, d); the norm collapses d.
        return np.linalg.norm(X[:, np.newaxis] - self.centroids_, axis=2)

    def fit(self, X):
        """
        Compute the centroids for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        Returns
        -------
        self : KMeans
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``n_clusters`` is not in ``[1, n_samples]``.
        """
        X = self._as_2d_float(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters={self.n_clusters} must be between 1 and "
                f"n_samples={n_samples}"
            )

        # Initialize centroids with distinct, randomly chosen samples.
        rng = np.random.default_rng(self.random_state)
        random_idx = rng.choice(n_samples, self.n_clusters, replace=False)
        self.centroids_ = X[random_idx]

        self.n_iter_ = 0
        for _ in range(self.max_iter):
            self.n_iter_ += 1

            # Assignment step: nearest centroid for every sample.
            labels = np.argmin(self._distances(X), axis=1)

            # Update step: mean of the members; an empty cluster keeps its
            # previous centroid instead of turning into NaN.
            new_centroids = self.centroids_.copy()
            for k in range(self.n_clusters):
                members = labels == k
                if members.any():
                    new_centroids[k] = X[members].mean(axis=0)

            shift = np.linalg.norm(self.centroids_ - new_centroids)
            # Keep the refined centroids even on the converging iteration.
            self.centroids_ = new_centroids
            if shift < self.tol:
                break

        # Recompute labels against the final centroids so labels_ always
        # agrees with predict(X), including when max_iter is exhausted or 0.
        self.labels_ = np.argmin(self._distances(X), axis=1)
        return self

    def predict(self, X):
        """
        Assign each sample in ``X`` to its nearest fitted centroid.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to label; must have the same feature count used in ``fit``.

        Returns
        -------
        ndarray of shape (n_samples,)
            Index of the closest centroid for every sample.
        """
        return np.argmin(self._distances(self._as_2d_float(X)), axis=1)

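> ## Example usage
>
> Fit the model on a small 2-D toy dataset, then predict clusters for two new points. The initial centroids are chosen at random, so the cluster numbering (and occasionally the clustering itself) can differ from run to run.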

In [3]:
# Toy 2-D dataset: nine samples, one (x, y) pair per row.
X = np.array(
    [
        [1.0, 2.0],
        [1.5, 1.8],
        [5.0, 8.0],
        [8.0, 8.0],
        [1.0, 0.6],
        [9.0, 11.0],
        [8.0, 2.0],
        [10.0, 2.0],
        [9.0, 3.0],
    ]
)

# Build a three-cluster model and fit it on the toy data.
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)

# Inspect the fitted state: cluster centres and per-sample assignments.
print("Centroids:\n", kmeans.centroids_)
print("Labels:", kmeans.labels_)

# Label two unseen points with the fitted centroids.
test_pts = np.array([[0.5, 2.0], [8.5, 7.5]])
print("Predicted clusters for test points:", kmeans.predict(test_pts))

Centroids:
 [[7.33333333 9.        ]
 [9.         2.33333333]
 [1.16666667 1.46666667]]
Labels: [2 2 0 0 2 0 1 1 1]
Predicted clusters for test points: [2 0]
