In [None]:
# K Means from scratch
import numpy as np

class KMeans:
    """K-Means clustering (Lloyd's algorithm) written with plain NumPy.

    Parameters
    ----------
    k : int, default 3
        Number of clusters / centroids.
    max_iters : int, default 100
        Maximum number of assign-then-update iterations performed by ``fit``.
    random_state : int or None, default 42
        Seed for the private random generator that picks the initial
        centroids. The default reproduces the initialization obtained by
        seeding NumPy's global generator with 42; ``None`` gives a
        non-deterministic initialization.

    Attributes
    ----------
    centroids : ndarray of shape (k, n_features) or None
        Cluster centers; ``None`` until ``fit`` has been called.
    """

    def __init__(self, k=3, max_iters=100, random_state=42):
        self.k = k
        self.max_iters = max_iters
        self.random_state = random_state
        self.centroids = None

    def fit(self, X):
        """Learn ``k`` centroids from ``X``.

        Parameters
        ----------
        X : array-like of shape (m, n_features)
            Training samples, one per row.

        Returns
        -------
        KMeans
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``k`` is larger than the number of samples (raised by the
            sampling-without-replacement step).
        """
        X = np.asarray(X, dtype=float)

        # Pick k distinct samples as the starting centroids. A private
        # RandomState is used instead of np.random.seed so that fitting does
        # not reset the notebook-wide global random stream.
        rng = np.random.RandomState(self.random_state)
        random_idx = rng.choice(len(X), self.k, replace=False)
        self.centroids = X[random_idx]

        for _ in range(self.max_iters):
            # Step 1: assign every point to its nearest centroid -> shape (m,)
            labels = self._assign_clusters(X)

            # Step 2: move each centroid to the mean of its members.
            # `labels == i` is a boolean mask of shape (m,), so X[mask] has
            # shape (n_i, n_features). A cluster with no members keeps its
            # previous centroid; averaging an empty selection would yield NaN.
            new_centroids = self.centroids.copy()
            for i in range(self.k):
                members = X[labels == i]
                if len(members) > 0:
                    new_centroids[i] = members.mean(axis=0)

            # Converged once the centroids stop moving (within tolerance).
            if np.allclose(self.centroids, new_centroids):
                break

            self.centroids = new_centroids

        return self

    def _assign_clusters(self, X):
        """Return, for each row of ``X``, the index of the closest centroid."""
        X = np.asarray(X, dtype=float)
        # Broadcasting (m, 1, d) - (k, d) -> (m, k, d); the norm over the last
        # axis gives an (m, k) matrix of point-to-centroid distances.
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        # Index of the smallest distance in each row -> array of length m.
        return np.argmin(distances, axis=1)

    def predict(self, X):
        """Assign each row of ``X`` to the nearest learned centroid.

        ``fit`` must have been called first so that ``centroids`` is set.
        """
        return self._assign_clusters(X)
    
# Reproducible synthetic data: 100 samples with 2 features each
np.random.seed(42)
X = np.random.randn(100, 2)

# Preview the first three samples
print(X[:3])

# KMeans.__init__ names its iteration cap `max_iters` (plural); passing
# `max_iter` raises TypeError before any clustering happens.
kmeans = KMeans(k=2, max_iters=200)
kmeans.fit(X)

# Cluster index assigned to each of the first three samples
kmeans.predict(X[:3])

In [None]:
# Small hand-written 2-D dataset (six points, one per row)
X = np.array(
    [
        [1, 2],
        [1.5, 1.8],
        [5, 8],
        [8, 8],
        [1, 0.6],
        [9, 11],
    ]
)

# fit() returns the estimator itself, so construction and fitting are chained
kmeans = KMeans(k=2, max_iters=100).fit(X)

# Report the learned centers and the cluster index of every training point
print("Final centroids:\n", kmeans.centroids)
print("Cluster assignments:", kmeans.predict(X))

Final centroids:
 [[1.16666667 1.46666667]
 [7.33333333 9.        ]]
Cluster assignments: [0 0 1 1 0 1]


In [9]:
import numpy as np

class KMeans_Clustering:
    """K-Means clustering (Lloyd's algorithm) written with plain NumPy.

    Parameters
    ----------
    k : int, default 2
        Number of clusters / centroids.
    max_iter : int, default 100
        Maximum number of assign-then-update iterations performed by ``fit``.
    random_state : int or None, default 42
        Seed for the private random generator that picks the initial
        centroids. The default reproduces the initialization obtained by
        seeding NumPy's global generator with 42; ``None`` gives a
        non-deterministic initialization.

    Attributes
    ----------
    centroids : ndarray of shape (k, n_features) or None
        Cluster centers; ``None`` until ``fit`` has been called.
    """

    def __init__(self, k=2, max_iter=100, random_state=42):
        self.k = k
        self.max_iter = max_iter
        self.random_state = random_state
        self.centroids = None

    def assign_cluster(self, X):
        """Return, for each row of ``X``, the index of the closest centroid."""
        X = np.asarray(X, dtype=float)
        # Broadcasting (m, 1, d) - (k, d) -> (m, k, d); the norm over the last
        # axis gives an (m, k) matrix of point-to-centroid distances.
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        clusters = np.argmin(distances, axis=1)
        return clusters

    def fit(self, X):
        """Learn ``k`` centroids from ``X``.

        Parameters
        ----------
        X : array-like of shape (m, n_features)
            Training samples, one per row.

        Returns
        -------
        KMeans_Clustering
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``k`` is larger than the number of samples (raised by the
            sampling-without-replacement step).
        """
        X = np.asarray(X, dtype=float)

        # Pick k distinct samples as the starting centroids. A private
        # RandomState is used instead of np.random.seed so that fitting does
        # not reset the notebook-wide global random stream.
        rng = np.random.RandomState(self.random_state)
        random_idx = rng.choice(len(X), self.k, replace=False)
        self.centroids = X[random_idx]

        for _ in range(self.max_iter):
            # Assign every point to its nearest centroid
            clusters = self.assign_cluster(X)

            # Move each centroid to the mean of its members. A cluster with
            # no members keeps its previous centroid; averaging an empty
            # selection would yield NaN.
            new_centroids = self.centroids.copy()
            for c in range(self.k):
                members = X[clusters == c]
                if len(members) > 0:
                    new_centroids[c] = members.mean(axis=0)

            # Converged once the centroids stop moving (within tolerance)
            if np.allclose(new_centroids, self.centroids):
                break

            self.centroids = new_centroids

        return self

    def predict(self, X):
        """Assign each row of ``X`` to the nearest learned centroid.

        ``fit`` must have been called first so that ``centroids`` is set.
        """
        return self.assign_cluster(X)

In [12]:
# Reproducible synthetic data: 100 samples with 2 features each
np.random.seed(42)
X = np.random.randn(100, 2)

# Preview the first three samples
print(X[:3])

# Fit a two-cluster model, allowing up to 200 iterations
kmeans = KMeans_Clustering(k=2, max_iter=200)
kmeans.fit(X)

# Cluster index assigned to each of the first three samples
kmeans.predict(X[:3])

[[ 0.49671415 -0.1382643 ]
 [ 0.64768854  1.52302986]
 [-0.23415337 -0.23413696]]


array([1, 0, 1])