In [1]:
import numpy as np

# Euclidean distance helper
def distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The operands are subtracted element-wise, so they must support ``-``
    and ``**`` (e.g. NumPy arrays of matching or broadcastable shape).
    Every element of the difference contributes to the result, which is
    a single scalar.
    """
    offset = a - b
    squared_norm = np.sum(offset ** 2)
    return np.sqrt(squared_norm)

def kmeans(X, K, max_iters=100, seed=42):
    """Cluster the samples in ``X`` into ``K`` groups with Lloyd's k-means.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis. Anything accepted by
        ``np.asarray`` works (nested lists included).
    K : int
        Number of clusters; must satisfy ``1 <= K <= len(X)``.
    max_iters : int, optional
        Upper bound on the number of centroid updates. ``0`` (or a
        negative value) returns the initial centroids together with the
        assignment they induce.
    seed : int or None, optional
        Seed for the private random generator that picks the initial
        centroids. The default of 42 reproduces the historical behaviour
        of this function; ``None`` gives a different initialisation on
        every call.

    Returns
    -------
    centroids : numpy.ndarray
        The ``K`` final centroids, one per row.
    clusters : list of list
        ``clusters[k]`` holds the samples of ``X`` assigned to
        ``centroids[k]``, in their original order.

    Raises
    ------
    ValueError
        If ``X`` has no sample axis or ``K`` is outside ``1..len(X)``.
    """
    X = np.asarray(X)
    if X.ndim == 0:
        raise ValueError("X must be a sequence of samples, got a scalar")
    n_samples = len(X)
    if not 1 <= K <= n_samples:
        raise ValueError(
            f"K must be between 1 and the number of samples ({n_samples}), got {K}"
        )

    # A private RandomState yields the same draw as seeding the global
    # generator with the same value, without clobbering the caller's
    # np.random state.
    rng = np.random.RandomState(seed)
    centroids = X[rng.choice(n_samples, K, replace=False)]
    labels = _nearest_centroid(X, centroids)

    for _ in range(max_iters):
        # An empty cluster keeps its previous centroid so K never shrinks.
        new_centroids = np.array([
            X[labels == k].mean(axis=0) if np.any(labels == k) else centroids[k]
            for k in range(K)
        ])

        # Converged: the update step no longer moves any centroid.
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids
        # Re-assign right away so the labels always describe the centroids
        # that are returned, even when the iteration budget runs out.
        labels = _nearest_centroid(X, centroids)

    clusters = [[X[i] for i in np.flatnonzero(labels == k)] for k in range(K)]
    return centroids, clusters


def _nearest_centroid(X, centroids):
    """Return, for every sample in ``X``, the index of its closest centroid.

    Distances are Euclidean; ties go to the lowest centroid index.
    """
    # Broadcast to shape (n_samples, n_centroids, *feature_shape).
    diffs = X[:, np.newaxis] - centroids[np.newaxis]
    # Reduce over the feature axes only (none at all for scalar samples).
    feature_axes = tuple(range(2, diffs.ndim))
    dists = np.sqrt(np.sum(diffs ** 2, axis=feature_axes))
    return np.argmin(dists, axis=1)


# Demo: two vertical columns of points (x = 1 and x = 10), three points each
X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
K = 2

# Run the clustering and unpack both results
centroids, clusters = kmeans(X, K)

# Report the centroids, then the members of each cluster (numbered from 1)
print("Final centroids:\n", centroids)
print("\nClusters:")
for number, members in enumerate(clusters, start=1):
    print(f"Cluster {number}: {members}")


Final centroids:
 [[5.5 1. ]
 [5.5 4. ]]

Clusters:
Cluster 1: [array([1, 2]), array([1, 0]), array([10,  2]), array([10,  0])]
Cluster 2: [array([1, 4]), array([10,  4])]
