In [1]:
import random

def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance between two points.

    The points are any iterables of numbers. Coordinates are paired with
    ``zip``, so if the lengths differ the extra coordinates of the longer
    point are ignored.
    """
    squared_gaps = [(a - b) ** 2 for a, b in zip(point1, point2)]
    return sum(squared_gaps) ** 0.5

def assign_points_to_clusters(centroids, data):
    """Group every point of ``data`` under its nearest centroid.

    Returns a list with one bucket per centroid (same order as ``centroids``);
    bucket ``i`` holds the points whose closest centroid is ``centroids[i]``.
    When a point is equally close to several centroids, the lowest index wins.
    """
    buckets = [[] for _ in centroids]
    for point in data:
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        # min() keeps the first smallest value, so index() resolves ties
        # towards the lowest centroid index.
        nearest = distances.index(min(distances))
        buckets[nearest].append(point)
    return buckets

def calculate_new_centroids(clusters):
    """Return the coordinate-wise mean of every non-empty cluster.

    Empty clusters are skipped, so the result can be shorter than
    ``clusters`` and positions no longer line up once a cluster is dropped.
    Each centroid is returned as a list of floats.
    """
    centroids = []
    for members in clusters:
        if not members:
            continue
        # zip(*members) transposes the points into per-dimension columns.
        centroids.append([sum(column) / len(column) for column in zip(*members)])
    return centroids

def k_means(data, k, max_iterations=100, tolerance=1e-4):
    """Cluster ``data`` into ``k`` groups with Lloyd's k-means algorithm.

    Args:
        data: Sequence of points (each an iterable of numbers).
        k: Number of clusters. The initial centroids are ``k`` points drawn
            with ``random.sample``, which raises ``ValueError`` when ``k`` is
            larger than ``len(data)`` or negative.
        max_iterations: Upper bound on assign/update rounds; must be >= 1.
        tolerance: Iteration stops once every centroid moved by less than
            this Euclidean distance in one round.

    Returns:
        A list of ``k`` lists; entry ``i`` holds the points assigned to
        cluster ``i``. A cluster may be empty.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
    """
    # Without at least one round there is no assignment to return.
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    centroids = random.sample(data, k)

    for _ in range(max_iterations):
        clusters = assign_points_to_clusters(centroids, data)

        # calculate_new_centroids() skips empty clusters, which would shift
        # the remaining centroids to the wrong positions and shrink k.
        # Compute the mean cluster by cluster instead, and let an empty
        # cluster keep its previous centroid so positions stay aligned.
        new_centroids = [
            calculate_new_centroids([cluster])[0] if cluster else previous
            for cluster, previous in zip(clusters, centroids)
        ]

        converged = all(
            euclidean_distance(old, new) < tolerance
            for old, new in zip(centroids, new_centroids)
        )
        if converged:
            break

        centroids = new_centroids

    return clusters

# Toy dataset: three points at x=1 and three at x=10.
# The initial centroids are picked at random, so the order of the printed
# clusters (and, for unlucky starts, the split itself) can vary between runs.
data = [
    (1, 2), (1, 4), (1, 0),
    (10, 2), (10, 4), (10, 0),
]
clusters = k_means(data, k=2)
print(clusters)


[[(10, 2), (10, 4), (10, 0)], [(1, 2), (1, 4), (1, 0)]]


In [3]:
import numpy as np
import numexpr as ne

def initialize_centroids(data, k):
    """Pick ``k`` distinct rows of ``data`` to serve as starting centroids.

    Rows are chosen without replacement through NumPy's global random state
    (``np.random.choice``), so seeding ``np.random`` makes the pick repeatable.
    """
    n_rows = data.shape[0]
    chosen_rows = np.random.choice(n_rows, size=k, replace=False)
    return data[chosen_rows]

def assign_points_to_clusters_numpy(centroids, data):
    """Return, for every row of ``data``, the index of its nearest centroid.

    Args:
        centroids: Array-like of centroid coordinates, one row per centroid.
        data: Array-like of points, one row per point.

    Returns:
        Integer array with one entry per data row. Ties go to the lowest
        centroid index (``np.argmin`` returns the first minimum).
    """
    # Do the arithmetic in float64: with unsigned integer inputs (e.g. uint8
    # pixel data) the subtraction and the squaring below wrap around and
    # silently produce wrong distances.
    points = np.asarray(data, dtype=np.float64)
    centers = np.asarray(centroids, dtype=np.float64)

    # Broadcasting (n, 1, d) against (k, d) gives every point/centroid pair.
    diff = points[:, np.newaxis] - centers

    # The square root is monotonic, so the squared distance is enough to
    # decide which centroid is closest.
    squared_dist = np.sum(diff ** 2, axis=2)
    return np.argmin(squared_dist, axis=1)

def update_centroids(data, assignments, k):
    """Recompute the ``k`` centroids from the current cluster assignments.

    Args:
        data: Array-like of points, one row per point.
        assignments: For each row of ``data``, the index of its cluster;
            values are expected to lie in ``range(k)``.
        k: Number of clusters.

    Returns:
        A float64 array with ``k`` rows. Row ``i`` is the mean of the points
        assigned to cluster ``i``. A cluster with no points is re-seeded at
        the data point that lies farthest from its own centroid (the next
        farthest point for each further empty cluster), instead of becoming
        NaN.
    """
    points = np.asarray(data, dtype=np.float64)
    labels = np.asarray(assignments)

    new_centroids = np.full((k,) + points.shape[1:], np.nan)
    empty_clusters = []
    for i in range(k):
        members = points[labels == i]
        if len(members):
            new_centroids[i] = members.mean(axis=0)
        else:
            # mean() of an empty selection would be NaN (with a
            # RuntimeWarning); handle these clusters separately below.
            empty_clusters.append(i)

    if empty_clusters and len(points):
        # Squared distance of every point to the mean of its own cluster.
        # Every label used here refers to a non-empty cluster, so the lookup
        # never reads a NaN row.
        residual = ((points - new_centroids[labels]) ** 2).reshape(len(points), -1).sum(axis=1)
        # Stable sort keeps the choice deterministic when distances tie.
        farthest_first = np.argsort(-residual, kind="stable")
        for cluster_idx, point_idx in zip(empty_clusters, farthest_first):
            new_centroids[cluster_idx] = points[point_idx]

    return new_centroids

def k_means_numpy(data, k, max_iterations=100, tolerance=1e-4):
    """Cluster the rows of ``data`` into ``k`` groups with k-means (NumPy).

    Args:
        data: 2-D NumPy array, one row per point.
        k: Number of clusters; the initial centroids are ``k`` distinct rows
            of ``data`` chosen at random.
        max_iterations: Upper bound on assign/update rounds; must be >= 1.
        tolerance: Absolute tolerance passed to ``np.allclose`` when checking
            whether the centroids stopped moving.

    Returns:
        ``(assignments, centroids)``: the cluster index of every row and the
        centroid array. On convergence the centroids are the ones the
        assignments were computed against; if the iteration budget runs out
        they are the means of the returned assignments.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
    """
    # Without at least one round there is no assignment to return.
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    centroids = initialize_centroids(data, k)

    for _ in range(max_iterations):
        assignments = assign_points_to_clusters_numpy(centroids, data)

        new_centroids = update_centroids(data, assignments, k)

        if np.allclose(centroids, new_centroids, atol=tolerance):
            break

        centroids = new_centroids

    return assignments, centroids

# Toy dataset as a (6, 2) array: three points at x=1 and three at x=10.
# The starting centroids are random, so the cluster numbering can differ
# between runs.
data = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])
assignments, centroids = k_means_numpy(data, k=2)
print("Cluster Assignments:", assignments)
print("Centroids:", centroids)


Cluster Assignments: [1 1 1 0 0 0]
Centroids: [[10.  2.]
 [ 1.  2.]]
