In [5]:
import numpy as np

def centeroidnp(arr):
    """Return the centroid (per-coordinate mean) of a set of points.

    Works for points of any dimensionality, not just 2-D and 3-D ones.

    Args:
        arr: Array-like of shape ``(n_points, n_dims)`` with at least one point.

    Returns:
        A tuple with ``n_dims`` entries: the mean of each coordinate column.

    Raises:
        ValueError: If ``arr`` is not 2-D or contains no points.
    """
    arr = np.asarray(arr)
    if arr.ndim != 2 or arr.shape[0] == 0:
        raise ValueError("arr must be a non-empty array of shape (n_points, n_dims)")
    length = arr.shape[0]
    # Sum every column and divide by the number of points; this is the same
    # arithmetic the 2-D / 3-D special cases used, applied to all columns.
    return tuple(np.sum(arr[:, col]) / length for col in range(arr.shape[1]))

def k_means_clustering(points: list[tuple[float, ...]], k: int, initial_centroids: list[tuple[float, ...]], max_iterations: int) -> list[tuple[float, ...]]:
    """Cluster ``points`` with k-means, starting from ``initial_centroids``.

    Each pass assigns every point to its nearest centroid (Euclidean
    distance, the lowest index wins ties) and then moves each centroid to the
    mean of its assigned points. A centroid with no assigned points keeps its
    position. The loop stops after ``max_iterations`` passes, or earlier once
    a pass leaves every centroid unchanged.

    Args:
        points: Points to cluster; all must have the same number of coordinates.
        k: Number of clusters. Not used by the implementation: the number of
            clusters is ``len(initial_centroids)``.
        initial_centroids: Starting centroid for each cluster. This list is
            not modified.
        max_iterations: Maximum number of assign/update passes.

    Returns:
        A new list with the final centroid of each cluster, in the same order
        as ``initial_centroids``.

    Raises:
        ValueError: If there are points to assign but no centroids.
    """
    points = list(points)
    # Work on a copy so the caller's initial_centroids list is never mutated.
    centroids = [tuple(centroid) for centroid in initial_centroids]
    # Convert each point once instead of once per centroid per iteration.
    point_arrays = [np.array(point) for point in points]
    if point_arrays and not centroids:
        raise ValueError("initial_centroids must not be empty")

    for _ in range(max_iterations):
        centroid_arrays = [np.array(centroid) for centroid in centroids]
        grouping = [[] for _ in centroids]
        for point, point_arr in zip(points, point_arrays):
            distances = [np.linalg.norm(point_arr - centroid_arr) for centroid_arr in centroid_arrays]
            # min() keeps the first of several equal distances, so the
            # lowest-indexed centroid wins ties.
            nearest = min(range(len(distances)), key=distances.__getitem__)
            grouping[nearest].append(point)

        # An empty cluster keeps its previous centroid.
        updated = [
            centeroidnp(np.array(members)) if members else previous
            for members, previous in zip(grouping, centroids)
        ]
        # Further passes cannot change anything once the centroids are stable.
        converged = updated == centroids
        centroids = updated
        if converged:
            break
    return centroids

# Demo on 2-D points, starting from the centroids (1, 1) and (10, 1).
k_means_clustering(
    points=[(1, 2), (1, 4), (1, 0), (10, 2), (10, 4), (10, 0)],
    k=2,
    initial_centroids=[(1, 1), (10, 1)],
    max_iterations=10,
)


[(np.float64(1.0), np.float64(2.0)), (np.float64(10.0), np.float64(2.0))]

In [6]:
# Demo on 3-D points, starting from the centroids (1, 1, 1) and (10, 10, 10).
print(
    k_means_clustering(
        [(0, 0, 0), (2, 2, 2), (1, 1, 1), (9, 10, 9), (10, 11, 10), (12, 11, 12)],
        2,
        [(1, 1, 1), (10, 10, 10)],
        10,
    )
)

[(np.float64(1.0), np.float64(1.0), np.float64(1.0)), (np.float64(10.333333333333334), np.float64(10.666666666666666), np.float64(10.333333333333334))]


In [4]:
# Shape of the six 3-D sample points once stacked into an array.
np.array(
    [(0, 0, 0), (2, 2, 2), (1, 1, 1), (9, 10, 9), (10, 11, 10), (12, 11, 12)]
).shape

(6, 3)

The website's preferred solution:

In [None]:
import numpy as np

def euclidean_distance(a, b):
    """Return the Euclidean length of ``a - b`` taken along axis 1.

    The difference is squared element-wise, summed over axis 1 and
    square-rooted, so a 2-D ``a`` and a single point ``b`` give one distance
    per row of ``a``.
    """
    offsets = a - b
    squared_lengths = np.square(offsets).sum(axis=1)
    return np.sqrt(squared_lengths)

def k_means_clustering(points, k, initial_centroids, max_iterations):
    """Cluster ``points`` with k-means and return centroids rounded to 4 decimals.

    Each pass assigns every point to its nearest centroid (``np.argmin`` picks
    the lowest index on ties) and recomputes each centroid as the mean of its
    members. A centroid without members keeps its current position. New
    centroids are rounded to 4 decimals *before* the convergence check, so the
    check compares like with like and the loop stops as soon as a pass leaves
    the rounded centroids unchanged.

    Args:
        points: Sequence of points, all with the same number of coordinates.
        k: Number of clusters; expected to equal ``len(initial_centroids)``.
        initial_centroids: Starting centroid for each cluster.
        max_iterations: Maximum number of assign/update passes.

    Returns:
        A list of ``k`` tuples, one final centroid per cluster.
    """
    points = np.array(points)
    centroids = np.array(initial_centroids)

    for _ in range(max_iterations):
        # Assign points to the nearest centroid
        distances = np.array([euclidean_distance(points, centroid) for centroid in centroids])
        assignments = np.argmin(distances, axis=0)

        # Recompute each centroid; the membership mask is evaluated once per cluster.
        updated = []
        for i in range(k):
            members = points[assignments == i]
            updated.append(members.mean(axis=0) if len(members) > 0 else centroids[i])
        # Round first: comparing the previous rounded centroids against
        # unrounded means would never match when a mean has more than 4 decimals.
        new_centroids = np.round(np.array(updated), 4)

        # Check for convergence
        if np.all(centroids == new_centroids):
            break
        centroids = new_centroids
    return [tuple(centroid) for centroid in centroids]