In [3]:
import numpy as np
# Toy 2-D data set: one row per observation, one column per feature.
data = np.array(
    [
        [1.0, 1.0],
        [1.5, 2.0],
        [3.0, 4.0],
        [5.0, 7.0],
        [3.5, 5.0],
        [4.5, 5.0],
        [3.5, 4.5],
    ]
)

# Clustering settings.
k = 2                 # number of clusters to form
max_iterations = 100  # upper bound on the number of refinement passes

# Seed NumPy's global RNG so the starting centroids are reproducible, then
# use k distinct rows of `data` (sampled without replacement) as the
# initial centroids.
np.random.seed(42)
initial_rows = np.random.choice(len(data), size=k, replace=False)
centroids = data[initial_rows]


def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance between two points.

    Both arguments are expected to be NumPy arrays (or values that support
    element-wise subtraction with each other). The squared differences are
    summed over every element, so the result is a single scalar.
    """
    offset = point1 - point2
    squared_length = np.sum(np.square(offset))
    return np.sqrt(squared_length)


# K-means (Lloyd's algorithm): alternate between assigning each point to its
# nearest centroid and moving each centroid to the mean of its assigned
# points, until the centroids stop moving or max_iterations is reached.

# Defined up front so the report below still works if the loop body never
# runs (max_iterations == 0).
clusters = [[] for _ in range(k)]

for iteration in range(max_iterations):

    # Assignment step: put every point in the bucket of its closest centroid.
    clusters = [[] for _ in range(k)]
    for point in data:
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        cluster_index = np.argmin(distances)  # ties go to the lowest-index centroid
        clusters[cluster_index].append(point)

    # Update step: each centroid becomes the mean of the points assigned to it.
    new_centroids = []
    for cluster in clusters:
        if cluster:
            new_centroid = np.mean(cluster, axis=0)
        else:
            # An empty cluster has no mean; re-seed it at a randomly chosen
            # data point so that k clusters are maintained.
            new_centroid = data[np.random.choice(data.shape[0])]
        new_centroids.append(new_centroid)

    new_centroids = np.array(new_centroids)

    # Compare with a tolerance instead of exact float equality, so that
    # rounding noise in the means cannot prevent convergence from being
    # detected.
    converged = np.allclose(centroids, new_centroids)
    centroids = new_centroids
    if converged:
        print(f"Converged after {iteration + 1} iterations")
        break
else:
    # Reached only when the loop finished without hitting `break`.
    print(f"Stopped after {max_iterations} iterations without converging")


print("Final centroids:")
print(centroids)

for i, cluster in enumerate(clusters):
    print(f"\nCluster {i + 1}:")
    print(np.array(cluster))

Converged after 3 iterations
Final centroids:
[[1.25 1.5 ]
 [3.9  5.1 ]]

Cluster 1:
[[1.  1. ]
 [1.5 2. ]]

Cluster 2:
[[3.  4. ]
 [5.  7. ]
 [3.5 5. ]
 [4.5 5. ]
 [3.5 4.5]]
