In [12]:
import random
import numpy as np

In [None]:
# Function to calculate Euclidean distance between two points
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Coordinates are paired positionally with ``zip``, so when the two points
    have different lengths the surplus coordinates of the longer one are
    ignored rather than raising an error.

    Args:
        point1: Iterable of numeric coordinates.
        point2: Iterable of numeric coordinates.

    Returns:
        The square root (via ``np.sqrt``) of the summed squared
        coordinate differences.
    """
    squared_total = 0
    for coord_a, coord_b in zip(point1, point2):
        squared_total = squared_total + (coord_a - coord_b) ** 2
    return np.sqrt(squared_total)


# K-Means clustering function
def _assign_points(points, centroids):
    """Group ``points`` by the index of their nearest centroid."""
    clusters = {i: [] for i in range(len(centroids))}
    for point in points:
        # One distance per centroid; ties go to the lowest index (np.argmin).
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        clusters[int(np.argmin(distances))].append(point)
    return clusters


def k_means(X, k, max_iters=100, seed=None):
    """Cluster the points in ``X`` into ``k`` groups using k-means.

    The initial centroids are ``k`` points drawn at random from ``X``. Each
    round assigns every point to its nearest centroid and then moves each
    centroid to the mean of the points assigned to it, until the centroids
    stop changing or ``max_iters`` rounds have run.

    Args:
        X: Iterable of points, each an iterable of numeric coordinates.
        k: Number of clusters; must satisfy ``1 <= k <= len(X)``.
        max_iters: Maximum number of assign/update rounds.
        seed: Optional seed. When given, the initial centroids are drawn
            from a private ``random.Random(seed)`` so the run is
            reproducible; when ``None`` the module-level ``random``
            generator is used.

    Returns:
        A tuple ``(centroids, clusters)``. ``centroids`` is a list of ``k``
        coordinate lists. ``clusters`` maps each index ``0..k-1`` to the list
        of points (the same objects that were in ``X``) assigned to that
        centroid. The returned clusters always correspond to the returned
        centroids, including when ``max_iters`` is exhausted or is 0.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            points.
    """
    points = list(X)
    if not 1 <= k <= len(points):
        raise ValueError(
            f"k must be between 1 and the number of points ({len(points)}), got {k}"
        )

    rng = random if seed is None else random.Random(seed)
    # Copy the sampled points so the returned centroids never alias the data.
    centroids = [list(point) for point in rng.sample(points, k)]

    for _ in range(max_iters):
        clusters = _assign_points(points, centroids)

        # A cluster can end up empty (e.g. duplicate points were picked as
        # initial centroids). The mean of nothing is NaN, which would break
        # the next distance computation, so keep the previous centroid.
        new_centroids = [
            np.mean(cluster, axis=0).tolist() if cluster else centroids[i]
            for i, cluster in clusters.items()
        ]

        # Stop once an update leaves every centroid exactly where it was.
        if new_centroids == centroids:
            break

        centroids = new_centroids
    else:
        # Iteration budget exhausted (or zero): assign once more so the
        # returned clusters match the returned centroids.
        clusters = _assign_points(points, centroids)

    return centroids, clusters

In [34]:
# Sample dataset: six 2-D points forming two well-separated groups
X = [
    [1, 2], [2, 3], [3, 4],
    [8, 9], [9, 10], [10, 11],
]
k = 2  # how many clusters to look for

# Initial centroids are chosen at random, so the cluster labels (0 / 1)
# may be swapped between runs.
centroids, clusters = k_means(X, k)

print("Centroids:", centroids)
print("Clusters:", clusters)

Centroids: [[2.0, 3.0], [9.0, 10.0]]
Clusters: {0: [[1, 2], [2, 3], [3, 4]], 1: [[8, 9], [9, 10], [10, 11]]}


In [36]:
# Same experiment in three dimensions: six 3-D points in two groups
X = [
    [1, 2, 3], [2, 3, 4], [3, 4, 5],
    [8, 9, 10], [9, 10, 11], [10, 11, 12],
]
k = 2  # how many clusters to look for

# Initial centroids are chosen at random, so the cluster labels (0 / 1)
# may be swapped between runs.
centroids, clusters = k_means(X, k)

print("Centroids:", centroids)
print("Clusters:", clusters)

Centroids: [[2.0, 3.0, 4.0], [9.0, 10.0, 11.0]]
Clusters: {0: [[1, 2, 3], [2, 3, 4], [3, 4, 5]], 1: [[8, 9, 10], [9, 10, 11], [10, 11, 12]]}
