In [1]:
import numpy as np

def k_means(data, k, max_iterations=100):
    """Cluster ``data`` into ``k`` groups with Lloyd's k-means iteration.

    Starting from randomly chosen centroids, the function alternates between
    assigning every point to its nearest centroid and moving each centroid to
    the mean of its assigned points, until the centroids stop changing or
    ``max_iterations`` updates have been performed.

    Parameters
    ----------
    data : numpy.ndarray
        Points to cluster, one per row; passed unchanged to the helpers.
    k : int
        Number of clusters.
    max_iterations : int, optional
        Upper bound on the number of centroid updates. With zero (or a
        negative value) no update is performed and the initial centroids are
        returned together with their labels.

    Returns
    -------
    labels : numpy.ndarray
        Index of the nearest returned centroid for every point.
    centroids : numpy.ndarray
        Final centroid positions.

    Notes
    -----
    ``labels`` is always computed against the returned ``centroids``, also
    when the iteration budget is exhausted before convergence.
    """
    # Step 1: Initialize centroids randomly
    centroids = initialize_centroids(data, k)

    # Step 2: Assign data points to nearest centroid. Doing this before the
    # loop guarantees ``labels`` exists even if the loop body never runs.
    labels = assign_labels(data, centroids)

    for _ in range(max_iterations):
        # Step 3: Update centroids based on assigned data points
        new_centroids = update_centroids(data, labels, k)

        # Converged: the update did not move any centroid, so the current
        # labels already describe the final clustering.
        if np.array_equal(centroids, new_centroids):
            break

        # Accept the update and immediately refresh the assignment so that
        # ``labels`` and ``centroids`` never get out of step.
        centroids = new_centroids
        labels = assign_labels(data, centroids)

    return labels, centroids

def initialize_centroids(data, k):
    """Pick ``k`` distinct rows of ``data`` to serve as starting centroids.

    Row indices are drawn without replacement from NumPy's global random
    generator (``np.random.choice``), so the result depends on the current
    global seed. Distinct indices do not guarantee distinct points if
    ``data`` itself contains duplicate rows.

    Parameters
    ----------
    data : numpy.ndarray
        Candidate points, one per row.
    k : int
        Number of centroids to draw; NumPy raises ``ValueError`` when it
        exceeds the number of rows.

    Returns
    -------
    numpy.ndarray
        The ``k`` selected rows, in the order they were drawn.
    """
    n_points = data.shape[0]
    # Sampling without replacement: no row index can be picked twice.
    chosen_rows = np.random.choice(n_points, size=k, replace=False)
    return data[chosen_rows]

def assign_labels(data, centroids):
    """Return, for every point, the index of its closest centroid.

    Closeness is the Euclidean distance. When two centroids are equally
    close, the one with the lower index wins (``argmin`` returns the first
    minimum).

    Parameters
    ----------
    data : numpy.ndarray
        Points, one per row -- shape ``(n_points, n_features)`` in the usage
        shown in this notebook.
    centroids : numpy.ndarray
        Centroid positions, one per row, with the same number of features.

    Returns
    -------
    numpy.ndarray
        Integer array with one centroid index per point.
    """
    # Inserting an axis lets broadcasting pair every point with every
    # centroid: (n, 1, d) - (k, d) -> (n, k, d).
    offsets = data[:, np.newaxis] - centroids

    # Collapse the feature axis into a distance, then pick the nearest
    # centroid along the centroid axis.
    return np.linalg.norm(offsets, axis=-1).argmin(axis=1)

def update_centroids(data, labels, k):
    """Recompute each cluster's centroid as the mean of its assigned points.

    A cluster that received no points has no mean: averaging an empty slice
    yields NaN (plus a RuntimeWarning), and a NaN centroid can never compare
    equal in a convergence check. Such clusters are therefore re-seeded with
    the points that currently lie farthest from the mean of their own
    cluster. When every cluster has at least one point the result is exactly
    the per-cluster mean.

    Parameters
    ----------
    data : array-like
        Points, one per row; expected to be ``(n_points, n_features)``.
    labels : array-like of int
        Cluster index of every point. Points whose label lies outside
        ``range(k)`` belong to no cluster; they do not contribute to any mean
        and are the first candidates for re-seeding an empty cluster.
    k : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        Array with ``k`` rows, one centroid per cluster. If there are more
        empty clusters than points, the surplus clusters are placed at the
        overall mean of ``data``; with no points at all the rows are NaN,
        because there is nothing to average or re-seed from.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    members = [data[labels == i] for i in range(k)]
    empty = [i for i, points in enumerate(members) if len(points) == 0]
    if not empty:
        # Common case: plain per-cluster mean.
        return np.array([points.mean(axis=0) for points in members])

    # Placeholder for empty clusters so the array is well-formed and finite;
    # it is overwritten below whenever a point is available for re-seeding.
    overall_mean = data.mean(axis=0)
    centroids = np.array(
        [points.mean(axis=0) if len(points) else overall_mean for points in members]
    )

    # Distance of every point to the mean of the cluster it is assigned to.
    # Unassigned points (label outside range(k)) count as infinitely far.
    in_range = (labels >= 0) & (labels < k)
    spread = np.full(len(data), np.inf)
    spread[in_range] = np.linalg.norm(
        data[in_range] - centroids[labels[in_range]], axis=-1
    )

    # Hand the farthest points to the empty clusters, one point per cluster.
    # The stable sort keeps the choice deterministic when distances tie.
    farthest_first = np.argsort(-spread, kind="stable")
    for cluster, point in zip(empty, farthest_first):
        centroids[cluster] = data[point]

    return centroids

In [2]:
# Fix the seed of NumPy's global random generator: the initial centroids are
# drawn from it, so without a seed the cluster numbering (which group is
# called 0 and which 1) can differ from one run to the next.
np.random.seed(0)

# Six 2-D sample points.
data = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])
k = 2

labels, centroids = k_means(data, k)
print("Cluster labels:", labels)
print("Centroids:", centroids)

Cluster labels: [1 1 0 0 1 0]
Centroids: [[7.33333333 9.        ]
 [1.16666667 1.46666667]]
