In [1]:
import numpy as np

def _assign_labels(X, centroids):
    """Return, for every row of X, the index of the closest centroid (Euclidean)."""
    # Broadcasting (k, 1, d) against (n, d) yields a (k, n, d) difference cube;
    # summing over the feature axis leaves a (k, n) distance table.
    distances = np.sqrt(((X - centroids[:, np.newaxis])**2).sum(axis=2))
    return np.argmin(distances, axis=0)


def kmeans(X, k, max_iters=100):
    """
    K-means clustering (Lloyd's algorithm) implemented with NumPy only.

    Initial centroids are k distinct rows of X drawn with a fixed seed (0), so
    repeated calls on the same data give the same result. The draw uses a
    private generator, leaving the caller's global NumPy random state untouched.

    Parameters:
    X (numpy.ndarray): Input data points, each row represents a data point.
        Anything convertible to a 2-D array (e.g. a nested list) is accepted.
    k (int): Number of clusters; must satisfy 1 <= k <= number of rows in X.
    max_iters (int): Maximum number of iterations.

    Returns:
    centroids (numpy.ndarray): Final centroids of the clusters.
    labels (numpy.ndarray): Labels of each data point indicating which cluster it belongs to.

    Raises:
    ValueError: If X is not 2-D or k is outside [1, number of rows in X].
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array with one data point per row")
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of data points ({n_samples}), got {k}"
        )

    # RandomState(0) reproduces the stream of np.random.seed(0) without
    # reseeding the process-wide generator as a hidden side effect.
    rng = np.random.RandomState(0)
    centroids = X[rng.choice(n_samples, k, replace=False)]

    for _ in range(max_iters):
        # Assign each data point to the nearest centroid
        labels = _assign_labels(X, centroids)

        # Update centroids. A cluster that lost all its points keeps its
        # previous centroid; the mean of an empty selection is NaN and would
        # corrupt every later distance computation.
        rows = []
        for i in range(k):
            members = X[labels == i]
            rows.append(members.mean(axis=0) if len(members) else centroids[i])
        new_centroids = np.array(rows)

        # Check for convergence
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids
    else:
        # Iteration budget exhausted (or max_iters == 0): the centroids moved
        # after the last assignment, so recompute labels to match them.
        labels = _assign_labels(X, centroids)

    return centroids, labels

# Demonstration: cluster a synthetic 2-D sample.
# A fixed seed keeps the generated sample reproducible between runs.
np.random.seed(0)
X = np.random.standard_normal(size=(100, 2))  # 100 points, 2 features each

# How many clusters to look for
k = 3

# Run the clustering and unpack both results
centroids, labels = kmeans(X, k=k)

# Each heading is followed by its array on the next line
print("Final centroids:", centroids, sep="\n")
print("Labels:", labels, sep="\n")


Final centroids:
[[-0.880185   -0.22121937]
 [ 0.25066515  1.18144736]
 [ 0.96751239 -0.52427407]]
Labels:
[2 1 2 2 1 1 2 1 2 2 0 2 2 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 2 0 0 2 2
 2 0 0 0 2 1 2 0 1 1 1 1 1 2 0 1 0 1 1 1 1 2 1 2 1 0 1 1 0 0 0 2 2 0 1 2 1
 0 1 0 0 2 0 1 2 0 0 2 2 2 0 0 0 0 0 2 0 1 0 2 2 1 1]
