In [1]:
import random
import numpy as np

def _assign_points(points, vectors, centroids):
    """Group every point under the index of its nearest centroid.

    ``points`` are the caller's original objects (these are what end up in
    the returned clusters); ``vectors`` are the same points pre-converted to
    arrays so the conversion is not repeated for every centroid.
    """
    centroid_vectors = [np.asarray(centroid) for centroid in centroids]
    clusters = [[] for _ in centroid_vectors]
    for point, vector in zip(points, vectors):
        distances = [np.linalg.norm(vector - center) for center in centroid_vectors]
        # np.argmin picks the first index on ties.
        clusters[np.argmin(distances)].append(point)
    return clusters


def k_means(data, k, max_iterations=100):
    """Partition ``data`` into ``k`` clusters with the k-means procedure.

    Initial centroids are ``k`` points drawn at random (without replacement)
    from ``data``. Each iteration assigns every point to its nearest centroid
    (Euclidean distance) and then moves each centroid to the mean of its
    cluster, stopping early once the centroids no longer change.

    Args:
        data: Iterable of equally sized numeric points (lists, tuples or
            array rows). It is materialized into a list once.
        k: Number of clusters to form.
        max_iterations: Upper bound on the number of update iterations.

    Returns:
        A ``(clusters, centroids)`` tuple. ``clusters`` is a list of ``k``
        lists holding the original point objects; ``centroids`` is a list of
        ``k`` centroids, index-aligned with ``clusters``.

    Raises:
        ValueError: If ``k`` is negative or larger than the number of points
            (raised by ``random.sample``).
    """
    # Materialize once so generators and ndarrays work with random.sample,
    # and convert each point to an array a single time instead of per centroid.
    points = list(data)
    vectors = [np.asarray(point) for point in points]

    # Randomly initialize the centroids
    centroids = random.sample(points, k)

    for _ in range(max_iterations):
        clusters = _assign_points(points, vectors, centroids)

        # An empty cluster has no mean (np.mean([]) is NaN, which would then
        # poison every later distance), so its centroid stays where it was.
        new_centroids = [
            np.mean(cluster, axis=0) if cluster else centroid
            for cluster, centroid in zip(clusters, centroids)
        ]

        # Converged: the assignment above already matches these centroids.
        if np.array_equal(centroids, new_centroids):
            return clusters, centroids

        centroids = new_centroids

    # Iteration budget exhausted (or zero): assign once more so the returned
    # clusters correspond to the returned centroids.
    return _assign_points(points, vectors, centroids), centroids

def assign_to_cluster(new_point, centroids):
    """Return the index of the centroid closest to ``new_point``.

    Closeness is the Euclidean distance between the point and each centroid.
    When several centroids are equally close, the lowest index is returned.
    """
    target = np.array(new_point)

    # Collect the distance from the point to every centroid, in order
    gaps = []
    for center in centroids:
        offset = target - np.array(center)
        gaps.append(np.linalg.norm(offset))

    # Position of the smallest distance identifies the nearest centroid
    return np.argmin(gaps)

# Example usage: a small set of 2-D sample points split into two clusters
data = [[1, 2], [2, 1], [3, 4], [4, 3], [10, 8], [8, 10], [9, 7], [7, 9]]
k = 2

# Run k-means on the sample data (initial centroids are chosen at random)
clusters, centroids = k_means(data, k)

# Report the resulting clusters and centroids, numbered from 1
print("Clusters:")
for number, members in enumerate(clusters, start=1):
    print(f"Cluster {number}: {members}")

print("\nCentroids:")
for number, center in enumerate(centroids, start=1):
    print(f"Centroid {number}: {center}")

# Classify a point that was not part of the clustered data
new_data_point = [5, 6]
cluster_index = assign_to_cluster(new_data_point, centroids)
print(f"\nThe new data point belongs to Cluster {cluster_index+1}")

Clusters:
Cluster 1: [[10, 8], [8, 10], [9, 7], [7, 9]]
Cluster 2: [[1, 2], [2, 1], [3, 4], [4, 3]]

Centroids:
Centroid 1: [8.5 8.5]
Centroid 2: [2.5 2.5]

The new data point belongs to Cluster 1
