In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import os

In [2]:
def bisecting_kmeans(X, k, iter):
    """Cluster the rows of ``X`` into ``k`` groups with bisecting k-means.

    The algorithm starts with one cluster holding every row, then repeatedly
    picks the largest cluster and splits it in two with 2-means.  Each split
    is attempted ``iter`` times with different seeds and the attempt with the
    lowest sum of squared errors (SSE) is kept.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_columns)
        Data matrix.  The last column is treated as a label column and is
        excluded from the distance computations; every other column must be
        numeric.
    k : int
        Desired number of clusters.  ``k <= 1`` returns a single cluster.
    iter : int
        Number of trial bisections per split (must be >= 1).  The name
        shadows the built-in inside this function only; it is kept so that
        existing keyword callers continue to work.

    Returns
    -------
    numpy.ndarray of shape (n_rows,)
        Integer cluster index (0-based) for every row of ``X``.  Fewer than
        ``k`` distinct indices are returned when the largest remaining
        cluster cannot be split (e.g. all of its points are identical).

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no feature column besides the label column,
        ``k`` exceeds the number of rows, or ``iter`` is smaller than 1.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_rows, n_columns)")

    n = X.shape[0]
    cluster_indices = np.zeros(n, dtype=int)
    if k <= 1:
        return cluster_indices

    if X.shape[1] < 2:
        raise ValueError("X needs at least one feature column plus the label column")
    if k > n:
        raise ValueError(f"cannot form k={k} clusters from {n} rows")
    if iter < 1:
        raise ValueError("iter must be at least 1")

    # Only the feature columns take part in clustering (last column = label).
    features = np.asarray(X[:, :-1], dtype=float)

    # Clusters are tracked as arrays of row indices into X, which makes the
    # final label assignment exact even when rows share values.
    clusters = [np.arange(n)]

    while len(clusters) < k:
        # Pick the cluster with the most members as the one to bisect.
        target = max(range(len(clusters)), key=lambda i: clusters[i].size)
        members = clusters[target]
        if members.size < 2:
            break  # nothing left that can be split

        labels = _best_bisection(features[members], iter)
        if labels is None:
            break

        left = members[labels == 0]
        right = members[labels == 1]
        if left.size == 0 or right.size == 0:
            # 2-means collapsed to one group (e.g. duplicate points); stop
            # rather than loop forever on an unsplittable cluster.
            break

        # The first half replaces the parent, the second half is appended.
        clusters[target] = left
        clusters.append(right)

    for index, members in enumerate(clusters):
        cluster_indices[members] = index

    return cluster_indices


def _best_bisection(points, n_trials):
    """Return the 2-means labels with the lowest SSE over ``n_trials`` seeded fits."""
    best_sse = np.inf
    best_labels = None

    for seed in range(n_trials):
        # One initialisation per fit; the trial number doubles as the seed so
        # results are reproducible while still differing between trials.
        model = KMeans(n_clusters=2, n_init=1, random_state=seed)
        model.fit(points)
        labels = model.labels_

        sse = np.sum((points - model.cluster_centers_[labels]) ** 2)
        if sse < best_sse:
            best_sse = sse
            best_labels = labels

    return best_labels

In [None]:
# Limit OpenMP to a single thread.
# NOTE(review): numpy and scikit-learn are already imported in the first cell.
# OpenMP runtimes usually read OMP_NUM_THREADS when they are loaded, so this
# assignment may come too late to take effect -- consider moving it above the
# imports. TODO confirm.
os.environ['OMP_NUM_THREADS'] = '1'

# Load the semicolon-separated dataset. The file has no header row, so the
# column names are supplied here.
df = pd.read_csv('dist.csv', delimiter=';', header=None, names=['x', 'y', 'class'])

# Apply Bisecting k-Means clustering
k = 3  # Number of clusters
# Value for the `iter` argument of bisecting_kmeans. It is named n_iter so the
# built-in iter() is not shadowed for the rest of the notebook session.
n_iter = 10
X = df.values  # Convert dataframe to numpy array (last column is 'class')
cluster_indices = bisecting_kmeans(X, k, n_iter)

print(cluster_indices)