In [11]:
import numpy as np
import pandas as pd

In [17]:
import numpy as np
import pandas as pd

def k_means_clustering(data, initial_centroids, max_iterations=100):

    centroids = initial_centroids

    for _ in range(max_iterations):
        # Assign each point to the nearest centroid
        distances = np.linalg.norm(data[:, np.newaxis] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)

        # Calculate new centroids as the mean of assigned points
        centroids = np.array([data[labels == cluster].mean(axis=0) for cluster in range(initial_centroids.shape[0])])

    return centroids, labels


# Main
if __name__ == "__main__":
    # Generate random positive real numbers for the dataset
    np.random.seed(42)
    data = np.random.uniform(1.0, 10.0, (10, 2))  # 10 points, 2 features
    k = 3  # Number of clusters

    # Convert data to DataFrame
    df = pd.DataFrame(data, columns=["Feature_1", "Feature_2"])
    print("Original Data:")
    print(df)

    # Randomly initialize centroids
    initial_centroids = data[np.random.choice(data.shape[0], k, replace=False)]

    print("\nInitial Centroids:")
    print(initial_centroids)

    # First iteration of K-means
    centroids, labels = k_means_clustering(data, initial_centroids)
    df["Cluster"] = labels
    print("\nClustered Data after First Iteration:")
    print(df)
    print("\nUpdated Centroids after First Iteration:")
    print(centroids)

    # Second iteration with updated centroids
    centroids, labels = k_means_clustering(data, centroids)
    df["Cluster"] = labels
    print("\nClustered Data after Second Iteration:")
    print(df)
    print("\nUpdated Centroids after Second Iteration:")
    print(centroids)


Original Data:
   Feature_1  Feature_2
0   4.370861   9.556429
1   7.587945   6.387926
2   2.404168   2.403951
3   1.522753   8.795585
4   6.410035   7.372653
5   1.185260   9.729189
6   8.491984   2.911052
7   2.636425   2.650641
8   3.738180   5.722808
9   4.887505   3.621062

Initial Centroids:
[[7.58794548 6.38792636]
 [1.18526045 9.72918867]
 [6.41003511 7.3726532 ]]

Clustered Data after First Iteration:
   Feature_1  Feature_2  Cluster
0   4.370861   9.556429        1
1   7.587945   6.387926        0
2   2.404168   2.403951        2
3   1.522753   8.795585        1
4   6.410035   7.372653        0
5   1.185260   9.729189        1
6   8.491984   2.911052        0
7   2.636425   2.650641        2
8   3.738180   5.722808        2
9   4.887505   3.621062        2

Updated Centroids after First Iteration:
[[7.49665478 5.55721052]
 [2.35962468 9.36040091]
 [3.41656946 3.59961535]]

Clustered Data after Second Iteration:
   Feature_1  Feature_2  Cluster
0   4.370861   9.556429        1