In [2]:
import pandas as pd
import numpy as np

# Toy dataset: two features that move in opposite directions
data = dict(
    Feature1=[10, 15, 20, 25, 30],
    Feature2=[30, 25, 20, 15, 10],
)

# Tabular view of the data plus the raw matrix used by the clustering code
df = pd.DataFrame(data)
X = df.to_numpy()

# How many clusters to look for
k = 3

# Seed the centroids with k distinct rows of X drawn at random
initial_centroids = X[np.random.choice(len(X), size=k, replace=False)]

def compute_distances(X, centroids):
    """Return the Euclidean distance from every sample to every centroid.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points, one per row.
    centroids : array-like of shape (n_centroids, n_features)
        Centroid coordinates, one per row.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_centroids)
        Entry ``[i, j]`` is the Euclidean norm of ``X[i] - centroids[j]``.
    """
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(centroids, dtype=float)
    # Broadcast (n_samples, 1, n_features) against (1, n_centroids, n_features)
    # so all pairwise differences are formed in one NumPy operation instead
    # of a Python-level double loop.
    diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.linalg.norm(diffs, axis=2)

def assign_clusters(distances):
    """Pick, for each row of ``distances``, the column holding the minimum.

    ``distances`` is a 2-D table with one row per sample and one column per
    centroid; the result has one index per row. Ties go to the lowest
    column index, as with ``np.argmin``.
    """
    nearest = np.argmin(distances, axis=1)
    return nearest

def update_centroids(X, labels, k, fallback=None):
    """Recompute each centroid as the mean of the samples assigned to it.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points, one per row.
    labels : array-like of shape (n_samples,)
        Cluster index (``0 .. k-1``) assigned to each row of ``X``.
    k : int
        Number of clusters.
    fallback : array-like of shape (k, n_features), optional
        Positions to use for clusters that received no samples (typically
        the previous centroids). When omitted, an empty cluster is placed
        at the mean of all rows of ``X``.

    Returns
    -------
    numpy.ndarray of shape (k, n_features)
        The updated centroids, as floats.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)
    centroids = np.zeros((k, X.shape[1]))
    for i in range(k):
        members = X[labels == i]
        if len(members):
            centroids[i, :] = members.mean(axis=0)
        elif fallback is not None:
            centroids[i, :] = np.asarray(fallback)[i]
        else:
            # Taking the mean of an empty selection yields NaN (plus a
            # RuntimeWarning), which would then contaminate every later
            # distance computation; use a finite position instead.
            centroids[i, :] = X.mean(axis=0)
    return centroids

def kmeans(X, k, max_iters=100, init_centroids=None):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Relies on ``compute_distances``, ``assign_clusters`` and
    ``update_centroids`` defined alongside this function.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points, one per row.
    k : int
        Number of clusters.
    max_iters : int, default 100
        Upper bound on the number of centroid updates.
    init_centroids : array-like of shape (k, n_features), optional
        Starting centroids. When omitted, ``k`` distinct rows of ``X`` are
        drawn at random, so the starting point always matches the ``X`` and
        ``k`` actually passed in rather than depending on a module-level
        variable.

    Returns
    -------
    labels : numpy.ndarray of shape (n_samples,)
        Index of the nearest returned centroid for each sample.
    centroids : numpy.ndarray of shape (k, n_features)
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If ``init_centroids`` does not have shape ``(k, n_features)``, or
        (raised by ``np.random.choice``) if ``k`` exceeds the number of
        samples when drawing random starting centroids.
    """
    X = np.asarray(X)
    if init_centroids is None:
        init_centroids = X[np.random.choice(X.shape[0], k, replace=False)]
    centroids = np.asarray(init_centroids, dtype=float)
    if centroids.shape != (k, X.shape[1]):
        raise ValueError(
            f"init_centroids must have shape {(k, X.shape[1])}, "
            f"got {centroids.shape}"
        )

    # Assign once up front so ``labels`` is defined even when max_iters <= 0.
    labels = assign_clusters(compute_distances(X, centroids))
    for _ in range(max_iters):
        old_centroids = centroids
        centroids = update_centroids(X, labels, k)
        # Re-assign against the new centroids so the returned labels always
        # correspond to the returned centroids, even if the iteration cap
        # is reached before convergence.
        labels = assign_clusters(compute_distances(X, centroids))
        # Check for convergence: centroids stopped moving
        if np.array_equal(centroids, old_centroids):
            break
    return labels, centroids

# Run the clustering on the sample matrix
labels, centroids = kmeans(X, k)

# Record each row's cluster index as a new DataFrame column
df['Cluster'] = labels

# Show the labelled table followed by the final centroid coordinates
print(df, "Centroids:", centroids, sep="\n")


   Feature1  Feature2  Cluster
0        10        30        0
1        15        25        0
2        20        20        0
3        25        15        1
4        30        10        2
Centroids:
[[15. 25.]
 [25. 15.]
 [30. 10.]]
