In [None]:
import random
import numpy as np

class KMeans:
    """Minimal k-means clusterer (Lloyd's algorithm) built on NumPy.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of centroids to fit.
    max_iter : int, default 100
        Upper bound on assign/move iterations; iteration stops earlier once
        the centroids stop moving (``np.allclose``).
    random_state : int or None, default None
        Seed for a private RNG used to pick the initial centroids. When
        ``None`` the module-level ``random`` generator is used, so a prior
        ``random.seed(...)`` call still controls the outcome.

    Attributes
    ----------
    centroids : numpy.ndarray or None
        Array of shape ``(n_clusters, n_features)`` after ``fit_predict``;
        ``None`` before fitting.
    """

    def __init__(self, n_clusters=2, max_iter=100, random_state=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state
        self.centroids = None

    def fit_predict(self, X):
        """Fit centroids to ``X`` and return the cluster index of each row.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            Integer array of shape ``(n_samples,)``; entry ``i`` is the index
            of the centroid nearest to ``X[i]`` in ``self.centroids``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``n_clusters`` is not in ``[1, n_samples]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters={self.n_clusters} must be between 1 and n_samples={n_samples}"
            )

        # Initial centroids are distinct rows of X chosen without replacement.
        rng = random if self.random_state is None else random.Random(self.random_state)
        random_index = rng.sample(range(n_samples), self.n_clusters)
        self.centroids = X[random_index]

        for _ in range(self.max_iter):
            cluster_group = self.assign_clusters(X)
            old_centroids = self.centroids
            self.centroids = self.move_centroids(X, cluster_group)
            if np.allclose(old_centroids, self.centroids):
                break

        # Re-assign against the final centroids so the returned labels always
        # match self.centroids (also covers max_iter == 0, where the loop
        # body never runs).
        return self.assign_clusters(X)

    def assign_clusters(self, X):
        """Return, for every row of ``X``, the index of its nearest centroid.

        Distances are Euclidean; ties go to the lowest centroid index
        (``np.argmin`` behaviour).
        """
        X = np.asarray(X)
        # Broadcast (n_samples, 1, n_features) against (n_clusters, n_features).
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)

    def move_centroids(self, X, cluster_group):
        """Return new centroids as the mean of the points in each cluster.

        A cluster with no assigned points keeps its current centroid instead
        of becoming NaN (the mean of an empty slice). If no current centroids
        are stored, such a cluster yields a NaN row.
        """
        X = np.asarray(X, dtype=float)
        cluster_group = np.asarray(cluster_group)
        previous = None if self.centroids is None else np.asarray(self.centroids, dtype=float)

        new_centroids = []
        for k in range(self.n_clusters):
            members = X[cluster_group == k]
            if len(members) > 0:
                new_centroids.append(members.mean(axis=0))
            elif previous is not None:
                new_centroids.append(previous[k])
            else:
                new_centroids.append(np.full(X.shape[1], np.nan))
        return np.array(new_centroids)


In [None]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler


# iris = datasets.load_iris()
# X = iris.data


# scaler = StandardScaler()
# X = scaler.fit_transform(X)
# Toy 2-D dataset: 32 points with small integer coordinates.
X = np.array([
    [2, 3], [1, 1], [2, 2], [4, 4], [6, 6], [7, 8], [1, 3], [3, 3],
    [5, 5], [8, 8], [3, 7], [9, 3], [6, 2], [4, 5], [5, 7], [2, 6],
    [3, 6], [7, 9], [8, 7], [5, 6], [6, 4], [7, 5], [2, 5], [4, 6],
    [1, 2], [8, 3], [3, 8], [2, 4], [6, 5], [9, 6], [4, 2], [5, 4]
])

# KMeans.fit_predict picks its starting centroids with the stdlib `random`
# module; seed it so re-running this cell reproduces the same clustering.
random.seed(42)

kmeans = KMeans(n_clusters=3, max_iter=100)
clusters = kmeans.fit_predict(X)

print("Cluster assignments:", clusters)
print("Centroids:", kmeans.centroids)


Cluster assignments: [2 2 2 1 0 0 2 2 0 0 1 0 2 1 0 1 1 0 0 0 0 0 1 1 2 0 1 2 0 0 2 0]
Centroids: [[6.73333333 5.73333333]
 [3.125      5.875     ]
 [2.44444444 2.44444444]]


In [None]:
# from sklearn.metrics import accuracy_score
# from scipy.stats import mode


# labels = np.zeros_like(clusters)
# for i in range(3):
#     mask = (clusters == i)
#     labels[mask] = mode(iris.target[mask])[0]

# accuracy = accuracy_score(iris.target, labels)
# print("Clustering accuracy:", accuracy)


In [None]:
# Reference labels for the 32 rows of X (values 0, 1 or 2), eight per line to
# mirror the layout of the X literal.
true_labels = np.array(
    [
        2, 2, 2, 1, 0, 0, 2, 2,
        0, 0, 1, 0, 2, 1, 0, 0,
        1, 0, 0, 0, 0, 0, 1, 1,
        2, 0, 1, 2, 0, 0, 2, 0,
    ]
)

In [None]:
def calculate_accuracy(true_labels, predicted_labels):
    """Score a clustering against reference labels by majority-vote mapping.

    Each cluster id in ``predicted_labels`` is mapped to the most frequent
    true label among its members (ties go to the smallest label), and the
    fraction of points whose mapped label equals the true label is returned.
    The result is therefore invariant to how cluster ids are numbered.

    Parameters
    ----------
    true_labels : array-like of shape (n_samples,)
        Reference label of each sample.
    predicted_labels : array-like of shape (n_samples,)
        Cluster id of each sample. Ids need not be contiguous or
        non-negative; only ids that actually occur are considered.

    Returns
    -------
    numpy.floating
        Accuracy in ``[0, 1]``.

    Raises
    ------
    ValueError
        If ``predicted_labels`` is empty.
    """
    true_labels = np.asarray(true_labels)
    predicted_labels = np.asarray(predicted_labels)
    if predicted_labels.size == 0:
        raise ValueError("predicted_labels must not be empty")

    mapped = np.empty_like(true_labels)
    # Iterate over the ids that are present, so every mask is non-empty and
    # every sample receives a mapped label.
    for cluster_id in np.unique(predicted_labels):
        mask = predicted_labels == cluster_id
        # np.unique returns sorted values, so argmax picks the smallest
        # label among equally frequent ones.
        values, counts = np.unique(true_labels[mask], return_counts=True)
        mapped[mask] = values[np.argmax(counts)]
    return np.mean(mapped == true_labels)

# Score the k-means assignments against the reference labels and report it.
accuracy = calculate_accuracy(true_labels=true_labels, predicted_labels=clusters)
print("Accuracy:", accuracy)

Accuracy: 0.96875
