In [3]:
import numpy as np

from sklearn.datasets import make_blobs

In [None]:
class Kmeans:
    """Plain Lloyd-style k-means clustering on a 2-D NumPy array.

    Attributes:
        k_clusters: Number of clusters (and centroids) to fit.
        epoches: Maximum number of assignment/update iterations in ``fit``.
        best_centroids: Array of shape ``(k_clusters, n_features)`` holding
            the centroids found by the last ``fit`` call; ``None`` until
            ``fit`` has been called.
    """

    def __init__(self, k_clusters, epoches=100):
        self.k_clusters = k_clusters
        self.epoches = epoches
        self.best_centroids = None

    @staticmethod
    def _nearest_centroid(X: np.ndarray, centroids: np.ndarray) -> np.ndarray:
        """Return, for each row of ``X``, the index of the closest centroid."""
        # Broadcast (n_samples, 1, n_features) against (k, n_features) to get
        # an (n_samples, k) matrix of Euclidean distances.
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        return np.argmin(distances, axis=1)

    def fit(self, X: np.ndarray):
        """Compute centroids for ``X`` and store them in ``best_centroids``.

        Initial centroids are ``k_clusters`` distinct rows of ``X`` drawn with
        ``np.random.choice``. Iteration stops after ``epoches`` rounds or as
        soon as the centroids stop changing.

        Args:
            X: Array of shape ``(n_samples, n_features)``.

        Raises:
            ValueError: If ``X`` is not 2-D, or if ``k_clusters`` exceeds the
                number of samples (raised by ``np.random.choice``).
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D input"
            )

        n_samples = X.shape[0]
        random_indices = np.random.choice(
            n_samples, size=self.k_clusters, replace=False
        )
        centroids = X[random_indices]

        for _ in range(self.epoches):
            labels = self._nearest_centroid(X, centroids)

            updated = []
            for cluster in range(self.k_clusters):
                members = X[labels == cluster]
                if len(members):
                    updated.append(members.mean(axis=0))
                else:
                    # An empty cluster (possible when initial centroids
                    # coincide, e.g. duplicate rows) would yield a NaN mean
                    # that corrupts every later iteration; keep the previous
                    # centroid instead.
                    updated.append(centroids[cluster])
            new_centroids = np.array(updated)

            if np.array_equal(new_centroids, centroids):
                break

            centroids = new_centroids

        self.best_centroids = centroids

    def predict(self, X: np.ndarray):
        """Assign each row of ``X`` to its nearest fitted centroid.

        ``fit`` must have been called first so that ``best_centroids`` is set.

        Args:
            X: Array of shape ``(n_samples, n_features)``.

        Returns:
            1-D integer array of length ``n_samples`` with cluster indices in
            ``[0, k_clusters)``.
        """
        return self._nearest_centroid(X, self.best_centroids)