In [311]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import silhouette_score
# Load the iris data; return_X_y=True asks load_iris for a (data, target)
# pair, unpacked here as X (features) and y (targets). Only X is used in the
# clustering cells visible in this notebook section.
X, y = load_iris(return_X_y=True)

In [367]:
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) with Euclidean distance.

    Parameters
    ----------
    n_clusters : int
        Number of clusters to form.
    n_iters : int
        Maximum number of assignment/update rounds.
    random_state : int or None, default=None
        Seed used to pick the initial centroids. ``None`` draws from NumPy's
        global random state, so an earlier ``np.random.seed(...)`` still
        controls the result.
    """

    def __init__(self, n_clusters:int, n_iters:int, random_state=None)->None:
        self.n_clusters = n_clusters
        self.n_iters = n_iters
        self.random_state = random_state

    @staticmethod
    def _nearest_centroid(X:np.ndarray, centroids:np.ndarray)->np.ndarray:
        """Return, for each row of ``X``, the index of its closest centroid."""
        # Broadcast (n_samples, 1, n_features) against (1, n_clusters, n_features).
        distance = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        return distance.argmin(axis=1)

    def fit_predict(self, X:np.ndarray)->np.ndarray:
        """Cluster the rows of ``X`` and return one cluster index per row.

        Parameters
        ----------
        X : np.ndarray
            Two-dimensional array with one sample per row.

        Returns
        -------
        np.ndarray
            Integer labels in ``range(n_clusters)``, one per sample.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1 or larger than the number of
            samples.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters must be between 1 and the number of samples "
                f"({n_samples}); got {self.n_clusters}"
            )

        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        # Sample WITHOUT replacement: drawing the same row twice would start
        # two identical centroids and leave one cluster empty.
        init_idx = rng.choice(n_samples, size=self.n_clusters, replace=False)
        centroids = X[init_idx].astype(float)

        labels = None
        for _ in range(self.n_iters):
            new_labels = self._nearest_centroid(X, centroids)
            # Unchanged assignments mean unchanged centroids: converged.
            if labels is not None and np.array_equal(new_labels, labels):
                break
            labels = new_labels
            for k in range(self.n_clusters):
                members = X[labels == k]
                # Keep the previous centroid for an empty cluster; the mean of
                # zero rows is NaN, and argmin would then pick the NaN column
                # for every sample.
                if members.shape[0] > 0:
                    centroids[k] = members.mean(axis=0)

        if labels is None:
            # n_iters <= 0: still return a valid assignment to the initial centroids.
            labels = self._nearest_centroid(X, centroids)
        return labels

# Cluster the samples into three groups, then display the silhouette score of
# that partition. The starting centroids are chosen at random with no fixed
# seed, so the score can differ from one run to the next.
y_pred = KMeans(
    n_clusters=3,
    n_iters=1000,
).fit_predict(X)
silhouette_score(X, labels=y_pred)

0.5528190123564095

In [360]:
from sklearn.metrics import pairwise_distances

class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm).

    Point-to-centroid distances are computed with
    ``sklearn.metrics.pairwise_distances`` using its default metric.

    Parameters
    ----------
    n_clusters : int
        Number of clusters to form.
    n_iters : int
        Maximum number of assignment/update rounds.
    random_state : int or None, default=None
        Seed used to pick the initial centroids. ``None`` draws from NumPy's
        global random state, so an earlier ``np.random.seed(...)`` still
        controls the result.
    """

    def __init__(self, n_clusters:int, n_iters:int, random_state=None)->None:
        self.n_clusters = n_clusters
        self.n_iters = n_iters
        self.random_state = random_state

    def fit_predict(self, X:np.ndarray)->np.ndarray:
        """Cluster the rows of ``X`` and return one cluster index per row.

        Parameters
        ----------
        X : np.ndarray
            Two-dimensional array with one sample per row.

        Returns
        -------
        np.ndarray
            Integer labels in ``range(n_clusters)``, one per sample.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1 or larger than the number of
            samples.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters must be between 1 and the number of samples "
                f"({n_samples}); got {self.n_clusters}"
            )

        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        # Sample WITHOUT replacement: drawing the same row twice would start
        # two identical centroids and leave one cluster empty.
        init_idx = rng.choice(n_samples, size=self.n_clusters, replace=False)
        centroids = X[init_idx].astype(float)

        labels = None
        for _ in range(self.n_iters):
            new_labels = pairwise_distances(X, centroids).argmin(axis=1)
            # Unchanged assignments mean unchanged centroids: converged.
            if labels is not None and np.array_equal(new_labels, labels):
                break
            labels = new_labels
            for k in range(self.n_clusters):
                members = X[labels == k]
                # Keep the previous centroid for an empty cluster; the mean of
                # zero rows would be NaN and poison every later distance.
                if members.shape[0] > 0:
                    centroids[k] = members.mean(axis=0)

        if labels is None:
            # n_iters <= 0: still return a valid assignment to the initial centroids.
            labels = pairwise_distances(X, centroids).argmin(axis=1)
        return labels

# Fit the pairwise_distances-based KMeans with three clusters and display the
# silhouette score of the resulting labels. Initial centroids are random and
# unseeded, so the score may change between runs.
y_pred = KMeans(
    n_clusters=3,
    n_iters=100,
).fit_predict(X)
silhouette_score(X, labels=y_pred)

0.551191604619592