In [None]:
# NOTE: `!OMP_WAIT_POLICY=PASSIVE` only assigned the variable inside the
# throwaway subshell that `!` spawns, so the kernel process never saw it.
# Writing to os.environ changes the environment of the kernel itself.
# This cell must run before the cell that imports faiss so the setting is
# already in place when the OpenMP-backed library is loaded.
import os

os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

In [None]:
import faiss
import numpy as np


class FaissKMeans:
    """Small scikit-learn-flavoured wrapper around ``faiss.Kmeans``.

    Parameters
    ----------
    n_clusters : int, default=4
        Number of centroids; forwarded to faiss as ``k``.
    n_init : int, default=1000
        Forwarded to faiss as ``nredo``.
    max_iter : int, default=300
        Forwarded to faiss as ``niter``.
    random_state : int, default=42
        Forwarded to faiss as ``seed``.

    Attributes
    ----------
    kmeans : faiss.Kmeans or None
        The trained faiss object; ``None`` until ``fit`` has succeeded.
    cluster_centers_ : array or None
        ``kmeans.centroids`` after training.
    inertia_ : float or None
        Last entry of ``kmeans.obj`` after training.
    """

    def __init__(self, n_clusters=4, n_init=1000, max_iter=300, random_state=42):
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.seed = random_state
        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    @staticmethod
    def _as_float32_matrix(X):
        """Return ``X`` as a C-contiguous 2-D float32 array (the layout faiss expects)."""
        # ascontiguousarray also accepts lists and Fortran-ordered or strided
        # arrays, which a bare ``X.astype(np.float32)`` would pass through
        # with their original memory layout.
        matrix = np.ascontiguousarray(X, dtype=np.float32)
        if matrix.ndim != 2:
            raise ValueError(
                "Expected a 2-D array of shape (n_samples, n_features), "
                "got an array with %d dimension(s)." % matrix.ndim
            )
        return matrix

    def fit(self, X):
        """Train the faiss k-means on ``X`` and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data; converted to contiguous float32 before training.

        Returns
        -------
        FaissKMeans
            The fitted instance, so calls can be chained.
        """
        data = self._as_float32_matrix(X)
        trainer = faiss.Kmeans(d=data.shape[1],
                               k=self.n_clusters,
                               niter=self.max_iter,
                               nredo=self.n_init,
                               seed=self.seed)
        trainer.train(data)
        # Publish the state only after training succeeded, so a failed fit
        # never leaves a half-initialised model behind.
        self.kmeans = trainer
        self.cluster_centers_ = trainer.centroids
        self.inertia_ = trainer.obj[-1]
        return self

    def predict(self, X):
        """Return the index of the nearest centroid for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        array
            The index array produced by a 1-nearest-neighbour search on the
            trained faiss index; one column per requested neighbour, i.e.
            shape ``(n_samples, 1)``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        if self.kmeans is None:
            # AttributeError keeps the exception type the unfitted object
            # raised before, but with an actionable message.
            raise AttributeError(
                "This FaissKMeans instance is not fitted yet; "
                "call fit() before predict()."
            )
        _, nearest = self.kmeans.index.search(self._as_float32_matrix(X), 1)
        return nearest

In [None]:
from sklearn.datasets import make_blobs
# Synthetic benchmark data: one million 30-dimensional samples generated
# around eight blob centres, with a fixed seed so every run sees the same set.
X, y = make_blobs(
    n_samples=1_000_000,
    n_features=30,
    centers=8,
    random_state=42,
)

In [None]:
%%time
# Time the faiss-backed model. The arguments are the class defaults, written
# out so the run is visibly configured like the scikit-learn comparison.
ff = FaissKMeans(n_clusters=4, n_init=1000, max_iter=300, random_state=42)
ff.fit(X)

CPU times: user 2min 27s, sys: 2.18 s, total: 2min 29s
Wall time: 1min 39s


In [None]:
from sklearn.cluster import KMeans

In [None]:
%%time
# Time scikit-learn's KMeans with the same cluster count, number of
# initialisations, iteration cap and seed as the faiss run.
km = KMeans(
    n_clusters=4,
    n_init=1000,
    max_iter=300,
    random_state=42,
)
km.fit(X)

CPU times: user 10min 56s, sys: 1min 7s, total: 12min 3s
Wall time: 8min 30s


KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=4, n_init=1000, n_jobs=None, precompute_distances='auto',
       random_state=42, tol=0.0001, verbose=0)

<a style="text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;" href="https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=19287847-4e9c-4e57-80f4-c1e388f529ae" target="_blank">
Created in <span style="font-weight:600;margin-left:4px;">Deepnote</span></a>