In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist

In [2]:
# Benchmark data: 1000 random vectors with 1000 features each, uniform in [0, 1).
# A fixed seed keeps the data identical across kernel restarts, so results and
# timings can be compared between runs.
X = np.random.RandomState(42).rand(1000, 1000)

In [3]:
%%time
neigh = NearestNeighbors(n_neighbors=5)
neigh.fit(X)

Wall time: 2min 43s


In [4]:
# K-nearest neighbours
%timeit neigh.kneighbors(X[0].reshape(1, -1))

2.47 s ± 197 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Euclidean hand made Non-vectorised
%timeit distances = np.array([np.linalg.norm(vec - X[0]) for vec in X])

2.01 s ± 266 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Euclidean hand made vectorised
%timeit distances = np.linalg.norm(X - X[0], axis=1) # vectorised

In [None]:
# Cosine scikit-learn
%timeit distances = cosine_similarity(X, X[0].reshape(1, -1))

In [None]:
# Euclidean distance scikit learn
%timeit euclidean_distances(X[0].reshape(1, -1), X)

In [None]:
%timeit np.asarray(cdist(X[0].reshape(1, -1), X)) #np.asarray([cdist(x,y) for x, y in zip(A, B)])

In [None]:
# pure numpy with broadcasting
#%timeit dists2 = np.sqrt( np.sum( (X[:,:,np.newaxis,:] - B[:,np.newaxis,:,:])**2, axis=-1) )
%timeit dists2 = np.sqrt( np.sum( (X[:,:,np.newaxis,:] - X[0])**2, axis=-1) )

In [None]:
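# TODO: also benchmark the following:
# - https://github.com/droyed/eucl_dist
# - linear_kernel (sklearn.metrics.pairwise)
# - a manual cosine similarity (dot product divided by the product of the norms)
# - safe_sparse_dot (sklearn.utils.extmath)
# - a summary of the calculations above together with their speed metrics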
-