In [1]:
import numpy as np

# data

In [2]:
# Side length of the square test matrices: `size` samples with `size` features each.
size = 1000
# Use a seeded generator so that Restart & Run All reproduces the same inputs
# (and therefore the same comparison numbers further down).
rng = np.random.default_rng(42)
# Uniform samples in [0, 1), the same distribution np.random.rand draws from.
X = rng.random((size, size))
Y = rng.random((size, size))

# [euclidean](https://en.wikipedia.org/wiki/Euclidean_distance#Definition)

$$
dist(x, y) =  \sqrt{\sum_i(x_i - y_i)^2} = \sqrt{\sum_i(x_i^2 - 2x_i y_i + y_i^2)} = \sqrt{\sum_ix_i^2 -2 \sum_ix_i y_i + \sum_iy_i^2}
$$

In [3]:
from sklearn.metrics.pairwise import euclidean_distances as sklearn_euclidean

In [4]:
def euclidean_dist(X, Y):
    """Compute the pairwise Euclidean distances between the rows of X and Y.

    Uses the expansion ``||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2`` so the whole
    distance matrix comes from a single matrix product instead of an explicit
    loop over row pairs.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples_X, n_features)
    Y : numpy.ndarray of shape (n_samples_Y, n_features)

    Returns
    -------
    numpy.ndarray of shape (n_samples_X, n_samples_Y)
        Entry ``[i, j]`` is the Euclidean distance between ``X[i]`` and ``Y[j]``.
    """
    x_dist = (X ** 2).sum(axis=1)
    y_dist = (Y ** 2).sum(axis=1)
    xy_dist = X.dot(Y.T)
    # y_dist broadcasts along columns, the reshaped x_dist along rows.
    dist = - 2 * xy_dist + y_dist + x_dist.reshape(-1, 1)
    # The expanded form suffers from floating-point cancellation: for identical
    # (or nearly identical) rows the squared distance can come out as a tiny
    # negative number, which np.sqrt would turn into NaN. Clamp at zero first.
    dist = np.maximum(dist, 0)
    return np.sqrt(dist)

In [7]:
%time Euc_sklearn = sklearn_euclidean(X, Y)

CPU times: user 81.7 ms, sys: 3.29 ms, total: 85 ms
Wall time: 45.6 ms


In [8]:
%time Euc_dist = euclidean_dist(X, Y)

CPU times: user 92.5 ms, sys: 12.7 ms, total: 105 ms
Wall time: 54.9 ms


In [9]:
# Sum of absolute element-wise differences between our result and the
# scikit-learn result; a value close to zero means the two agree.
np.abs(Euc_dist - Euc_sklearn).sum()

6.881201386477187e-09

# [cosine](https://en.wikipedia.org/wiki/Cosine_similarity#Definition)

$$ \cos ({\bf x},{\bf y})= \frac{{\bf x} \cdot {\bf y}}{\|{\bf x}\| \|{\bf y}\|} = \frac{ \sum_{i=1}^{n}{{x}_i{y}_i} }{ \sqrt{\sum_{i=1}^{n}{{x}_i^2}} \sqrt{\sum_{i=1}^{n}{{y}_i^2}} } $$

In [10]:
from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine

In [11]:
def cosine_similarity(X, Y):
    """Compute the pairwise cosine similarity between the rows of X and Y.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples_X, n_features)
    Y : numpy.ndarray of shape (n_samples_Y, n_features)

    Returns
    -------
    numpy.ndarray of shape (n_samples_X, n_samples_Y)
        Entry ``[i, j]`` is ``X[i].Y[j] / (||X[i]|| * ||Y[j]||)``. Rows that
        are entirely zero have no direction; their similarity to every other
        row is reported as 0 rather than NaN.
    """
    # Take the square root per vector *before* multiplying, so the product of
    # two squared norms can neither overflow nor underflow.
    X_row_norm = np.sqrt(np.sum(X ** 2, axis=1))
    Y_row_norm = np.sqrt(np.sum(Y ** 2, axis=1))
    # A zero norm would produce 0/0 = NaN (plus a RuntimeWarning). Dividing by
    # 1 instead leaves the (all-zero) dot products untouched, i.e. similarity 0.
    X_row_norm = np.where(X_row_norm == 0, 1.0, X_row_norm)
    Y_row_norm = np.where(Y_row_norm == 0, 1.0, Y_row_norm)
    # Outer product of the norms: entry [i, j] is ||X[i]|| * ||Y[j]||.
    XY_norm = X_row_norm.reshape(-1, 1) * Y_row_norm.reshape(1, -1)
    return X.dot(Y.T) / XY_norm

In [12]:
%time Cos_sklearn = sklearn_cosine(X, Y)

CPU times: user 81 ms, sys: 8.4 ms, total: 89.4 ms
Wall time: 46.7 ms


In [13]:
%time Cos_sim = cosine_similarity(X, Y)

CPU times: user 77.1 ms, sys: 6.12 ms, total: 83.2 ms
Wall time: 43 ms


In [14]:
# Sum of absolute element-wise differences between our result and the
# scikit-learn result; a value close to zero means the two agree.
np.abs(Cos_sim - Cos_sklearn).sum()

2.601329152085441e-10