# Distances between points

Computing the distances between pairs of points.

- `scipy.spatial.distance.pdist` is the quickest
- Using a simple Python loop is insanely slow (over 1000 times slower than `pdist` for 1000 points)
- Using some `numpy` magic is slower (roughly 5-10 times in the timings below) but more flexible

In [1]:
import numpy as np
import scipy.spatial

In [2]:
# Five random 2-D points (one point per row); display the first as a quick look.
points = np.random.random((5, 2))
points[0]

array([ 0.55166226,  0.8192395 ])

In [3]:
# Reference distances from SciPy; Euclidean is pdist's default metric, spelled out here.
pdist = scipy.spatial.distance.pdist(points, metric="euclidean")
pdist

array([ 0.20258083,  0.47973149,  0.55668766,  0.77171257,  0.29286388,
        0.59530553,  0.76494949,  0.62256019,  0.70024947,  0.24535585])

In [4]:
def manual_dists(points):
    """Return the pairwise Euclidean distances using plain Python loops.

    Parameters
    ----------
    points : array-like exposing ``.shape`` and row indexing
        One point per row (``points[i]`` is the i-th point).

    Returns
    -------
    list
        One distance per unordered pair ``(i, j)`` with ``i < j``, ordered
        (0, 1), (0, 2), ..., (1, 2), ...  Empty when there are fewer than
        two points.
    """
    count = points.shape[0]
    distances = []
    for first in range(count):
        # Only visit later rows so every pair is produced exactly once.
        for second in range(first + 1, count):
            offset = points[first] - points[second]
            distances.append(np.sqrt(np.sum(offset**2)))
    return distances

# Sanity check: the pure-Python result must agree with SciPy's reference distances.
np.testing.assert_allclose(actual=manual_dists(points), desired=pdist)

In [5]:
def numpy_dists(points):
    """Return the pairwise Euclidean distances, vectorised one row at a time.

    Parameters
    ----------
    points : numpy.ndarray
        2-D array with one point per row.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n * (n - 1) // 2`` (``n`` = number of rows)
        holding one distance per pair ``(i, j)`` with ``i < j``, ordered
        (0, 1), (0, 2), ..., (1, 2), ...
    """
    count = points.shape[0]
    result = np.empty(count * (count - 1) // 2)
    start = 0
    for row in range(count - 1):
        # Subtract the current point from every later point in one broadcast.
        deltas = points[row + 1:] - points[row][None, :]
        block = np.sqrt(np.sum(deltas**2, axis=1))
        # Each row fills the next contiguous slice of the output.
        stop = start + block.shape[0]
        result[start:stop] = block
        start = stop
    return result
        
# Sanity check: the row-vectorised result must agree with SciPy's reference distances.
np.testing.assert_allclose(actual=numpy_dists(points), desired=pdist)

# Timing

In [6]:
# Re-bind `points` to 1000 random 2-D points for the first round of timings.
points = np.random.random((1000, 2))

In [7]:
%timeit( scipy.spatial.distance.pdist(points) )

100 loops, best of 3: 3.57 ms per loop


In [8]:
%timeit( manual_dists(points) )

1 loop, best of 3: 5.52 s per loop


In [9]:
%timeit( numpy_dists(points) )

10 loops, best of 3: 35.6 ms per loop


In [10]:
# Re-bind `points` to 10000 random 2-D points for the next round of timings.
points = np.random.random((10000, 2))

In [11]:
%timeit( scipy.spatial.distance.pdist(points) )

1 loop, best of 3: 352 ms per loop


In [12]:
%timeit( numpy_dists(points) )

1 loop, best of 3: 1.92 s per loop


In [13]:
# Re-bind `points` to 30000 random 2-D points for the final round of timings.
points = np.random.random((30000, 2))

In [14]:
%timeit( scipy.spatial.distance.pdist(points) )

1 loop, best of 3: 3.18 s per loop


In [15]:
%timeit( numpy_dists(points) )

1 loop, best of 3: 16.1 s per loop
