In [50]:
from numba import double
from numba.decorators import jit, autojit
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [51]:
import numpy as np
# 1000 rows of 3 random values each (np.random.random samples from [0.0, 1.0)).
# X is the argument passed to the functions timed in the cells below.
X = np.random.random((1000, 3))

In [52]:
def pairwise_numpy(X):
    """Return the Euclidean distance between every pair of rows of ``X``.

    For a 2-D ``X`` of shape (M, N), inserting a new middle axis and
    subtracting ``X`` broadcasts to an (M, M, N) array of row differences.
    Squaring, summing over the last axis and taking the square root gives
    the (M, M) distance matrix.
    """
    deltas = X[:, np.newaxis, :] - X
    squared_norms = (deltas ** 2).sum(axis=-1)
    return np.sqrt(squared_norms)
%timeit pairwise_numpy(X)

10 loops, best of 3: 65.6 ms per loop


In [49]:
# NOTE(review): this cell carries execution count 49 but profiles pairwise_python,
# whose definition cell (In [54]) appears further down in the notebook; on a fresh
# kernel that definition cell has to be run before this one.
%lprun -s -f pairwise_python -T lp_results.txt pairwise_python(X)


*** Profile printout saved to text file u'lp_results.txt'. 


In [54]:
def pairwise_python(X):
    """Compute all pairwise Euclidean distances between rows of ``X`` with plain loops.

    This is the explicit triple-loop counterpart of the broadcasting version;
    the loops are kept deliberately so the function can serve as the pure-Python
    baseline and as the input to a JIT compiler.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of shape (M, N); each row is one point with N coordinates.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (M, M) where entry ``[i, j]`` is the distance
        between row ``i`` and row ``j`` of ``X``.
    """
    M = X.shape[0]
    N = X.shape[1]
    # np.float64 rather than the `np.float` alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, and it always meant this same dtype.
    D = np.empty((M, M), dtype=np.float64)
    for i in range(M):
        for j in range(M):
            # Accumulate the squared distance coordinate by coordinate.
            d = 0.0
            for k in range(N):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = np.sqrt(d)
    return D
%timeit pairwise_python(X)

1 loop, best of 3: 2.89 s per loop


In [55]:
from numba import double
from numba.decorators import jit, autojit

# `autojit` is a deprecated alias that forwards to `jit`; calling `jit` without a
# signature compiles lazily for the argument types seen on the first call.
pairwise_numba = jit(pairwise_python)

%timeit pairwise_numba(X)

The slowest run took 32.21 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 8.55 ms per loop


# Another example: summing a 2-D array with Numba's `@jit`

In [28]:
from numba import jit
from numpy import arange

In [30]:
@jit
def sum2d(arr):
    """Add up every element of the 2-D array ``arr``.

    The running total starts at ``0.0``, so the result is a float even when
    ``arr`` holds integers.
    """
    n_rows, n_cols = arr.shape
    total = 0.0
    for row in range(n_rows):
        for col in range(n_cols):
            total = total + arr[row, col]
    return total

a = arange(9).reshape(3,3)
%timeit sum2d(a)
print sum2d(a)

The slowest run took 163143.91 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 611 ns per loop
36.0


In [46]:
def pairwise_python_rand(X):
    """Accumulate a nested-loop sum whose loop lengths depend on the data in ``X``.

    The entries of ``X`` are split into those ``>= 0.5`` and those ``< 0.5``
    (boolean-mask indexing flattens each group to 1-D). For each entry of the
    first group, that entry and then every entry of the second group are added
    to a running total, so the second group is re-summed once per first-group
    entry.

    Parameters
    ----------
    X : numpy.ndarray
        Numeric array; only element values are used, so any dimensionality
        is accepted.

    Returns
    -------
    The accumulated total, or the integer ``0`` when no entry is ``>= 0.5``.
    """
    # The former M/N shape lookups were never used, and reading X.shape[1]
    # made 1-D input raise IndexError for no reason, so they are gone.
    high = X[X >= 0.5]
    low = X[X < 0.5]
    D = 0
    for j in range(len(high)):
        D += high[j]
        # Inner loop intentionally repeats for every `high` entry.
        for k in range(len(low)):
            D += low[k]
    return D

%timeit pairwise_python_rand(X)

10000 loops, best of 3: 40.6 µs per loop
