In [8]:
# Python is slow for repeated low level execution tasks

def func_python(N):
    d = 0.0
    for i in range(N):
        d += (i % 3 - i) * i
    return d

%timeit func_python(1000000)


1 loop, best of 3: 249 ms per loop


In [21]:
# Using NUMPY, ufunc
a = list(range(100000)) #[1,3,2,4,3,1,4,2]
%timeit b = [val + 5 for val in a]
print(b)


100 loops, best of 3: 8.89 ms per loop
[6 8 7 9 8 6 9 7]


In [22]:
import numpy as np
a = np.array(a)
%timeit b = a + 5
print(b)

10000 loops, best of 3: 77.1 µs per loop
[6 8 7 9 8 6 9 7]


In [23]:
# Using NUMPY aggregations (min, max, mean, sum...)
from random import random
c = [random() for i in range(100000)]
%timeit min(c)


100 loops, best of 3: 2.74 ms per loop


In [24]:
c = np.array(c)
%timeit c.min()


The slowest run took 11.14 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 35 µs per loop


In [30]:
def func_pythonNP(N):
    d = 0.0
    n = np.arange(N)
    #for i in range(N):
    #    d += (i % 3 - i) * i
    a = ((n % 3 - n) * n).sum()
    
    return a

%timeit func_pythonNP(1000000)

10 loops, best of 3: 22.7 ms per loop


In [33]:
# Broadcasting
# rules by which ufuncs operates on arrays of different dimensions...



In [36]:
# slicing, masking and fancy indexing.
L = np.array([1,4,7,9,10])
mask = (L < 4) | (L > 8)
print(mask)
print(L[mask])

[ True False False  True  True]
[ 1  9 10]


In [37]:
# fancy indexin
ind = [0,4,2]
L[ind]


array([ 1, 10,  7])

In [76]:
# Broadcasting tests
a = np.arange(24)
a = a.reshape([2,3,4])
b = np.arange(100,108)
b = b.reshape([2,1,4])
print('a : {}'.format(a))
print('b : {}'.format(b))
c = a + b
print(c)

a : [[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
b : [[[100 101 102 103]]

 [[104 105 106 107]]]
[[[100 102 104 106]
  [104 106 108 110]
  [108 110 112 114]]

 [[116 118 120 122]
  [120 122 124 126]
  [124 126 128 130]]]


In [82]:
# Full example : nearest neighbour
X = np.random.random((4, 3))
print('X.shape : {}'.format(X.shape))
XR = X.reshape(4,1,3)
print('X : {}'.format(X))
print('XR : {}'.format(XR))
diff = XR - X
diff2 = XR - X[0]


print('Diff.shape : {}'.format(diff.shape))
print(diff)
print(diff2)

D = (diff **2).sum(2)
print('Distance: {}'.format(D))
D.shape

i = np.arange(4)
D[i,i] = np.inf

i = np.argmin(D, 1)
print(i[:10])

X.shape : (4, 3)
X : [[ 0.12560274  0.66523559  0.67566357]
 [ 0.80618213  0.24626322  0.41169285]
 [ 0.81150851  0.84453963  0.66452862]
 [ 0.51308103  0.31035441  0.22711236]]
XR : [[[ 0.12560274  0.66523559  0.67566357]]

 [[ 0.80618213  0.24626322  0.41169285]]

 [[ 0.81150851  0.84453963  0.66452862]]

 [[ 0.51308103  0.31035441  0.22711236]]]
Diff.shape : (4, 4, 3)
[[[ 0.          0.          0.        ]
  [-0.68057939  0.41897236  0.26397072]
  [-0.68590577 -0.17930404  0.01113495]
  [-0.38747829  0.35488118  0.4485512 ]]

 [[ 0.68057939 -0.41897236 -0.26397072]
  [ 0.          0.          0.        ]
  [-0.00532638 -0.5982764  -0.25283577]
  [ 0.2931011  -0.06409119  0.18458048]]

 [[ 0.68590577  0.17930404 -0.01113495]
  [ 0.00532638  0.5982764   0.25283577]
  [ 0.          0.          0.        ]
  [ 0.29842748  0.53418521  0.43741625]]

 [[ 0.38747829 -0.35488118 -0.4485512 ]
  [-0.2931011   0.06409119 -0.18458048]
  [-0.29842748 -0.53418521 -0.43741625]
  [ 0.          0.  

In [83]:
from sklearn.neighbors import NearestNeighbors
d, i = NearestNeighbors().fit(X).kneighbors(X,2)
print(i[:10,1])


[3 3 1 1]
