In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Default matplotlib figure size for the session.
plt.rc('figure', figsize=(10, 6))
# Remember pandas' row-display limit before it is overridden two lines below.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
# Keep printed pandas objects compact: cap the number of displayed
# columns/rows and the width of each cell's text.
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
# Print NumPy floats with 4 digits of precision in fixed-point notation
# (suppress=True turns off scientific notation for small values).
np.set_printoptions(precision=4, suppress=True)

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [4]:
ints = np.ones(10, dtype=np.uint16)
ints

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint16)

In [5]:
floats = np.ones(10, dtype=np.float32)
floats

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [6]:
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [9]:
np.issubdtype(ints.dtype, np.number)

True

In [10]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [11]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [12]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [13]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [14]:
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
other_arr = np.ones((3, 5))
other_arr

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [16]:
other_arr.shape

(3, 5)

In [17]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [19]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [20]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [22]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [23]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [24]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr1

array([[1, 2, 3],
       [4, 5, 6]])

In [25]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
arr2

array([[ 7,  8,  9],
       [10, 11, 12]])

In [26]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [27]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [28]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [30]:
arr = np.random.randn(5, 2)
arr

array([[-0.309 ,  0.9409],
       [ 1.4133, -0.4291],
       [ 0.4895,  1.2315],
       [-0.4456,  0.6879],
       [-0.5683,  0.9327]])

In [31]:
first, second, third = np.split(arr, [1, 3])
first

array([[-0.309 ,  0.9409]])

In [32]:
second

array([[ 1.4133, -0.4291],
       [ 0.4895,  1.2315]])

In [33]:
third

array([[-0.4456,  0.6879],
       [-0.5683,  0.9327]])

In [34]:
arr = np.arange(6)
arr

array([0, 1, 2, 3, 4, 5])

In [35]:
arr1 = arr.reshape((3, 2))
arr1

array([[0, 1],
       [2, 3],
       [4, 5]])

In [36]:
arr2 = np.random.randn(3, 2)
arr2

array([[-0.3403,  1.9958],
       [ 0.3897,  0.8498],
       [ 1.0758, -1.2754]])

In [37]:
np.r_[arr1, arr2]

array([[ 0.    ,  1.    ],
       [ 2.    ,  3.    ],
       [ 4.    ,  5.    ],
       [-0.3403,  1.9958],
       [ 0.3897,  0.8498],
       [ 1.0758, -1.2754]])

In [38]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [-0.3403,  1.9958,  3.    ],
       [ 0.3897,  0.8498,  4.    ],
       [ 1.0758, -1.2754,  5.    ]])

In [39]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [40]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [41]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [42]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [43]:
arr = np.random.randn(2, 2)
arr

array([[-1.1752, -1.3495],
       [-0.3653,  0.4325]])

In [44]:
arr.repeat(2, axis=0)

array([[-1.1752, -1.3495],
       [-1.1752, -1.3495],
       [-0.3653,  0.4325],
       [-0.3653,  0.4325]])

In [45]:
arr.repeat([2, 3], axis=0)

array([[-1.1752, -1.3495],
       [-1.1752, -1.3495],
       [-0.3653,  0.4325],
       [-0.3653,  0.4325],
       [-0.3653,  0.4325]])

In [46]:
arr.repeat([2, 3], axis=1)

array([[-1.1752, -1.1752, -1.3495, -1.3495, -1.3495],
       [-0.3653, -0.3653,  0.4325,  0.4325,  0.4325]])

In [47]:
arr

array([[-1.1752, -1.3495],
       [-0.3653,  0.4325]])

In [48]:
np.tile(arr, 2)

array([[-1.1752, -1.3495, -1.1752, -1.3495],
       [-0.3653,  0.4325, -0.3653,  0.4325]])

In [49]:
arr

array([[-1.1752, -1.3495],
       [-0.3653,  0.4325]])

In [50]:
np.tile(arr, (2, 1))

array([[-1.1752, -1.3495],
       [-0.3653,  0.4325],
       [-1.1752, -1.3495],
       [-0.3653,  0.4325]])

In [51]:
np.tile(arr, (3, 2))

array([[-1.1752, -1.3495, -1.1752, -1.3495],
       [-0.3653,  0.4325, -0.3653,  0.4325],
       [-1.1752, -1.3495, -1.1752, -1.3495],
       [-0.3653,  0.4325, -0.3653,  0.4325],
       [-1.1752, -1.3495, -1.1752, -1.3495],
       [-0.3653,  0.4325, -0.3653,  0.4325]])

In [52]:
arr = np.arange(10) * 100
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [53]:
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

In [54]:
arr.take(inds)

array([700, 100, 200, 600])

In [55]:
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [56]:
arr.put(inds, [40, 41, 42, 43])
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [57]:
inds = [2, 0, 2, 1]
arr = np.random.randn(2, 4)
arr

array([[ 0.2554, -0.1389,  0.788 ,  0.5693],
       [ 0.196 , -1.3074, -0.7739, -1.0356]])

In [58]:
arr.take(inds, axis=1)

array([[ 0.788 ,  0.2554,  0.788 , -0.1389],
       [-0.7739,  0.196 , -0.7739, -1.3074]])

In [59]:
arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [60]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [61]:
arr = np.random.randn(4, 3)
arr

array([[-1.5316,  0.3947,  1.4904],
       [ 0.9092, -0.2171,  0.2669],
       [-1.1688, -0.1849,  1.8295],
       [-1.7841,  0.46  , -0.3448]])

In [62]:
arr.mean(0)

array([-0.8938,  0.1132,  0.8105])

In [63]:
demeaned = arr - arr.mean(0)
demeaned

array([[-0.6378,  0.2816,  0.6799],
       [ 1.803 , -0.3303, -0.5436],
       [-0.275 , -0.2981,  1.019 ],
       [-0.8902,  0.3469, -1.1554]])

In [64]:
demeaned.mean(0)

array([0., 0., 0.])

In [65]:
arr

array([[-1.5316,  0.3947,  1.4904],
       [ 0.9092, -0.2171,  0.2669],
       [-1.1688, -0.1849,  1.8295],
       [-1.7841,  0.46  , -0.3448]])

In [66]:
row_means = arr.mean(1)
row_means

array([ 0.1178,  0.3197,  0.1586, -0.5563])

In [67]:
row_means.shape

(4,)

In [68]:
row_means.reshape((4, 1))

array([[ 0.1178],
       [ 0.3197],
       [ 0.1586],
       [-0.5563]])

In [69]:
demeaned = arr - row_means.reshape((4, 1))
demeaned

array([[-1.6495,  0.2769,  1.3726],
       [ 0.5895, -0.5368, -0.0527],
       [-1.3274, -0.3435,  1.6709],
       [-1.2278,  1.0163,  0.2115]])

In [70]:
demeaned.mean(1)

array([-0., -0.,  0., -0.])

In [71]:
# Demonstration of a broadcasting failure: arr is (4, 3) while arr.mean(1)
# is (4,), and trailing dimensions 3 vs 4 are incompatible. The error is
# caught and printed so the notebook still survives "Restart & Run All".
try:
    arr - arr.mean(1)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [72]:
arr - arr.mean(1).reshape((4, 1))

array([[-1.6495,  0.2769,  1.3726],
       [ 0.5895, -0.5368, -0.0527],
       [-1.3274, -0.3435,  1.6709],
       [-1.2278,  1.0163,  0.2115]])

In [73]:
arr = np.zeros((4, 4))
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [74]:
arr_3d = arr[:, np.newaxis, :]
arr_3d

array([[[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]]])

In [75]:
arr_3d.shape

(4, 1, 4)

In [76]:
arr_1d = np.random.normal(size=3)
arr_1d

array([ 0.1846, -0.9125,  0.5678])

In [77]:
arr_1d[:, np.newaxis]

array([[ 0.1846],
       [-0.9125],
       [ 0.5678]])

In [78]:
arr_1d[np.newaxis, :]

array([[ 0.1846, -0.9125,  0.5678]])

In [79]:
arr = np.random.randn(3, 4, 5)
arr

array([[[-0.6807,  1.0949, -0.4629, -0.1582,  1.3952],
        [ 0.2968,  2.007 ,  0.256 ,  0.7409, -0.8171],
        [ 0.4765, -0.6397,  0.0938, -0.4935, -0.0609],
        [-0.1093,  1.2696,  0.9723, -0.5679,  0.1621]],

       [[-0.417 ,  0.9201,  1.3175, -0.4237, -1.0363],
        [ 2.002 , -0.8551, -0.8415, -0.3398, -0.4759],
        [ 1.8468, -1.9145, -0.939 ,  1.9876, -1.054 ],
        [ 1.5929, -0.4226,  0.7342,  0.4448,  0.2818]],

       [[ 0.0367, -1.5099, -1.8396, -0.5253,  0.4495],
        [ 1.9338, -0.7109,  0.7693,  0.8868,  0.2854],
        [ 0.2631,  0.5585,  0.1058,  0.405 ,  1.1105],
        [-0.7317,  0.6981,  0.9537,  1.4107, -0.3865]]])

In [80]:
depth_means = arr.mean(2)
depth_means

array([[ 0.2377,  0.4967, -0.1247,  0.3454],
       [ 0.0721, -0.102 , -0.0146,  0.5262],
       [-0.6777,  0.6329,  0.4886,  0.3889]])

In [81]:
depth_means.shape

(3, 4)

In [82]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned

array([[[-0.9184,  0.8572, -0.7006, -0.3959,  1.1576],
        [-0.1999,  1.5103, -0.2407,  0.2442, -1.3138],
        [ 0.6013, -0.5149,  0.2186, -0.3687,  0.0638],
        [-0.4547,  0.9243,  0.6269, -0.9132, -0.1832]],

       [[-0.4891,  0.848 ,  1.2454, -0.4958, -1.1084],
        [ 2.1041, -0.7531, -0.7394, -0.2377, -0.3738],
        [ 1.8614, -1.8999, -0.9244,  2.0022, -1.0394],
        [ 1.0667, -0.9489,  0.208 , -0.0815, -0.2444]],

       [[ 0.7144, -0.8322, -1.1619,  0.1525,  1.1272],
        [ 1.3009, -1.3437,  0.1364,  0.2539, -0.3475],
        [-0.2255,  0.07  , -0.3828, -0.0836,  0.6219],
        [-1.1205,  0.3092,  0.5648,  1.0218, -0.7753]]])

In [83]:
demeaned.mean(2)

array([[ 0., -0.,  0., -0.],
       [ 0., -0.,  0., -0.],
       [ 0., -0.,  0., -0.]])

In [84]:
arr = np.zeros((4, 3))
arr

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [85]:
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [86]:
col = np.array([1.28, -0.42, 0.44, 1.6])
col

array([ 1.28, -0.42,  0.44,  1.6 ])

In [87]:
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [88]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [89]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [90]:
np.add.reduce(arr)

45

In [91]:
arr.sum()

45

In [92]:
np.random.seed(12346)
arr = np.random.randn(5, 5)
arr

array([[-0.09  ,  0.7594,  0.7483, -0.9815,  0.3658],
       [-0.3154, -0.8661,  0.0279, -0.4556, -1.6019],
       [ 0.2483, -0.3215, -0.8487,  0.0005, -0.5465],
       [ 0.2539,  1.9368, -0.7995, -0.5692,  0.0489],
       [-0.6491, -0.4795, -0.9535,  1.4225,  0.1754]])

In [93]:
arr[::2].sort(1)
arr

array([[-0.9815, -0.09  ,  0.3658,  0.7483,  0.7594],
       [-0.3154, -0.8661,  0.0279, -0.4556, -1.6019],
       [-0.8487, -0.5465, -0.3215,  0.0005,  0.2483],
       [ 0.2539,  1.9368, -0.7995, -0.5692,  0.0489],
       [-0.9535, -0.6491, -0.4795,  0.1754,  1.4225]])

In [94]:
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [False,  True, False, False],
       [ True,  True,  True,  True],
       [ True, False,  True,  True],
       [ True,  True,  True,  True]])

In [95]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [96]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [97]:
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [98]:
arr = np.arange(3).repeat([1, 2, 2])
arr

array([0, 1, 1, 2, 2])

In [99]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [100]:
x, y = np.random.randn(3, 4), np.random.randn(5)
x

array([[ 0.5224,  0.1064,  0.1027, -0.1082],
       [ 0.0549,  0.1964, -0.1939, -1.4566],
       [ 0.8574, -0.7416, -0.7804, -0.1064]])

In [101]:
y

array([ 0.5937, -1.2835,  0.478 ,  1.2924,  0.1516])

In [102]:
result = np.subtract.outer(x, y)
result

array([[[-0.0713,  1.8059,  0.0445, -0.77  ,  0.3708],
        [-0.4873,  1.3899, -0.3715, -1.186 , -0.0452],
        [-0.491 ,  1.3862, -0.3752, -1.1897, -0.0489],
        [-0.7019,  1.1752, -0.5862, -1.4007, -0.2599]],

       [[-0.5389,  1.3383, -0.4231, -1.2376, -0.0968],
        [-0.3973,  1.4798, -0.2816, -1.0961,  0.0447],
        [-0.7876,  1.0896, -0.6718, -1.4863, -0.3455],
        [-2.0503, -0.1731, -1.9345, -2.749 , -1.6082]],

       [[ 0.2637,  2.1409,  0.3795, -0.435 ,  0.7058],
        [-1.3353,  0.5419, -1.2195, -2.034 , -0.8932],
        [-1.3741,  0.5031, -1.2583, -2.0728, -0.932 ],
        [-0.7001,  1.177 , -0.5844, -1.3989, -0.2581]]])

In [103]:
result.shape

(3, 4, 5)

In [104]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [105]:
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [106]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [107]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

In [108]:
def add_elements(x, y):
    """Return ``x + y`` for a single pair of operands."""
    total = x + y
    return total

In [109]:
add_them = np.frompyfunc(add_elements, 2, 1)
add_them

<ufunc 'add_elements (vectorized)'>

In [110]:
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [111]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them

<numpy.vectorize at 0x247ab3cc550>

In [112]:
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [113]:
arr = np.random.randn(10000)
arr

array([-1.4663, -1.4334, -0.0978, ...,  0.1866,  0.4523, -1.177 ])

In [114]:
%timeit add_them(arr, arr)

1.42 ms ± 96.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [115]:
%timeit np.add(arr, arr)

2.74 µs ± 171 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [116]:
dtype = [('x', np.float64), ('y', np.int32)]
dtype

[('x', numpy.float64), ('y', numpy.int32)]

In [117]:
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([(1.5   ,  6), (3.1416, -2)], dtype=[('x', '<f8'), ('y', '<i4')])

In [118]:
sarr[0]

(1.5, 6)

In [119]:
sarr[0]['y']

6

In [120]:
sarr['x']

array([1.5   , 3.1416])

In [121]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [122]:
arr[0]['x']

array([0, 0, 0], dtype=int64)

In [123]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int64)

In [124]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data

array([((1., 2.), 5), ((3., 4.), 6)],
      dtype=[('x', [('a', '<f8'), ('b', '<f4')]), ('y', '<i4')])

In [125]:
data['x']

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [126]:
data['y']

array([5, 6])

In [127]:
data['x']['a']

array([1., 3.])

In [128]:
arr = np.random.randn(6)
arr

array([ 1.1397,  1.2888,  0.3759, -1.082 ,  1.8413,  0.8014])

In [129]:
arr.sort()
arr

array([-1.082 ,  0.3759,  0.8014,  1.1397,  1.2888,  1.8413])

In [130]:
arr = np.random.randn(3, 5)
arr

array([[-0.3318, -1.4711,  0.8705, -0.0847, -1.1329],
       [-1.0111, -0.3436,  2.1714,  0.1234, -0.0189],
       [ 0.1773,  0.7424,  0.8548,  1.038 , -0.329 ]])

In [131]:
arr[:, 0].sort()  
arr

array([[-1.0111, -1.4711,  0.8705, -0.0847, -1.1329],
       [-0.3318, -0.3436,  2.1714,  0.1234, -0.0189],
       [ 0.1773,  0.7424,  0.8548,  1.038 , -0.329 ]])

In [132]:
arr = np.random.randn(5)
arr

array([-1.1181, -0.2415, -2.0051,  0.7379, -1.0614])

In [133]:
np.sort(arr)

array([-2.0051, -1.1181, -1.0614, -0.2415,  0.7379])

In [134]:
arr

array([-1.1181, -0.2415, -2.0051,  0.7379, -1.0614])

In [135]:
arr = np.random.randn(3, 5)
arr

array([[ 0.5955, -0.2682,  1.3389, -0.1872,  0.9111],
       [-0.3215,  1.0054, -0.5168,  1.1925, -0.1989],
       [ 0.3969, -1.7638,  0.6071, -0.2222, -0.2171]])

In [136]:
arr.sort(axis=1)
arr

array([[-0.2682, -0.1872,  0.5955,  0.9111,  1.3389],
       [-0.5168, -0.3215, -0.1989,  1.0054,  1.1925],
       [-1.7638, -0.2222, -0.2171,  0.3969,  0.6071]])

In [137]:
arr[:, ::-1]

array([[ 1.3389,  0.9111,  0.5955, -0.1872, -0.2682],
       [ 1.1925,  1.0054, -0.1989, -0.3215, -0.5168],
       [ 0.6071,  0.3969, -0.2171, -0.2222, -1.7638]])

In [138]:
values = np.array([5, 0, 1, 3, 2])
values

array([5, 0, 1, 3, 2])

In [139]:
indexer = values.argsort()
indexer

array([1, 2, 4, 3, 0], dtype=int64)

In [140]:
values[indexer]

array([0, 1, 2, 3, 5])

In [141]:
arr =np.random.randn(3, 5)
arr

array([[-1.2136, -0.8704, -0.2306,  1.0438, -1.1441],
       [-0.3636, -0.1378,  2.1777, -0.4728,  0.8356],
       [-0.2089,  0.2316,  0.728 , -1.3918,  1.9956]])

In [142]:
arr[0] = values
arr

array([[ 5.    ,  0.    ,  1.    ,  3.    ,  2.    ],
       [-0.3636, -0.1378,  2.1777, -0.4728,  0.8356],
       [-0.2089,  0.2316,  0.728 , -1.3918,  1.9956]])

In [143]:
arr[:, arr[0].argsort()]

array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [-0.1378,  2.1777,  0.8356, -0.4728, -0.3636],
       [ 0.2316,  0.728 ,  1.9956, -1.3918, -0.2089]])

In [144]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter

array([1, 2, 3, 0, 4], dtype=int64)

In [145]:
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

In [146]:
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
key = np.array([2, 2, 1, 1, 1])

In [147]:
indexer = key.argsort(kind='mergesort')
indexer

array([2, 3, 4, 0, 1], dtype=int64)

In [148]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

In [149]:
np.random.seed(12345)
arr = np.random.randn(20)
arr

array([-0.2047,  0.4789, -0.5194, -0.5557,  1.9658,  1.3934,  0.0929,
        0.2817,  0.769 ,  1.2464,  1.0072, -1.2962,  0.275 ,  0.2289,
        1.3529,  0.8864, -2.0016, -0.3718,  1.669 , -0.4386])

In [150]:
np.partition(arr, 3)

array([-2.0016, -1.2962, -0.5557, -0.5194, -0.3718, -0.4386, -0.2047,
        0.2817,  0.769 ,  0.4789,  1.0072,  0.0929,  0.275 ,  0.2289,
        1.3529,  0.8864,  1.3934,  1.9658,  1.669 ,  1.2464])

In [151]:
indices = np.argpartition(arr, 3)
indices

array([16, 11,  3,  2, 17, 19,  0,  7,  8,  1, 10,  6, 12, 13, 14, 15,  5,
        4, 18,  9], dtype=int64)

In [152]:
arr.take(indices)

array([-2.0016, -1.2962, -0.5557, -0.5194, -0.3718, -0.4386, -0.2047,
        0.2817,  0.769 ,  0.4789,  1.0072,  0.0929,  0.275 ,  0.2289,
        1.3529,  0.8864,  1.3934,  1.9658,  1.669 ,  1.2464])

In [153]:
arr = np.array([0, 1, 7, 12, 15])
arr

array([ 0,  1,  7, 12, 15])

In [154]:
arr.searchsorted(9)

3

In [155]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5], dtype=int64)

In [156]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr

array([0, 0, 0, 1, 1, 1, 1])

In [157]:
arr.searchsorted([0, 1])

array([0, 3], dtype=int64)

In [158]:
arr.searchsorted([0, 1], side='right')

array([3, 7], dtype=int64)

In [159]:
data = np.floor(np.random.uniform(0, 10000, size=50))
data

array([9940., 6768., 7908., 1709.,  268., 8003., 9037.,  246., 4917.,
       5262., 5963.,  519., 8950., 7282., 8183., 5002., 8101.,  959.,
       2189., 2587., 4681., 4593., 7095., 1780., 5314., 1677., 7688.,
       9281., 6094., 1501., 4896., 3773., 8486., 9110., 3838., 3154.,
       5683., 1878., 1258., 6875., 7996., 5735., 9732., 6340., 8884.,
       4954., 3516., 7142., 5039., 2256.])

In [160]:
bins = np.array([0, 100, 1000, 5000, 10000])
bins

array([    0,   100,  1000,  5000, 10000])

In [161]:
labels = bins.searchsorted(data)
labels

array([4, 4, 4, 3, 2, 4, 4, 2, 3, 4, 4, 2, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3,
       4, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
       4, 3, 3, 4, 4, 3], dtype=int64)

In [162]:
pd.Series(data).groupby(labels).mean()

2     498.000000
3    3064.277778
4    7389.035714
dtype: float64

In [163]:
def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Written as an explicit index loop rather than a vectorized expression,
    so the same function body can be timed as plain Python and also passed
    to a JIT compiler.

    Parameters
    ----------
    x, y : indexable sequences of numbers
        Must support ``len()`` and integer indexing, and must have the
        same length.

    Returns
    -------
    float
        ``sum(x[i] - y[i] for i in range(len(x))) / len(x)``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length.
    ZeroDivisionError
        If the inputs are empty.
    """
    nx = len(x)
    # Reject mismatched inputs up front: a longer ``y`` would otherwise be
    # silently truncated, and a shorter one would be indexed past its end.
    if len(y) != nx:
        raise ValueError("x and y must have the same length")
    result = 0.0
    for i in range(nx):
        result += x[i] - y[i]
    # The loop runs exactly nx times, so nx is the element count.
    return result / nx

In [164]:
x = np.random.randn(10000000)
y = np.random.randn(10000000)

In [165]:
%timeit mean_distance(x, y)

3.02 s ± 26.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [166]:
%timeit (x-y).mean()

42.9 ms ± 1.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [167]:
import numba as nb

In [168]:
numba_mean_distance = nb.jit(mean_distance)

In [169]:
%timeit numba_mean_distance(x, y)

11.5 ms ± 72.7 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [170]:
from numba import vectorize

In [171]:
@vectorize
def nb_add(x, y):
    """Add one pair of scalars; ``vectorize`` applies this across array inputs."""
    total = x + y
    return total

In [172]:
x = np.arange(10)

In [173]:
nb_add(x, x)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18], dtype=int64)

In [174]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                 shape=(10000, 10000))
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [175]:
section = mmap[:5]
section

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [176]:
section[:] = np.random.randn(5, 10000)
section

memmap([[ 1.3714,  0.9313,  0.6057, ..., -0.6212, -0.4678,  0.4787],
        [ 0.423 ,  0.8306,  0.6998, ...,  1.2883,  0.5886, -1.4276],
        [ 2.1601, -1.2462,  2.4447, ...,  0.8687,  0.2802,  2.1301],
        [ 2.4627,  0.7058, -0.3668, ..., -0.5129,  0.0571,  1.0193],
        [-1.2165,  0.0454, -1.3963, ..., -0.5507, -0.9115,  0.4016]])

In [177]:
mmap.flush()
mmap

memmap([[ 1.3714,  0.9313,  0.6057, ..., -0.6212, -0.4678,  0.4787],
        [ 0.423 ,  0.8306,  0.6998, ...,  1.2883,  0.5886, -1.4276],
        [ 2.1601, -1.2462,  2.4447, ...,  0.8687,  0.2802,  2.1301],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [178]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

memmap([[ 1.3714,  0.9313,  0.6057, ..., -0.6212, -0.4678,  0.4787],
        [ 0.423 ,  0.8306,  0.6998, ...,  1.2883,  0.5886, -1.4276],
        [ 2.1601, -1.2462,  2.4447, ...,  0.8687,  0.2802,  2.1301],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [179]:
arr_c = np.ones((100, 10000), order='C')
arr_c

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])

In [180]:
arr_f = np.ones((100, 10000), order='F')
arr_f

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])

In [181]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [182]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [183]:
arr_f.flags.f_contiguous

True

In [184]:
%timeit arr_c.sum(1)

526 µs ± 6.17 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [185]:
%timeit arr_f.sum(1)

395 µs ± 2.25 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [186]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [187]:
arr_c[:50].flags.contiguous

True

In [188]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False