# ndarray Object Internals
np.ndarray object has the following attributes:
- data pointer: Points to the block of memory
- dtype: Datatype
- shape: Shape of ndarray (tuple of dimensions)
- strides: Tuple giving, for each dimension, the number of bytes to step in memory to reach the next element along that dimension. Helps in creating memory-efficient views of an ndarray.

In [1]:
import numpy as np

In [2]:
arr = np.ones((10, 5))

In [3]:
arr.shape

(10, 5)

In [4]:
arr.strides

(40, 8)

In [5]:
arr.dtype

dtype('float64')

In [6]:
arr.data

<memory at 0x7bb420ea45f0>

In [7]:
np.ones((3, 4, 5), dtype= 'float64').strides

(160, 40, 8)

## NumPy Data Type Hierarchy

In [8]:
ints = np.ones(10, dtype = 'uint8')

In [9]:
floats = np.ones(10, dtype= 'float32')

In [10]:
np.issubdtype(ints.dtype, np.integer)

True

In [11]:
np.issubdtype(floats.dtype, np.floating)

True

In [12]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [13]:
np.issubdtype(ints.dtype, np.number)

True

# Advanced Array Manipulation
## Reshaping Arrays

In [14]:
arr = np.arange(12)

In [15]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [16]:
arr.reshape((4, 3), order= 'F')     # fortran order

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [17]:
arr.reshape((4, 3), order= 'C')     # C order

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [18]:
arr.reshape((4, 3)).reshape((2, 6))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [19]:
arr = np.arange(15)

In [20]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [22]:
other_arr = np.ones((3, 5))

In [23]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [24]:
arr = np.arange(15).reshape((3, 5))

In [25]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [26]:
arr.ravel()    # returns a view when possible; copies only if required

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [27]:
arr.flatten()    # Always gives a copy

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## Concatenating and Splitting Arrays

In [28]:
arr1 = np.array(
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
)

In [29]:
arr2 = np.array(
    [
        [7, 8, 9],
        [10, 11, 12]
    ]
)

In [30]:
np.concatenate([arr1, arr2])

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.concatenate([arr1, arr2], axis= 1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [33]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [34]:
arr = np.random.standard_normal((6, 2))

In [35]:
arr

array([[ 0.54156089, -0.17162157],
       [ 0.20630263,  0.88763421],
       [ 2.14159198, -0.37729095],
       [-1.81867482, -1.55000205],
       [-0.3856897 , -0.51358811],
       [ 0.7595464 ,  0.84743584]])

In [36]:
first, second, third = np.split(arr, [1, 3])

In [37]:
first

array([[ 0.54156089, -0.17162157]])

In [38]:
second

array([[ 0.20630263,  0.88763421],
       [ 2.14159198, -0.37729095]])

In [39]:
third

array([[-1.81867482, -1.55000205],
       [-0.3856897 , -0.51358811],
       [ 0.7595464 ,  0.84743584]])

In [40]:
np.split(arr, 2)

[array([[ 0.54156089, -0.17162157],
        [ 0.20630263,  0.88763421],
        [ 2.14159198, -0.37729095]]),
 array([[-1.81867482, -1.55000205],
        [-0.3856897 , -0.51358811],
        [ 0.7595464 ,  0.84743584]])]

In [41]:
np.split(arr, 3)

[array([[ 0.54156089, -0.17162157],
        [ 0.20630263,  0.88763421]]),
 array([[ 2.14159198, -0.37729095],
        [-1.81867482, -1.55000205]]),
 array([[-0.3856897 , -0.51358811],
        [ 0.7595464 ,  0.84743584]])]

In [42]:
np.split(arr, [1, 2, 5])

[array([[ 0.54156089, -0.17162157]]),
 array([[0.20630263, 0.88763421]]),
 array([[ 2.14159198, -0.37729095],
        [-1.81867482, -1.55000205],
        [-0.3856897 , -0.51358811]]),
 array([[0.7595464 , 0.84743584]])]

In [43]:
np.row_stack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [44]:
np.column_stack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [45]:
np.dstack((arr1, arr2))

array([[[ 1,  7],
        [ 2,  8],
        [ 3,  9]],

       [[ 4, 10],
        [ 5, 11],
        [ 6, 12]]])

In [46]:
np.hsplit(arr, 2)

[array([[ 0.54156089],
        [ 0.20630263],
        [ 2.14159198],
        [-1.81867482],
        [-0.3856897 ],
        [ 0.7595464 ]]),
 array([[-0.17162157],
        [ 0.88763421],
        [-0.37729095],
        [-1.55000205],
        [-0.51358811],
        [ 0.84743584]])]

In [47]:
np.vsplit(arr, 2)

[array([[ 0.54156089, -0.17162157],
        [ 0.20630263,  0.88763421],
        [ 2.14159198, -0.37729095]]),
 array([[-1.81867482, -1.55000205],
        [-0.3856897 , -0.51358811],
        [ 0.7595464 ,  0.84743584]])]

### Stacking helpers: r_ and c_

In [48]:
arr1 = np.arange(6).reshape((3, 2))

In [49]:
arr2 = np.random.standard_normal((3, 2))

In [50]:
arr1

array([[0, 1],
       [2, 3],
       [4, 5]])

In [51]:
arr2

array([[-0.81247872, -0.58376553],
       [-0.1179163 , -0.46507587],
       [-0.54774526,  1.27354492]])

In [52]:
np.r_[arr1, arr2]   # stacks along rows (vertical stacking, like vstack)

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-0.81247872, -0.58376553],
       [-0.1179163 , -0.46507587],
       [-0.54774526,  1.27354492]])

In [53]:
np.c_[arr1, arr2]   # stacks along columns (horizontal stacking, like hstack)

array([[ 0.        ,  1.        , -0.81247872, -0.58376553],
       [ 2.        ,  3.        , -0.1179163 , -0.46507587],
       [ 4.        ,  5.        , -0.54774526,  1.27354492]])

In [54]:
np.c_[np.r_[arr1, arr2], np.arange(6)]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [-0.81247872, -0.58376553,  3.        ],
       [-0.1179163 , -0.46507587,  4.        ],
       [-0.54774526,  1.27354492,  5.        ]])

In [55]:
np.c_[1:6, -10:-1:2]     #translates slices to arrays

array([[  1, -10],
       [  2,  -8],
       [  3,  -6],
       [  4,  -4],
       [  5,  -2]])

## Repeating Elements: tile and repeat

In [56]:
arr = np.arange(3)

In [57]:
arr

array([0, 1, 2])

In [58]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [59]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [60]:
arr = np.arange(4).reshape((2, 2))

In [61]:
arr.repeat(2, axis= 0)

array([[0, 1],
       [0, 1],
       [2, 3],
       [2, 3]])

In [62]:
arr.repeat(2, axis= 1)

array([[0, 0, 1, 1],
       [2, 2, 3, 3]])

In [63]:
arr.repeat([2, 3], axis= 0)

array([[0, 1],
       [0, 1],
       [2, 3],
       [2, 3],
       [2, 3]])

In [64]:
arr.repeat([2, 3], axis= 1)

array([[0, 0, 1, 1, 1],
       [2, 2, 3, 3, 3]])

In [65]:
np.tile(arr, 3)

array([[0, 1, 0, 1, 0, 1],
       [2, 3, 2, 3, 2, 3]])

In [66]:
np.tile(arr, [2, 1])

array([[0, 1],
       [2, 3],
       [0, 1],
       [2, 3]])

In [67]:
np.tile(arr, (2, 3))

array([[0, 1, 0, 1, 0, 1],
       [2, 3, 2, 3, 2, 3],
       [0, 1, 0, 1, 0, 1],
       [2, 3, 2, 3, 2, 3]])

## Fancy Indexing Equivalents: take and put

In [68]:
arr = np.arange(10) * 100

In [69]:
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [70]:
indices = [7, 1, 2, 6]

In [71]:
arr[indices]

array([700, 100, 200, 600])

In [72]:
arr.take(indices)

array([700, 100, 200, 600])

In [73]:
arr.put(indices, 23)

In [74]:
arr

array([  0,  23,  23, 300, 400, 500,  23,  23, 800, 900])

In [75]:
arr.put(indices, [4, 5, 6, 7])

In [76]:
arr

array([  0,   5,   6, 300, 400, 500,   7,   4, 800, 900])

In [77]:
indices = [2, 0, 2, 1]

In [78]:
arr = np.random.standard_normal((2, 4))

In [79]:
arr.take(indices, axis= 1)

array([[ 0.32387995,  1.2651746 ,  0.32387995, -0.90522333],
       [ 0.58249629, -0.40275245,  0.58249629, -2.0075156 ]])

# Broadcasting

In [80]:
arr = np.arange(4)

In [81]:
arr * 4

array([ 0,  4,  8, 12])

In [82]:
arr = np.random.standard_normal((4, 3))

In [83]:
arr.mean(0)

array([-0.57956506, -0.28896561,  0.58795803])

In [84]:
demeaned = arr - arr.mean(0)

In [85]:
demeaned

array([[ 0.89239789, -2.04813182,  0.91374481],
       [ 1.19437972,  0.81347238, -0.71269719],
       [ 0.12417473,  0.09022324, -0.20499489],
       [-2.21095234,  1.14443619,  0.00394727]])

In [86]:
demeaned.mean(0)

array([ 0.00000000e+00, -5.55111512e-17, -2.77555756e-17])

In [87]:
demeaned.mean(1)

array([-0.08066304,  0.43171831,  0.00313436, -0.35418963])

In [88]:
row_means = arr.mean(1)

In [89]:
row_means.shape

(4,)

In [90]:
row_means.reshape((4, 1))

array([[-0.17418726],
       [ 0.33819409],
       [-0.09038985],
       [-0.44771384]])

In [91]:
demeaned = arr - row_means.reshape((4, 1))

In [92]:
demeaned.mean(1).astype('int64')

array([0, 0, 0, 0])

## Broadcasting over Other Axes

In [93]:
# arr - arr.mean(1)     #ValueError: operands could not be broadcast together with shapes (4,3) (4,)

In [94]:
arr = np.zeros((4, 4))

In [95]:
arr_3d = arr[:, np.newaxis, :]

In [96]:
arr_3d

array([[[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]],

       [[0., 0., 0., 0.]]])

In [97]:
arr_3d.shape

(4, 1, 4)

In [98]:
arr_1d = np.arange(4)

In [99]:
arr_1d[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3]])

In [100]:
arr_1d[np.newaxis, :]

array([[0, 1, 2, 3]])

In [101]:
arr = np.random.standard_normal((3, 4, 5))

In [102]:
arr

array([[[-0.07714675, -0.67536736, -0.14717299,  1.0964594 ,
         -0.01557338],
        [ 0.47519225,  0.22558707,  1.70123577,  0.31046832,
          0.75186432],
        [-0.81714202,  0.79448784, -0.59784615, -0.71124882,
          1.22780956],
        [-0.11701265, -1.22332728,  1.52713574,  0.59481494,
          1.97774966]],

       [[ 0.02736604,  0.30157278,  0.39982816, -0.31211139,
          0.20305185],
        [-0.03808487, -1.76377032, -0.57098907,  1.1812854 ,
         -0.32612168],
        [ 0.27803054,  1.36778009, -0.23995212,  0.3425733 ,
          0.91060264],
        [-0.10983569,  1.04435706, -0.54723131,  0.17358028,
         -0.39685734]],

       [[-0.31929474,  1.89106809,  0.26630139, -0.32697633,
         -0.37934131],
        [-0.39886021, -0.46277814, -1.06628479,  1.37752485,
          0.01768575],
        [-0.30201752, -0.61883355, -0.33059289, -0.65184855,
         -2.53702461],
        [-0.89688572, -0.44845917,  0.4898649 ,  0.78604178,
          1

In [103]:
depth_mean = arr.mean(2)

In [104]:
depth_mean

array([[ 0.03623979,  0.69286955, -0.02078792,  0.55187208],
       [ 0.12394149, -0.30353611,  0.53180689,  0.0328026 ],
       [ 0.22635142, -0.10654251, -0.88806342,  0.31863552]])

In [105]:
depth_mean.shape

(3, 4)

In [106]:
demeaned.shape

(4, 3)

In [107]:
demeaned = arr - depth_mean[:, :, np.newaxis]

In [108]:
demeaned.mean(2).astype('int64')

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

## Setting Array Values by Broadcasting

In [109]:
arr = np.zeros((5, 3))

In [110]:
arr[:] = 5

In [111]:
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [112]:
col = np.random.standard_normal(5)

In [113]:
col

array([-0.20827523,  0.40894697, -0.12929391, -1.27966879, -1.09047982])

In [114]:
arr[:] = col[:, np.newaxis]

In [115]:
arr

array([[-0.20827523, -0.20827523, -0.20827523],
       [ 0.40894697,  0.40894697,  0.40894697],
       [-0.12929391, -0.12929391, -0.12929391],
       [-1.27966879, -1.27966879, -1.27966879],
       [-1.09047982, -1.09047982, -1.09047982]])

In [116]:
arr[:2] = [[-1], [0]]

In [117]:
arr

array([[-1.        , -1.        , -1.        ],
       [ 0.        ,  0.        ,  0.        ],
       [-0.12929391, -0.12929391, -0.12929391],
       [-1.27966879, -1.27966879, -1.27966879],
       [-1.09047982, -1.09047982, -1.09047982]])

# Advanced ufunc Usage

In [118]:
arr = np.arange(10)

In [119]:
np.add.reduce(arr)

45

In [120]:
np.sum(arr)

45

In [121]:
rng = np.random.default_rng(12346)

In [122]:
arr = rng.standard_normal((5, 5))

In [123]:
arr

array([[-0.903889  ,  0.15713146,  0.89761199, -0.76219554, -0.17625556],
       [ 0.05303172, -1.62844028, -0.17753333,  1.96360352,  1.78125478],
       [-0.87971984, -1.69847913, -1.81891091,  0.11895453, -0.44409513],
       [ 0.76911421, -0.03433778,  0.39252776,  0.75891811, -0.07045967],
       [ 1.04984775,  1.02967072, -0.42005533,  0.78626627,  0.96124929]])

In [124]:
arr[::2].sort(1)    # sorting a few rows

In [125]:
arr

array([[-0.903889  , -0.76219554, -0.17625556,  0.15713146,  0.89761199],
       [ 0.05303172, -1.62844028, -0.17753333,  1.96360352,  1.78125478],
       [-1.81891091, -1.69847913, -0.87971984, -0.44409513,  0.11895453],
       [ 0.76911421, -0.03433778,  0.39252776,  0.75891811, -0.07045967],
       [-0.42005533,  0.78626627,  0.96124929,  1.02967072,  1.04984775]])

In [126]:
map = arr[:, :-1] < arr[:, 1:]

In [127]:
map

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [128]:
arr[:, :-1]

array([[-0.903889  , -0.76219554, -0.17625556,  0.15713146],
       [ 0.05303172, -1.62844028, -0.17753333,  1.96360352],
       [-1.81891091, -1.69847913, -0.87971984, -0.44409513],
       [ 0.76911421, -0.03433778,  0.39252776,  0.75891811],
       [-0.42005533,  0.78626627,  0.96124929,  1.02967072]])

In [129]:
arr[:, 1:]

array([[-0.76219554, -0.17625556,  0.15713146,  0.89761199],
       [-1.62844028, -0.17753333,  1.96360352,  1.78125478],
       [-1.69847913, -0.87971984, -0.44409513,  0.11895453],
       [-0.03433778,  0.39252776,  0.75891811, -0.07045967],
       [ 0.78626627,  0.96124929,  1.02967072,  1.04984775]])

In [130]:
np.logical_and.reduce(map, axis= 1)

array([ True, False,  True, False,  True])

In [131]:
arr = np.arange(15).reshape((3, 5))

In [132]:
np.add.accumulate(arr, axis= 1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [133]:
arr = np.arange(3).repeat([1, 2, 2])

In [134]:
arr

array([0, 1, 1, 2, 2])

In [135]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [136]:
x = rng.standard_normal((3, 4))

In [137]:
y = rng.standard_normal(5)

In [138]:
result = np.subtract.outer(x, y)

In [139]:
result.shape

(3, 4, 5)

In [140]:
arr = np.arange(10)

In [141]:
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [142]:
arr = np.multiply.outer(np.arange(4), np.arange(5))

In [143]:
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [144]:
np.add.reduceat(arr, [0, 2, 4], axis= 1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

## Writing New ufuncs in Python

In [145]:
def add_elements(x, y):
    """Return ``x + y`` for a single pair of operands."""
    combined = x + y
    return combined

In [146]:
add_them = np.frompyfunc(add_elements, 2, 1)

In [147]:
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [148]:
add_vectorized = np.vectorize(add_elements, otypes= ['float64'])

In [149]:
add_vectorized(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [150]:
arr = rng.standard_normal(1_000_000)

In [151]:
%timeit add_vectorized(arr, arr)

1.09 s ± 599 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [152]:
%timeit np.add(arr, arr)

1.37 ms ± 398 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Structured and Record Arrays

In [153]:
dtype = [('x', 'int32'), ('y', 'float64'), ('name', 'S10')]

In [154]:
sarr = np.array([(23, np.pi, 'Harshit'), (142, 2.345, 'Ruchi')], dtype= dtype)

In [155]:
sarr

array([( 23, 3.14159265, b'Harshit'), (142, 2.345     , b'Ruchi')],
      dtype=[('x', '<i4'), ('y', '<f8'), ('name', 'S10')])

In [156]:
sarr[0]

(23, 3.14159265, b'Harshit')

In [157]:
sarr[0]['y']

3.141592653589793

In [158]:
sarr[1]['name']

b'Ruchi'

In [159]:
sarr['x']

array([ 23, 142], dtype=int32)

## Nested Data Types and Multidimensional Fields

In [160]:
dtype = [('x', 'int64', 3), ('y', 'float64', 2)]

In [161]:
arr = np.zeros(4, dtype= dtype)

In [162]:
arr

array([([0, 0, 0], [0., 0.]), ([0, 0, 0], [0., 0.]),
       ([0, 0, 0], [0., 0.]), ([0, 0, 0], [0., 0.])],
      dtype=[('x', '<i8', (3,)), ('y', '<f8', (2,))])

In [163]:
arr[0]['x']

array([0, 0, 0])

In [164]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [165]:
dtype = [('x', [('a', 'int64'), ('b', 'int64')],), ('y', 'float64')]

In [166]:
nested_structured_array = np.zeros(4, dtype= dtype)

In [167]:
nested_structured_array

array([((0, 0), 0.), ((0, 0), 0.), ((0, 0), 0.), ((0, 0), 0.)],
      dtype=[('x', [('a', '<i8'), ('b', '<i8')]), ('y', '<f8')])

In [168]:
nested_structured_array['x']

array([(0, 0), (0, 0), (0, 0), (0, 0)], dtype=[('a', '<i8'), ('b', '<i8')])

In [169]:
nested_structured_array['x']['a']

array([0, 0, 0, 0])

In [170]:
nested_structured_array['y']

array([0., 0., 0., 0.])

# More About Sorting

In [171]:
arr = rng.standard_normal(7)

In [172]:
arr

array([ 1.39451773, -1.30866366, -0.25409492,  0.25057645, -0.21527984,
        0.5693466 , -0.16877397])

In [173]:
arr.sort()

In [174]:
arr

array([-1.30866366, -0.25409492, -0.21527984, -0.16877397,  0.25057645,
        0.5693466 ,  1.39451773])

In [175]:
arr2d = rng.standard_normal((3, 5))

In [176]:
arr2d

array([[ 1.46585853, -1.20297692,  0.43034205,  0.15217255, -1.4535079 ],
       [-0.73145337,  0.98473474, -0.67899187, -0.48447733,  0.58414676],
       [-0.13718003,  0.43959476,  0.96443963,  0.74513416,  0.17846528]])

In [177]:
arr2d.sort()

In [178]:
arr2d

array([[-1.4535079 , -1.20297692,  0.15217255,  0.43034205,  1.46585853],
       [-0.73145337, -0.67899187, -0.48447733,  0.58414676,  0.98473474],
       [-0.13718003,  0.17846528,  0.43959476,  0.74513416,  0.96443963]])

In [179]:
arr = rng.standard_normal(5)

In [180]:
arr

array([ 0.6975714 ,  0.32561761, -0.95333009,  0.78436601,  0.7416942 ])

In [181]:
np.sort(arr)

array([-0.95333009,  0.32561761,  0.6975714 ,  0.7416942 ,  0.78436601])

In [182]:
arr

array([ 0.6975714 ,  0.32561761, -0.95333009,  0.78436601,  0.7416942 ])

In [183]:
arr2d = rng.standard_normal((3, 5))

In [184]:
arr2d

array([[ 0.05967091,  1.94756264,  1.34064395,  0.41467602, -0.48233568],
       [ 1.17010935,  0.92839012, -1.14692057, -0.03008327, -0.10869556],
       [-0.13111287, -0.60783509,  1.82888382, -2.22118014,  0.43998846]])

In [185]:
arr2d.sort(axis= 1)

In [186]:
arr2d

array([[-0.48233568,  0.05967091,  0.41467602,  1.34064395,  1.94756264],
       [-1.14692057, -0.10869556, -0.03008327,  0.92839012,  1.17010935],
       [-2.22118014, -0.60783509, -0.13111287,  0.43998846,  1.82888382]])

In [187]:
arr2d[:, ::-1]

array([[ 1.94756264,  1.34064395,  0.41467602,  0.05967091, -0.48233568],
       [ 1.17010935,  0.92839012, -0.03008327, -0.10869556, -1.14692057],
       [ 1.82888382,  0.43998846, -0.13111287, -0.60783509, -2.22118014]])

## Indirect Sorts: argsort and lexsort

In [188]:
values = np.array([5, 0, 1, 3, 2])

In [189]:
indexer = values.argsort()

In [190]:
indexer

array([1, 2, 4, 3, 0])

In [191]:
values[indexer]

array([0, 1, 2, 3, 5])

In [192]:
arr = rng.standard_normal((3, 5))

In [193]:
arr[0] = values

In [194]:
arr

array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [-1.14224327,  0.79211819,  0.06288085,  0.67570686, -1.19488019],
       [ 1.88259363,  0.88836525, -0.71855252, -0.08737014,  0.25946087]])

In [195]:
arr.argsort()

array([[1, 2, 4, 3, 0],
       [4, 0, 2, 3, 1],
       [2, 3, 4, 1, 0]])

In [196]:
arr[:, arr[0].argsort()]     # sorting by first row

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [ 0.79211819,  0.06288085, -1.19488019,  0.67570686, -1.14224327],
       [ 0.88836525, -0.71855252,  0.25946087, -0.08737014,  1.88259363]])

In [197]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])

In [198]:
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])

In [199]:
sorter = np.lexsort((first_name, last_name))

In [200]:
sorter

array([1, 2, 3, 0, 4])

In [201]:
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

## Alternative Sort Algorithms

In [202]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])

In [203]:
key = np.array([2, 2, 1, 1, 1])

In [204]:
indexer = key.argsort(kind= 'mergesort')

In [205]:
indexer

array([2, 3, 4, 0, 1])

In [206]:
values.take(indexer)    # values[indexer]

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

## Partially Sorting Arrays

In [207]:
rng = np.random.default_rng(12345)

In [208]:
arr = rng.standard_normal(20)

In [209]:
arr

array([-1.42382504,  1.26372846, -0.87066174, -0.25917323, -0.07534331,
       -0.74088465, -1.3677927 ,  0.6488928 ,  0.36105811, -1.95286306,
        2.34740965,  0.96849691, -0.75938718,  0.90219827, -0.46695317,
       -0.06068952,  0.78884434, -1.25666813,  0.57585751,  1.39897899])

In [210]:
np.partition(arr, 3)

array([-1.95286306, -1.42382504, -1.3677927 , -1.25666813, -0.87066174,
       -0.75938718, -0.74088465, -0.06068952,  0.36105811, -0.07534331,
       -0.25917323, -0.46695317,  0.57585751,  0.90219827,  0.96849691,
        0.6488928 ,  0.78884434,  1.26372846,  1.39897899,  2.34740965])

In [211]:
indices = np.argpartition(arr, 3)

In [212]:
indices

array([ 9,  0,  6, 17,  2, 12,  5, 15,  8,  4,  3, 14, 18, 13, 11,  7, 16,
        1, 19, 10])

In [213]:
arr.take(indices)

array([-1.95286306, -1.42382504, -1.3677927 , -1.25666813, -0.87066174,
       -0.75938718, -0.74088465, -0.06068952,  0.36105811, -0.07534331,
       -0.25917323, -0.46695317,  0.57585751,  0.90219827,  0.96849691,
        0.6488928 ,  0.78884434,  1.26372846,  1.39897899,  2.34740965])

## numpy.searchsorted: Finding Elements in a sorted array

In [214]:
arr = np.arange(0, 500, 3)

In [215]:
arr.searchsorted(270)

90

In [216]:
arr.searchsorted([0, 8, 100, 273, 491])   # index to insert these elements to maintain sortedness

array([  0,   3,  34,  91, 164])

In [217]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])

In [218]:
arr.searchsorted([0, 1])

array([0, 3])

In [219]:
arr.searchsorted([0, 1], side= 'right')

array([3, 7])

In [220]:
data = np.floor(rng.uniform(0, 10000, size= 50))

In [221]:
data

array([ 815., 1598., 3401., 4651., 2664., 8157., 1932., 1294.,  916.,
       5985., 8547., 6016., 9319., 7247., 8605., 9293., 5461., 9376.,
       4949., 2737., 4517., 6650., 3308., 9034., 2570., 3398., 2588.,
       3554.,   50., 6286., 2823.,  680., 6168., 1763., 3043., 4408.,
       1502., 2179., 4743., 4763., 2552., 2975., 2790., 2605., 4827.,
       2119., 4956., 2462., 8384., 1801.])

In [222]:
bins = np.array([0, 100, 1000, 5000, 10000])

In [223]:
labels = bins.searchsorted(data)

In [224]:
labels

array([2, 3, 3, 3, 3, 4, 3, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4,
       3, 4, 3, 3, 3, 3, 1, 4, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3])

In [225]:
import pandas as pd

In [226]:
pd.Series(data).groupby(labels).mean()

1      50.000000
2     803.666667
3    3079.741935
4    7635.200000
dtype: float64

# Writing Fast NumPy Functions with Numba

In [227]:
def mean_distance(x: np.ndarray, y: np.ndarray) -> float:
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Implemented as an explicit element-by-element loop rather than a
    vectorised NumPy expression. Positions ``0 .. len(x) - 1`` of both
    inputs are read; an empty ``x`` results in a division by zero.
    """
    count = len(x)
    total = 0

    # Accumulate the signed differences one index at a time.
    for idx in range(count):
        total += x[idx] - y[idx]

    return total / count

In [228]:
x = rng.standard_normal(10_000_000)

In [229]:
y = rng.standard_normal(10_000_000)

In [230]:
%timeit mean_distance(x, y)

4.69 s ± 726 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [231]:
%timeit (x - y).mean()

41.9 ms ± 512 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [232]:
import numba as nb

In [233]:
numba_mean_distance = nb.jit(mean_distance, nopython= True)

In [234]:
@nb.jit(nopython= True)
def mean_numba(x: np.ndarray, y: np.ndarray) -> float:
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Same explicit index loop as the plain-Python version, compiled by
    Numba in nopython mode through the decorator.
    """
    count = len(x)
    total = 0

    # Accumulate the signed differences one index at a time.
    for idx in range(count):
        total += x[idx] - y[idx]

    return total / count

In [235]:
%timeit numba_mean_distance(x, y)

18.1 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [236]:
%timeit mean_numba(x, y)

18.9 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [237]:
@nb.njit(nb.float64(nb.float64[:], nb.float64[:]))
def mean_numba_n(x: np.ndarray, y: np.ndarray) -> float:
    """Return the mean of ``x - y`` using whole-array operations.

    The signature given to ``nb.njit`` declares two 1-D float64 arrays
    as inputs and a float64 result.
    """
    differences = x - y
    return differences.mean()

In [238]:
%timeit mean_numba_n(x, y)

86.3 ms ± 8.92 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Creating Custom numpy.ufunc Objects with Numba

In [239]:
from numba import vectorize

In [240]:
@vectorize
def nb_add(x, y):
    """Add one pair of operands; wrapped by Numba's ``vectorize`` decorator."""
    total = x + y
    return total

In [241]:
x = np.arange(10)

In [242]:
nb_add(x, x)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [244]:
nb_add.accumulate(x)

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45])

# Advanced Array Input and Output
## Memory-Mapped Files

In [253]:
mmap = np.memmap('mymap', dtype= 'float64', mode= 'w+', shape= (10000, 10000))

In [254]:
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [255]:
section = mmap[:5]

In [256]:
section[:] = rng.standard_normal((5, 10000))

In [257]:
mmap

memmap([[ 0.87261505, -0.43904416,  0.36104378, ..., -0.58223029,
          1.77299113, -0.06148116],
        [ 0.19793309,  2.54419456,  0.9167002 , ...,  0.17414909,
          0.8363528 , -0.64138733],
        [-0.86714563,  1.35076286,  0.00715899, ...,  0.08768643,
          1.79289497, -2.03982074],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [258]:
mmap.flush()

In [259]:
mmap

memmap([[ 0.87261505, -0.43904416,  0.36104378, ..., -0.58223029,
          1.77299113, -0.06148116],
        [ 0.19793309,  2.54419456,  0.9167002 , ...,  0.17414909,
          0.8363528 , -0.64138733],
        [-0.86714563,  1.35076286,  0.00715899, ...,  0.08768643,
          1.79289497, -2.03982074],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [260]:
del mmap

In [261]:
mmap = np.memmap('mymap', dtype= 'float64', mode= 'r+', shape= (10000, 10000))

In [262]:
mmap

memmap([[ 0.87261505, -0.43904416,  0.36104378, ..., -0.58223029,
          1.77299113, -0.06148116],
        [ 0.19793309,  2.54419456,  0.9167002 , ...,  0.17414909,
          0.8363528 , -0.64138733],
        [-0.86714563,  1.35076286,  0.00715899, ...,  0.08768643,
          1.79289497, -2.03982074],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [263]:
!rm mymap

In [264]:
arr_c = np.ones((100, 10000), order= 'C')

In [265]:
arr_f = np.ones((100, 10000), order= 'F')

In [266]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [267]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [268]:
arr_c.flags.c_contiguous

True

In [271]:
%timeit arr_c.sum(axis= 1)

401 µs ± 16.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [272]:
%timeit arr_f.sum(axis= 1)

931 µs ± 151 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [273]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [275]:
arr_c[:50].flags.contiguous

True

In [276]:
arr_f[:, :50].flags.contiguous

False