# ndarray 对象的内部机理

In [1]:
import numpy as np
np.ones((10, 5)).shape

(10, 5)

In [2]:
# strides are reported in bytes per step along each axis: a float64 item is
# 8 bytes, one row of 5 items is 40 bytes, one 4x5 plane is 160 bytes.
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

## NumPy 数据类型体系

In [5]:
ints = np.ones(10, dtype=np.uint16)

In [7]:
floats = np.ones(10, dtype=np.float32)

In [8]:
np.issubdtype(ints.dtype, np.integer)

True

In [9]:
np.issubdtype(floats.dtype, np.floating)

True

In [11]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

# 高级数组操作

## 数组重塑

In [12]:
arr = np.arange(8)

In [13]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [14]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [15]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [16]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [17]:
arr = np.arange(15)

In [18]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [19]:
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [20]:
other_arr = np.ones((3, 5))

In [21]:
other_arr.shape

(3, 5)

In [22]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [26]:
arr = np.arange(15).reshape((5, 3))

In [27]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [29]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [34]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [30]:
# The nested lists are ragged (different depths/lengths), so NumPy cannot
# build a regular numeric array from them. dtype=object must be requested
# explicitly: recent NumPy versions raise ValueError instead of silently
# falling back to an object array.
test = np.array([[], 1, [20, 300], [[4000, 50000, 600000]]], dtype=object)

In [33]:
test.ravel()

array([[], 1, [20, 300], [[4000, 50000, 600000]]], dtype=object)

## C 和 Fortran 顺序

In [36]:
arr = np.arange(12).reshape((3, 4))

In [37]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [39]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [40]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## 数组的合并和拆分

In [41]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

In [42]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [43]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [44]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [45]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [46]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [47]:
from numpy.random import randn

In [48]:
arr = randn(5, 2)

In [49]:
arr

array([[ 0.27743769,  0.12388054],
       [-0.36138058,  0.71135788],
       [ 0.00771352, -0.76818213],
       [-0.91053862, -0.92456691],
       [ 0.25504044,  1.69484135]])

In [50]:
# [1, 3] are the row indices at which to cut, so the 5-row array is split
# into rows [0:1], [1:3] and [3:5].
first, second, third = np.split(arr, [1, 3])

In [51]:
first

array([[ 0.27743769,  0.12388054]])

In [52]:
second

array([[-0.36138058,  0.71135788],
       [ 0.00771352, -0.76818213]])

In [53]:
third

array([[-0.91053862, -0.92456691],
       [ 0.25504044,  1.69484135]])

### 堆叠辅助类：r_ 和 c_

In [54]:
arr = np.arange(6)

In [55]:
arr1 = arr.reshape((3, 2))

In [56]:
arr2 = randn(3, 2)

In [57]:
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 0.99047197,  0.14541813],
       [ 0.56807669,  0.75167902],
       [ 1.04691196, -0.19412328]])

In [59]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [ 0.99047197,  0.14541813,  3.        ],
       [ 0.56807669,  0.75167902,  4.        ],
       [ 1.04691196, -0.19412328,  5.        ]])

In [61]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

## 元素的重复操作：tile 和 repeat

In [62]:
arr = np.arange(3)

In [63]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [64]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [65]:
arr = randn(2, 2)

In [66]:
arr

array([[ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131]])

In [67]:
arr.repeat(2, axis=0)

array([[ 1.68940283,  0.17191471],
       [ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131],
       [-0.65394844, -0.27945131]])

In [68]:
arr.repeat(2, axis=1)

array([[ 1.68940283,  1.68940283,  0.17191471,  0.17191471],
       [-0.65394844, -0.65394844, -0.27945131, -0.27945131]])

In [71]:
arr.repeat([2, 3], axis=0)

array([[ 1.68940283,  0.17191471],
       [ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131],
       [-0.65394844, -0.27945131],
       [-0.65394844, -0.27945131]])

In [73]:
arr.repeat([2, 3], axis=1)

array([[ 1.68940283,  1.68940283,  0.17191471,  0.17191471,  0.17191471],
       [-0.65394844, -0.65394844, -0.27945131, -0.27945131, -0.27945131]])

In [74]:
arr

array([[ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131]])

In [75]:
np.tile(arr, 2)

array([[ 1.68940283,  0.17191471,  1.68940283,  0.17191471],
       [-0.65394844, -0.27945131, -0.65394844, -0.27945131]])

In [76]:
arr

array([[ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131]])

In [78]:
np.tile(arr, (2, 1))

array([[ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131],
       [ 1.68940283,  0.17191471],
       [-0.65394844, -0.27945131]])

In [79]:
np.tile(arr, (3, 2))

array([[ 1.68940283,  0.17191471,  1.68940283,  0.17191471],
       [-0.65394844, -0.27945131, -0.65394844, -0.27945131],
       [ 1.68940283,  0.17191471,  1.68940283,  0.17191471],
       [-0.65394844, -0.27945131, -0.65394844, -0.27945131],
       [ 1.68940283,  0.17191471,  1.68940283,  0.17191471],
       [-0.65394844, -0.27945131, -0.65394844, -0.27945131]])

## 花式索引的等价函数：take 和 put

In [80]:
arr = np.arange(10) * 100

In [81]:
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [82]:
inds = [7, 1, 2, 6]

In [83]:
arr[inds]

array([700, 100, 200, 600])

In [84]:
arr.take(inds)

array([700, 100, 200, 600])

In [85]:
# put writes into arr in place: the scalar 42 is stored at every index in inds.
arr.put(inds, 42)

In [86]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [88]:
arr.put(inds, [40, 41, 42, 43])

In [89]:
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [90]:
inds = [2, 0, 2, 1]

In [91]:
arr = randn(2, 4)

In [92]:
arr

array([[ 0.22839752, -1.82750318, -0.3401264 ,  1.05481777],
       [ 0.93482005,  0.53067015, -0.22762429, -2.01358806]])

In [93]:
arr.take(inds, axis=1)

array([[-0.3401264 ,  0.22839752, -0.3401264 , -1.82750318],
       [-0.22762429,  0.93482005, -0.22762429,  0.53067015]])

In [94]:
arr = randn(1000, 50)

In [95]:
inds = np.random.permutation(1000)[:500]

In [96]:
%timeit arr[inds]

The slowest run took 63.63 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 23.1 µs per loop


In [98]:
%timeit arr.take(inds, axis=0)

The slowest run took 9.95 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 19.8 µs per loop


# 广播

In [99]:
arr = np.arange(5)

In [100]:
arr

array([0, 1, 2, 3, 4])

In [101]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [104]:
arr = randn(4, 3)

In [105]:
arr.mean()

0.24976484802786936

In [106]:
demeaned = arr - arr.mean(0)

In [107]:
demeaned

array([[-0.48997835,  0.72863407, -0.04663689],
       [ 1.08682661, -1.43803439,  1.5690455 ],
       [-0.94898696, -0.3114335 , -1.74069627],
       [ 0.35213869,  1.02083383,  0.21828766]])

In [108]:
demeaned.mean(0)

array([  0.00000000e+00,  -5.55111512e-17,  -5.55111512e-17])

In [109]:
arr

array([[-0.41138204,  0.87982988,  0.47286554],
       [ 1.16542292, -1.28683858,  2.08854793],
       [-0.87039066, -0.16023769, -1.22119384],
       [ 0.430735  ,  1.17202964,  0.73779009]])

In [110]:
row_means = arr.mean(1)

In [111]:
row_means

array([ 0.31377113,  0.65571076, -0.7506074 ,  0.78018491])

In [112]:
row_means.reshape((4, 1))

array([[ 0.31377113],
       [ 0.65571076],
       [-0.7506074 ],
       [ 0.78018491]])

In [113]:
demeaned = arr - row_means.reshape((4, 1))

In [117]:
demeaned

array([[-0.72515317,  0.56605875,  0.15909441],
       [ 0.50971216, -1.94254933,  1.43283717],
       [-0.11978326,  0.5903697 , -0.47058644],
       [-0.34944991,  0.39184473, -0.04239482]])

In [118]:
demeaned.mean(1)

array([ -5.55111512e-17,   0.00000000e+00,  -3.70074342e-17,
         9.25185854e-17])

## 沿其他轴向广播

In [119]:
arr

array([[-0.41138204,  0.87982988,  0.47286554],
       [ 1.16542292, -1.28683858,  2.08854793],
       [-0.87039066, -0.16023769, -1.22119384],
       [ 0.430735  ,  1.17202964,  0.73779009]])

In [121]:
arr.mean(1)

array([ 0.31377113,  0.65571076, -0.7506074 ,  0.78018491])

In [120]:
# Intentional failure: the row means have shape (4,), which cannot be
# broadcast against the (4, 3) array, so this raises ValueError.
arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [122]:
arr - arr.mean(1).reshape((4, 1))

array([[-0.72515317,  0.56605875,  0.15909441],
       [ 0.50971216, -1.94254933,  1.43283717],
       [-0.11978326,  0.5903697 , -0.47058644],
       [-0.34944991,  0.39184473, -0.04239482]])

In [123]:
arr = np.zeros((4, 4))

In [124]:
arr_3d = arr[:, np.newaxis, :]

In [125]:
arr_3d.shape

(4, 1, 4)

In [126]:
arr_3d

array([[[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]]])

In [127]:
arr_1d = np.random.normal(size=3)

In [128]:
arr_1d

array([-1.19908868, -0.05529584, -0.46484449])

In [129]:
arr_1d[:, np.newaxis]

array([[-1.19908868],
       [-0.05529584],
       [-0.46484449]])

In [130]:
arr_1d[np.newaxis, :]

array([[-1.19908868, -0.05529584, -0.46484449]])

In [131]:
arr = randn(3, 4, 5)

In [132]:
arr

array([[[ 0.5194294 , -0.7442728 ,  1.49701911,  0.22422336,  1.70335143],
        [-0.08783853, -0.12999978,  0.99877324,  0.03304517, -1.18395326],
        [ 0.34878713,  0.06039431,  0.46309393, -1.61840588,  1.19965277],
        [-0.97364808, -0.2022914 ,  0.24581044, -0.05907156, -0.14047192]],

       [[-0.06114596,  2.19909001, -0.53067962, -0.67236252, -0.73449416],
        [ 0.52194215,  0.47239867, -0.4738235 , -0.85810744,  0.07952663],
        [-1.13268847,  0.14355656, -0.60034699,  2.38196413,  0.2532706 ],
        [-0.10473964, -0.54885942,  0.26191975,  0.72513098, -1.47957981]],

       [[ 0.66890311,  0.56160003,  0.75560697,  0.58340857,  1.40148798],
        [ 1.36153086,  1.57886073, -0.53253091,  1.11498545, -0.71744747],
        [-0.72478858,  0.11183413, -0.15047934, -1.14648594, -0.20510516],
        [ 0.39463441,  1.47151478, -0.90075559,  0.24587123, -1.09743845]]])

In [133]:
depth_means = arr.mean(2)

In [134]:
depth_means

array([[ 0.6399501 , -0.07399463,  0.09070445, -0.2259345 ],
       [ 0.04008155, -0.0516127 ,  0.20915116, -0.22922563],
       [ 0.79420133,  0.56107973, -0.42300498,  0.02276527]])

In [135]:
demeaned = arr - depth_means[:, :, np.newaxis]

In [136]:
demeaned

array([[[-0.1205207 , -1.3842229 ,  0.85706901, -0.41572674,  1.06340133],
        [-0.0138439 , -0.05600515,  1.07276788,  0.1070398 , -1.10995863],
        [ 0.25808268, -0.03031014,  0.37238947, -1.70911034,  1.10894832],
        [-0.74771357,  0.0236431 ,  0.47174494,  0.16686294,  0.08546259]],

       [[-0.10122751,  2.15900846, -0.57076117, -0.71244407, -0.77457571],
        [ 0.57355484,  0.52401137, -0.4222108 , -0.80649474,  0.13113933],
        [-1.34183964, -0.0655946 , -0.80949816,  2.17281296,  0.04411944],
        [ 0.12448599, -0.31963379,  0.49114537,  0.95435661, -1.25035418]],

       [[-0.12529822, -0.2326013 , -0.03859436, -0.21079276,  0.60728664],
        [ 0.80045113,  1.017781  , -1.09361064,  0.55390571, -1.2785272 ],
        [-0.3017836 ,  0.53483911,  0.27252564, -0.72348096,  0.21789982],
        [ 0.37186913,  1.4487495 , -0.92352086,  0.22310595, -1.12020372]]])

In [137]:
demeaned.mean(2)

array([[  0.00000000e+00,  -4.44089210e-17,   0.00000000e+00,
          2.77555756e-17],
       [  2.22044605e-17,  -2.22044605e-17,   1.11022302e-16,
          0.00000000e+00],
       [  0.00000000e+00,  -1.33226763e-16,   8.32667268e-17,
          0.00000000e+00]])

In [138]:
def demean_axis(arr, axis=0):
    """Subtract the mean along ``axis`` from ``arr`` using broadcasting.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array of any dimensionality (at least 1-D).
    axis : int, optional
        Axis along which the mean is computed and removed. Defaults to 0.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr`` whose mean along ``axis`` is
        zero up to floating-point rounding.
    """
    means = arr.mean(axis)

    # Build an index that keeps every remaining axis of ``means`` and
    # re-inserts a length-1 axis where the reduced axis used to be, so that
    # ``means`` broadcasts against ``arr``.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # The index must be a tuple: indexing with a list of slices/None is not
    # interpreted as a multidimensional index by current NumPy versions.
    return arr - means[tuple(indexer)]

## 通过广播设置数组的值

In [139]:
arr = np.zeros((4, 3))

In [140]:
arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [141]:
arr[:] = 5

In [142]:
arr

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [143]:
col = np.array([1.28, -0.42, 0.44, 1.6])

In [144]:
arr[:] = col[:, np.newaxis]

In [145]:
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [146]:
arr[:2] = [[-1.37], [0.509]]

In [147]:
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])