### ndarray 对象的内部原理

In [1]:
import numpy as np

In [2]:
# Shape of a 10x5 array of ones: one length per dimension.
np.ones((10, 5)).shape

(10L, 5L)

In [3]:
# Strides give the number of bytes to step to reach the next element along each axis.
np.ones((3, 4, 5), dtype=np.float64).strides     # tuple of strides, one entry per axis

(160L, 40L, 8L)

In [4]:
# Ten ones stored as unsigned 32-bit integers.
ints = np.ones(shape=10, dtype=np.uint32)
ints

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint32)

In [5]:
# Ten ones stored as 32-bit floats.
floats = np.ones(shape=10, dtype=np.float32)
floats

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [6]:
# uint32 is a concrete dtype under the abstract np.integer category.
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
# float32 is a concrete dtype under the abstract np.floating category.
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()       # list every superclass (method resolution order)

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

### 数组重塑

In [9]:
# Start from a flat array of 8 consecutive integers.
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [10]:
# Arrange the 8 elements as 4 rows of 2, filling row by row.
arr.reshape(4, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [11]:
# Reshapes can be chained: the 8 elements are arranged as 4x2, then as 2x4.
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [12]:
# Rebind arr to a flat array of 15 consecutive integers.
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [13]:
# -1 asks NumPy to infer that dimension from the data: 15 elements / 3 rows = 5 columns.
arr.reshape((3, -1))

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [14]:
# Rebind arr to the 3x5 result so the following cell works on the 2-D array.
arr = arr.reshape((3, -1))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [15]:
# Flatten the 2-D array back to one dimension.
arr = arr.ravel()
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [16]:
# flatten() always returns a copy of the data.
arr = np.arange(15).reshape(5, 3).flatten()
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C 和 Fortran 顺序

In [17]:
# Fortran (column-major) order: walk down each column in turn.
arr = np.arange(12).reshape(3, 4).ravel(order="F")
arr

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [18]:
# Rebuild the 3x4 array of 0..11 (arr was flattened in the previous cell).
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
# C (row-major) order: read each row left to right, top to bottom.
arr = arr.ravel(order="C")
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

### 数组的合并与拆分

In [20]:
# Two 2x3 arrays; joining along axis 0 appends arr2's rows below arr1's.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate((arr1, arr2), axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [21]:
# axis=1 joins the arrays side by side, adding arr2's columns after arr1's.
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [22]:
# vstack and hstack are alternative joining helpers.
# vstack stacks row-wise; here the result matches concatenate(..., axis=0).
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
# hstack joins column-wise; here the result matches concatenate(..., axis=1).
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
# 5x2 array of random floats in [0, 1). No seed is set in the visible cells,
# so the values differ on every run.
arr = np.random.rand(5, 2)
arr

array([[0.05147884, 0.94672191],
       [0.49418984, 0.45749651],
       [0.8104054 , 0.58711855],
       [0.89966709, 0.00327122],
       [0.5448253 , 0.43258813]])

In [25]:
# Cut along axis 0 before rows 1 and 3: fir = arr[:1], sec = arr[1:3], thi = arr[3:].
fir, sec, thi = np.split(arr, [1, 3])

In [26]:
# First piece: arr[:1], i.e. row 0 only.
fir

array([[0.05147884, 0.94672191]])

In [27]:
# Second piece: arr[1:3], i.e. rows 1 and 2.
sec

array([[0.49418984, 0.45749651],
       [0.8104054 , 0.58711855]])

In [28]:
# Third piece: arr[3:], i.e. the remaining rows 3 and 4.
thi

array([[0.89966709, 0.00327122],
       [0.5448253 , 0.43258813]])

### 数组的重复操作

In [29]:
# With a scalar count, repeat() duplicates every element that many times.
arr = np.arange(3)
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [30]:
# With a list of counts, element i is repeated counts[i] times.
# NOTE: this rebinds arr2, replacing the 2x3 array used in the concatenation cells.
arr2 = np.arange(4)
arr2.repeat([1,2,3,4])

array([0, 1, 1, 2, 2, 2, 3, 3, 3, 3])

In [31]:
# randn is imported here and called unqualified by later cells, so this cell must run first.
from numpy.random import randn
# 2x2 array of samples from the standard normal distribution (unseeded).
arr = randn(2, 2)
arr

array([[1.26594701, 1.01394027],
       [0.5930449 , 0.21739413]])

In [32]:
# Along axis 1 each element is duplicated within its row (2x2 -> 2x4).
arr.repeat(2, axis=1)

array([[1.26594701, 1.26594701, 1.01394027, 1.01394027],
       [0.5930449 , 0.5930449 , 0.21739413, 0.21739413]])

In [33]:
# Along axis 0 each row is duplicated (2x2 -> 4x2).
arr.repeat(2, axis=0)

array([[1.26594701, 1.01394027],
       [1.26594701, 1.01394027],
       [0.5930449 , 0.21739413],
       [0.5930449 , 0.21739413]])

In [34]:
# tile() repeats the whole array as a block; a scalar 2 lays two copies side by side.
np.tile(arr, 2)

array([[1.26594701, 1.01394027, 1.26594701, 1.01394027],
       [0.5930449 , 0.21739413, 0.5930449 , 0.21739413]])

In [35]:
# Show arr again for reference; the tile call above did not reassign it.
arr

array([[1.26594701, 1.01394027],
       [0.5930449 , 0.21739413]])

In [36]:
# reps=(2, 1): two copies stacked along axis 0, a single copy along axis 1.
np.tile(arr, (2, 1))

array([[1.26594701, 1.01394027],
       [0.5930449 , 0.21739413],
       [1.26594701, 1.01394027],
       [0.5930449 , 0.21739413]])

In [37]:
# Indexing with a list of positions is shorthand for arr.take(idxs).
arr = np.arange(0, 1000, 100)
idxs = [7, 1, 2, 6]
arr[idxs]

array([700, 100, 200, 600])

In [38]:
# Display the full array that the indices above select from.
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [39]:
arr.take(idxs)    # get: returns the same selection as arr[idxs]

array([700, 100, 200, 600])

In [40]:
arr.put(idxs, 360)   # set: writes 360 at those positions in place; nothing is displayed

In [41]:
# arr was modified in place by put(): positions 1, 2, 6 and 7 now hold 360.
arr

array([  0, 360, 360, 300, 400, 500, 360, 360, 800, 900])

In [42]:
# Fresh 2x4 array of random normal samples (relies on randn imported in an earlier cell).
arr = randn(2, 4)
arr

array([[-0.26561297,  0.47386667,  0.64956465,  0.87491378],
       [ 2.20317611,  0.87804996,  0.24313928, -0.21054025]])

In [43]:
# Column positions to pick; an index may appear more than once.
idxs = [2, 0, 2, 1]

In [44]:
# Pick columns 2, 0, 2, 1 (in that order) from every row.
arr.take(idxs, axis=1)

array([[ 0.64956465, -0.26561297,  0.64956465,  0.47386667],
       [ 0.24313928,  2.20317611,  0.24313928,  0.87804996]])

### 广播

In [46]:
# Simplest case of broadcasting: the scalar 4 is applied to every element.
# (A bare `arr` line was removed here: only the last expression of a cell is
# displayed, so it had no effect.)
arr = np.arange(5)
arr * 4

array([ 0,  4,  8, 12, 16])

In [49]:
# 4x3 array of random normal samples used for the demeaning example below.
arr = randn(4, 3)
arr

array([[ 0.22644871, -1.29339766, -1.39954188],
       [ 0.42711014,  1.58910887,  0.78720495],
       [ 0.91661834, -0.41503798,  0.49866597],
       [ 0.87653368, -0.01792446, -0.27854418]])

In [50]:
# axis=0 averages down the rows, giving one mean per column (3 values).
arr.mean(axis=0)

array([ 0.61167772, -0.03431281, -0.09805378])

In [51]:
# Subtract each column's mean; the length-3 vector of means is broadcast across all 4 rows.
demeaned = arr - arr.mean(axis=0)
demeaned

array([[-0.38522901, -1.25908486, -1.3014881 ],
       [-0.18456758,  1.62342168,  0.88525873],
       [ 0.30494063, -0.38072517,  0.59671976],
       [ 0.26485596,  0.01638835, -0.18049039]])

In [52]:
# axis=1 averages across the columns, giving one mean per row (4 values).
arr.mean(axis=1)

array([-0.82216361,  0.93447465,  0.33341545,  0.19335501])