## 1、一维数据的索引和切片

In [1]:
import numpy as np

In [7]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
arr[5]

5

In [9]:
arr[1:3]

array([1, 2])

In [10]:
arr[1:3] = 10

In [11]:
arr

array([ 0, 10, 10,  3,  4,  5,  6,  7,  8,  9])

如上，将一个标量值赋值给一个切片时，该值会自动传播到整个选区（这就是广播机制）

In [14]:
arr_slice = arr[1:3]

In [15]:
arr_slice[1] = 1234

In [16]:
arr_slice

array([  10, 1234])

In [17]:
arr

array([   0,   10, 1234,    3,    4,    5,    6,    7,    8,    9])

如上，对切片的修改会直接反映到原数组上，说明切片得到的只是原始数据的视图：视图与原数组共享同一块底层数据缓冲区，并不会复制数据。  
如果想要得到ndarray切片的一份副本而非视图，就需要显式地进行复制操作，例如arr[1:3].copy()。

## 2、 多维数据的索引和切片

In [18]:
# Indexing 2-D arrays (the same rules extend to higher-dimensional arrays).
# NOTE(review): the last element is 8, not 9 — possibly a typo for [7, 8, 9]; confirm.
arr2d = np.array([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 8]])

In [20]:
arr2d[1]

array([4, 5, 6])

In [21]:
arr2d[1][2]

6

In [25]:
arr2d[1, 2]

6

In [22]:
arr2d[0:2, 0:2]

array([[1, 2],
       [4, 5]])

In [23]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 8]])

## 3、花式索引    

它是指利用整数数组进行索引  
花式索引与切片不同：切片返回的是视图，而花式索引总是返回数据的副本（拷贝），因此修改花式索引得到的新数组不会影响原数组。

In [38]:
# Fancy indexing on a 1-D array: a list of integer positions selects those elements.
x1d = np.array([1,2,3,4,5,6])
print(x1d[[0,1,2]]) # [1 2 3]
print(x1d[[-1,-2,-3]]) # [6 5 4]; negative positions count from the end

# The 2-D array keeps the name x because later cells read it.
x = np.array([[1,2],[3,4],[5,6]])
print(x[[0,1]]) # rows 0 and 1 -> [[1 2] [3 4]]
print(x[[0,1],[0,1]]) # paired row/column indices: x[0, 0] and x[1, 1] -> [1 4]
print(x[[0,1]][:,[0,1]]) # rows 0-1, then columns 0-1 of that result -> [[1 2] [3 4]]

[1 2 3]
[6 5 4]
[[1 2]
 [3 4]]
[1 4]
[[1 2]
 [3 4]]


In [24]:
# Fancy indexing
# np.empty allocates the array without initializing it, so its contents are
# arbitrary until values are assigned.
arr = np.empty((8, 4))

In [25]:
arr

array([[9.29603063e+199, 1.21906099e-152, 1.94809318e+227,
        4.67366156e+257],
       [1.96086583e+243, 1.39487768e-258, 3.01468689e+161,
        3.17095864e+180],
       [2.19526213e-152, 9.04115219e+271, 3.81388253e+180,
        1.13556608e-153],
       [1.27734658e-152, 6.19490016e+223, 2.97327062e+222,
        6.01334628e-154],
       [9.05437256e+271, 1.37798837e-094, 1.46922885e+195,
        2.45769427e+198],
       [7.26613689e+223, 3.45249045e-085, 1.14304382e+228,
        2.41798950e+198],
       [8.76421364e+252, 5.36835912e+252, 8.92708268e-106,
        9.05437256e+271],
       [3.20837920e-104, 1.13378159e-153, 3.17095812e+180,
        1.46914166e+195]])

In [26]:
# Fill row i with the value i + 1 using one broadcast assignment instead of a
# Python-level loop: the (8, 1) column holding 1..8 is stretched across every
# column of the first eight rows.
arr[:8] = np.arange(1, 9).reshape(-1, 1)

In [27]:
arr

array([[1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.],
       [8., 8., 8., 8.]])

花式索引使用整数列表（或整数 ndarray）进行索引。

In [28]:
arr[[4, 3, 1, 6]] # to select a subset of rows in a particular order, pass a
# list or ndarray of integers giving the desired order

array([[5., 5., 5., 5.],
       [4., 4., 4., 4.],
       [2., 2., 2., 2.],
       [7., 7., 7., 7.]])

In [30]:
arr[[-5, -3]] # negative indices select rows counting from the end

array([[4., 4., 4., 4.],
       [6., 6., 6., 6.]])

In [32]:
# Build a 4x5 array holding 0..19 by reshaping a 1-D range
arr = np.arange(20).reshape((4, 5))

In [33]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [35]:
arr[[1, 3],[2, 3]] # passing two index lists pairs them up element-wise:
# the result is a 1-D array holding the elements at (1, 2) and (3, 3)

array([ 7, 18])

In [36]:
arr[[1,3]]

array([[ 5,  6,  7,  8,  9],
       [15, 16, 17, 18, 19]])

In [102]:
arr[[1, 3]][:, 1:3]

array([[ 6,  7],
       [16, 17]])

In [39]:
# Changing an array produced by fancy indexing does not affect the original array
print(x)
a = x[[0,1],[0,1]]

print(a)
a[0]=123
print(a)
print(x)

[[1 2]
 [3 4]
 [5 6]]
[1 4]
[123   4]
[[1 2]
 [3 4]
 [5 6]]


In [40]:
# Changing an array produced by slicing does affect the original array
print(x)
a = x[0:1,0:1]
print(a)
a[0]=100
print(a)
print(x)

[[1 2]
 [3 4]
 [5 6]]
[[1]]
[[100]]
[[100   2]
 [  3   4]
 [  5   6]]


## 4、布尔型索引

In [2]:
# Boolean indexing
# One name per row of `data`; comparing `names` with a value yields a row mask.
names = np.array(['小明', '小红', '小强', '小桌子', '小凳子'])

# Draw the sample from a seeded generator so that the values displayed by the
# following cells are reproduced on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
data = rng.standard_normal((len(names), 4))

In [3]:
names

array(['小明', '小红', '小强', '小桌子', '小凳子'], dtype='<U3')

In [4]:
data

array([[-0.67595174, -0.58088847, -0.1663694 ,  0.22740215],
       [-0.167253  , -0.78160696, -0.84453411, -0.36873025],
       [ 1.13842487, -0.44894235, -1.80988227, -0.62226381],
       [-0.14940138, -0.39667472, -1.32883891,  1.09516725],
       [-1.11746884,  0.76112227, -0.66405181, -0.23107026]])

In [5]:
names == '小桌子' # the comparison is element-wise and returns a boolean array

array([False, False, False,  True, False])

In [46]:
data[names == '小桌子']  # the boolean array is applied to the rows

array([[-0.95898121,  0.82144899, -2.01163347, -0.73865483]])

In [47]:
data[names != '小桌子'] 

array([[ 0.06228938, -0.4839554 ,  1.29115013, -1.1722873 ],
       [-0.43805903, -1.17372594, -1.90554743, -1.02893115],
       [ 0.3277612 , -0.45095656,  0.49333702, -0.26449954],
       [-0.58333065,  0.53868344, -0.48118255, -1.19602201]])

In [49]:
mask = (names == '小桌子')|(names == '小强')
# Define a mask variable; combine conditions with & (and) and | (or)

In [50]:
mask

array([False, False,  True,  True, False])

In [51]:
data[mask]

array([[ 0.3277612 , -0.45095656,  0.49333702, -0.26449954],
       [-0.95898121,  0.82144899, -2.01163347, -0.73865483]])

In [52]:
data < 0

array([[False,  True, False,  True],
       [ True,  True,  True,  True],
       [False,  True, False,  True],
       [ True, False,  True,  True],
       [ True, False,  True,  True]])

In [53]:
# Set every negative value in data to zero
data[data < 0 ] = 0

In [54]:
data

array([[0.06228938, 0.        , 1.29115013, 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.3277612 , 0.        , 0.49333702, 0.        ],
       [0.        , 0.82144899, 0.        , 0.        ],
       [0.        , 0.53868344, 0.        , 0.        ]])

## 5、数据转置和轴对换

In [55]:
arr = np.arange(15).reshape((3, 5))

In [56]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [57]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [58]:
# Typical use of the transpose: the matrix product of arr with its own
# transpose. For 2-D arrays the @ operator is the preferred spelling of matrix
# multiplication and gives the same result as np.dot.
arr @ arr.T

array([[ 30,  80, 130],
       [ 80, 255, 430],
       [130, 430, 730]])

简单的转置用 `.T` 即可；三维及更高维的数组可以使用 `transpose` 和 `swapaxes` 进行轴变换。