In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm
import jieba 

In [2]:
# Build a 3x3 grid holding 1..9, then zero out columns 1 and 2 of the first
# two rows with a single slice assignment.
arr2d = np.arange(1, 10).reshape(3, 3)
arr2d[0:2, 1:3] = 0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

## 布尔型索引

In [3]:
# Seven name labels, one per row of `data`; repeated names let a single
# comparison select several rows at once.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# 7x4 matrix of standard-normal draws (the call that `np.random.randn` wraps).
# NOTE(review): no seed is set in the visible cells, so the values shown in
# the recorded outputs will differ on every run.
data = np.random.standard_normal((7, 4))
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [4]:
# Show the 7x4 matrix of random values created in the previous cell.
data

array([[ 0.30651414, -0.15367842, -0.51024605,  0.10523056],
       [ 0.22865832, -2.24107297, -2.3812352 ,  1.21220648],
       [-0.24930395,  0.54611995, -0.4650835 ,  0.69061654],
       [-1.06902493, -0.47311428, -0.19760437, -0.20204087],
       [ 1.68888973, -2.41527936, -0.51082585, -1.21124954],
       [-1.36337818,  1.28246317, -0.96402984,  0.21886945],
       [-0.09607589, -0.00675764, -0.06504746, -1.77080361]])

In [5]:
# Like arithmetic operations, comparisons on arrays (such as ==) are vectorized:
# the result is a boolean array with one entry per name.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [6]:
# A boolean array can be used as an index: only the rows where the mask is
# True are kept.
data[names == 'Bob']

array([[ 0.30651414, -0.15367842, -0.51024605,  0.10523056],
       [-1.06902493, -0.47311428, -0.19760437, -0.20204087]])

In [7]:
# The boolean array must have the same length as the axis it indexes.
# Boolean arrays can also be combined with slices, integers, or sequences of
# integers; here the 'Bob' rows are restricted to columns 2 onward.
data[names == 'Bob', 2:]

array([[-0.51024605,  0.10523056],
       [-0.19760437, -0.20204087]])

In [8]:
# Boolean row mask plus a single integer column index: column 3 of the 'Bob' rows.
data[names == 'Bob', 3]

array([ 0.10523056, -0.20204087])

In [9]:
# != gives the element-wise complement of the == comparison.
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [10]:
# ~ negates a boolean array, so this selects every row that is not 'Bob'.
data[~(names == 'Bob')]

array([[ 0.22865832, -2.24107297, -2.3812352 ,  1.21220648],
       [-0.24930395,  0.54611995, -0.4650835 ,  0.69061654],
       [ 1.68888973, -2.41527936, -0.51082585, -1.21124954],
       [-1.36337818,  1.28246317, -0.96402984,  0.21886945],
       [-0.09607589, -0.00675764, -0.06504746, -1.77080361]])

In [12]:
# Store the comparison result once, then invert it with ~ to pick the
# non-'Bob' rows.
cond = (names == 'Bob')
data[~cond]

array([[ 0.22865832, -2.24107297, -2.3812352 ,  1.21220648],
       [-0.24930395,  0.54611995, -0.4650835 ,  0.69061654],
       [ 1.68888973, -2.41527936, -0.51082585, -1.21124954],
       [-1.36337818,  1.28246317, -0.96402984,  0.21886945],
       [-0.09607589, -0.00675764, -0.06504746, -1.77080361]])

In [13]:
# To select two of the three names, combine several boolean conditions with
# the boolean arithmetic operators & (and) and | (or).
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [14]:
# Keep only the rows whose name is 'Bob' or 'Will'.
data[mask]

array([[ 0.30651414, -0.15367842, -0.51024605,  0.10523056],
       [-0.24930395,  0.54611995, -0.4650835 ,  0.69061654],
       [-1.06902493, -0.47311428, -0.19760437, -0.20204087],
       [ 1.68888973, -2.41527936, -0.51082585, -1.21124954]])

In [15]:
# Selecting data with a boolean index always creates a copy, even when the
# returned array is identical to the original.
# Note: the Python keywords `and` and `or` do not work with boolean arrays;
# use & and | instead.
# Assigning through a boolean array is a common technique. The line below sets
# every negative value in `data` to 0, modifying `data` in place.
data[data < 0] = 0
data

array([[0.30651414, 0.        , 0.        , 0.10523056],
       [0.22865832, 0.        , 0.        , 1.21220648],
       [0.        , 0.54611995, 0.        , 0.69061654],
       [0.        , 0.        , 0.        , 0.        ],
       [1.68888973, 0.        , 0.        , 0.        ],
       [0.        , 1.28246317, 0.        , 0.21886945],
       [0.        , 0.        , 0.        , 0.        ]])

In [16]:
# A one-dimensional boolean array can also be used to assign whole rows (or
# columns): every row whose name is not 'Joe' is set to 7, in place.
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.22865832, 0.        , 0.        , 1.21220648],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 1.28246317, 0.        , 0.21886945],
       [0.        , 0.        , 0.        , 0.        ]])

## 花式索引

花式索引（Fancy indexing）是一个NumPy术语，它指的是利用整数数组进行索引。假设我们有一个8×4数组：

In [17]:
# Build an 8x4 float array in which every entry of row r equals r.
# A single broadcast assignment replaces the Python-level row loop: the
# (8, 1) column of row numbers is stretched across the four columns, and no
# loop variable is left behind in the notebook namespace.
arr = np.empty((8, 4))
arr[:] = np.arange(8)[:, np.newaxis]
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [18]:
# To select a subset of rows in a particular order, pass a list or ndarray of
# integers giving the desired order.
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [21]:
# Negative indices select rows counting from the end.
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [22]:
# Rebuild `arr` as an 8x4 array holding 0..31 for the multi-array indexing
# examples that follow. Passing several index arrays at once is a special
# case: the result is a 1-D array whose elements correspond to each tuple of
# indices.
arr = np.arange(8 * 4).reshape(8, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [23]:
# Two index arrays are paired element by element, so this picks the elements
# at (1, 0), (5, 3), (7, 1) and (2, 2) and returns them as a 1-D array.
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [26]:
# Chained indexing: first pick rows 1, 5, 7, 2, then reorder the columns of
# that result as 0, 3, 1, 2, giving a rectangular 4x4 selection.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]


array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])