# Fancy Indexing

In [1]:
import numpy as np

# Sample 1-D array holding the integers 0..15; every indexing example below uses it.
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [2]:
# A single integer index returns one element.
x[3]

3

In [3]:
x[3:9] # slice: index a contiguous range (end index 9 is excluded)

array([3, 4, 5, 6, 7, 8])

In [4]:
x[3:9:2] # slice with a fixed step of 2

array([3, 5, 7])

In [5]:
# Positions 3, 5, 8 are not evenly spaced, so no single slice covers them;
# picking them one by one yields a plain Python list.
[x[3], x[5], x[8]]

[3, 5, 8]

In [6]:
# The positions to pick out of x, collected in a list.
ind = [3, 5, 8]

In [7]:
x[ind] # fancy indexing: a list of indices selects several elements at once, as an array

array([3, 5, 8])

In [9]:
# The index array may be multi-dimensional: the result takes the shape of the
# index array (2x2 here), filled with the elements of x at those positions.
ind = np.array([[0, 2],
                [1, 3]])
x[ind]

array([[0, 2],
       [1, 3]])

In [10]:
# View the 16 values as a matrix with 4 rows; -1 lets NumPy infer the number
# of columns (4 here).
X = x.reshape(4, -1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [12]:
row = np.array([0, 1, 2]) # row indices of the points of interest
col = np.array([1, 2, 3]) # column indices of the points of interest
# The two index arrays are paired element by element: (0, 1), (1, 2), (2, 3).
X[row, col]

array([ 1,  6, 11])

In [13]:
# A fixed row combined with an index array: columns 1, 2, 3 of row 0.
X[0, col]

array([1, 2, 3])

In [14]:
# A slice combined with an index array: columns 1, 2, 3 of the first two rows.
X[:2, col]

array([[1, 2, 3],
       [5, 6, 7]])

In [15]:
# Boolean mask with one flag per column of X (True = keep the column).
# Note: this rebinds `col`, which previously held integer column indices.
col = [True, False, True, True]

In [16]:
# Rows 1 and 2, restricted to the columns whose mask entry is True (0, 2, 3).
X[1:3, col]

array([[ 4,  6,  7],
       [ 8, 10, 11]])

### numpy.array的比较

In [17]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [18]:
# Comparing an array with a scalar is done element by element and yields a
# boolean array of the same shape.
x < 3

array([ True,  True,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False])

In [20]:
# Element-wise "greater than".
x > 3

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [21]:
# Element-wise equality: True only at the position holding 3.
x == 3

array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False])

In [22]:
# Element-wise inequality: False only at the position holding 3.
x != 3

array([ True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [19]:
# A boolean array used as an index keeps exactly the elements where it is True.
x[x < 3]

array([0, 1, 2])

In [24]:
# Any element-wise boolean expression can serve as the mask; this one holds
# only where 2x == 24 - 4x, i.e. x == 4.
x[2 * x == 24 - 4 * x]

array([4])

In [25]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [26]:
# Comparisons work the same way on a 2-D array: the result is a boolean matrix.
X < 6

array([[ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [27]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [28]:
# Summing a boolean array counts its True entries (True counts as 1).
np.sum(x <= 3)

4

### np.any 和 np.all

In [29]:
np.count_nonzero(x <= 3) # count the non-zero (here: True) entries

4

In [30]:
np.any(x == 0) # is at least one element of the vector equal to 0?

True

In [31]:
# Is at least one element negative? (None is, so this is False.)
np.any(x < 0)

False

In [32]:
np.all(x > 0) # are all elements of the vector greater than 0? (False: x contains 0)

False

In [33]:
# Are all elements non-negative?
np.all(x >= 0)

True

In [35]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [34]:
# Count the even entries of the whole matrix.
np.sum(X % 2 == 0)

8

In [36]:
# axis=1 sums along each row: number of even entries per row.
np.sum(X % 2 == 0, axis=1)

array([2, 2, 2, 2])

In [37]:
# axis=0 sums down each column: number of even entries per column.
np.sum(X % 2 == 0, axis=0)

array([4, 0, 4, 0])

In [38]:
# Per row: are all entries greater than 0? Only the first row fails, because
# it contains 0.
np.all(X > 0, axis=1)

array([False,  True,  True,  True])

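在 C/Java 等语言中，`&&` 是逻辑运算符，连接左右两个条件（即两个布尔值），结果是一个布尔值；Python 中没有 `&&`，对应的关键字是 `and`，直接写 `&&` 会报语法错误。 <br>
`&`（以及 `|`、`~`）是位运算符：当左右两边是布尔数组时，NumPy 会按对应的索引逐元素地进行与（或、非）运算，得到一个新的布尔数组，因此组合数组条件时要使用 `&`，而不是 `&&` 或 `and`。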

In [39]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [40]:
# `&` combines two boolean arrays element by element (logical AND). The
# parentheses are required because `&` binds more tightly than `>` and `<`.
np.sum((x > 3) & (x < 10))

6

In [41]:
# `&&` is not a Python operator, so this expression cannot even be parsed.
# Compile it from a string and report the SyntaxError instead of letting the
# cell raise, so that "Restart & Run All" continues past this demonstration.
# The working element-wise form is `(x > 3) & (x < 10)`.
invalid_expr = "np.sum((x > 3) && (x < 10))"
try:
    compile(invalid_expr, "<demo>", "eval")
except SyntaxError as err:
    print("SyntaxError:", err.msg)

SyntaxError: invalid syntax (<ipython-input-41-d834f65999a2>, line 1)

In [46]:
# `|` is the element-wise OR: count the elements that are even or greater than 10.
np.sum((x % 2 == 0) | (x > 10))

11

In [47]:
# `~` is the element-wise NOT: count the elements that are not equal to 0.
np.sum(~(x == 0))

15

In [49]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [50]:
# Build a boolean mask from the last column (index 3) and use it to select
# whole rows: keep the rows whose last entry is divisible by 3.
X[X[:,3] % 3 == 0, :]

array([[ 0,  1,  2,  3],
       [12, 13, 14, 15]])

### Pandas
Pandas 库提供 DataFrame 数据结构，可以更加灵活地处理数据。不过在机器学习中，scikit-learn 封装的函数接收的都是 NumPy 矩阵，所以我们通常先使用 Pandas 对数据进行一系列预处理，再将 Pandas 的数据转化为 NumPy 矩阵，送给机器学习算法。