In [None]:
import numpy as np

## 数组的创建

通过`np.array`可以把Python list转为numpy数组

In [None]:
# np.array turns a flat Python list into a 1-D array.
arr1 = np.array([1, 3, 5, 7, 9])
arr1

array([1, 3, 5, 7, 9])

In [None]:
# A list of two equal-length lists becomes a 2-D array (2 rows x 3 columns).
arr2 = np.array([
    [1, 2, 3],
    [10, 20, 30]
])
arr2

array([[ 1,  2,  3],
       [10, 20, 30]])

In [None]:
# Three levels of nesting give a 3-D array: 2 blocks, each holding 2 rows of 3 values.
arr3 = np.array([
    [
        [1, 2, 3],
        [4, 5, 6],
    ],
    [
        [10, 20, 30],
        [40, 50, 60],
    ],
])
arr3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[10, 20, 30],
        [40, 50, 60]]])

## 数据类型

和C类似，NumPy中的数组是有类型的

https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes-constructing

In [None]:
# An array built from Python floats; no dtype is requested, so NumPy chooses one.
np.array([1.5, 3.2, 5.1, 7.9, 9.0])

array([1.5, 3.2, 5.1, 7.9, 9. ])

In [None]:
# .dtype reports the element type; this float input is stored as float64.
np.array([1.5, 3.2, 5.1, 7.9, 9.0]).dtype

dtype('float64')

In [None]:
# Force an explicit dtype: 'u2' is a 2-byte unsigned integer (shown as uint16).
# The fractional parts are dropped (1.5 -> 1) and 0x1234 is stored as 4660.
np.array([1.5, 3.2, 5.1, 7.9, 9.0, 0x1234], dtype='u2')

array([   1,    3,    5,    7,    9, 4660], dtype=uint16)

In [None]:
# Strings get a fixed-width unicode dtype sized to the longest element
# ('text 1 2 3' has 10 characters, hence '<U10').
np.array([
  'test1', 'text 1 2 3', 'hello', 'world',
])

array(['test1', 'text 1 2 3', 'hello', 'world'], dtype='<U10')

操作NumPy数组时需要密切注意数据类型，避免出现截断（字符串）或者溢出（无符号整型）

In [None]:
# Requesting 'U3' keeps only the first 3 characters of every string;
# the truncation happens silently, without an error.
np.array([
  'test1', 'text 1 2 3', 'hello', 'world',
], dtype='U3')

array(['tes', 'tex', 'hel', 'wor'], dtype='<U3')

In [None]:
# uint8 can only hold 0..255, so adding 252 wraps around for the last three elements.
arr = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint8)
arr + 252

array([253, 254, 255,   0,   1,   2], dtype=uint8)

In [None]:
# Widen to uint32 first so that adding 252 no longer overflows.
arr.astype('uint32') + 252

array([253, 254, 255, 256, 257, 258], dtype=uint32)

## 同构性

ndarray中的所有元素必须是相同的数据类型；如果传入的数据类型不一致，NumPy会尝试把它们转换为同一种类型

In [None]:
# Mixed float / str / int input: every element is converted to a string (dtype '<U32').
np.array([1.5, 'test', 199])

array(['1.5', 'test', '199'], dtype='<U32')

In [None]:
# Rows of mixed types are unified too: the whole 2-D array becomes strings (dtype '<U32').
np.array([
    (1.5, 'test'),
    (199, 'good'),
])

array([['1.5', 'test'],
       ['199', 'good']], dtype='<U32')

## 数组的索引和切片

In [None]:
# Sample 6x4 float array reused by the indexing, slicing and sorting examples that follow.
arr = np.array([
    [-1.734464, 1.246888, 0.921260, -0.893499],
    [0.954527, -0.573931, -0.893817, 1.563767],
    [0.713028, -0.259905, -3.059838, -1.824971],
    [0.257634, 0.224982, 0.510233, 1.218742],
    [2.536384, 2.255618, -1.407619, 0.545119],
    [1.224132, 1.863338, 0.333345, -1.985052],
])
arr

array([[-1.734464,  1.246888,  0.92126 , -0.893499],
       [ 0.954527, -0.573931, -0.893817,  1.563767],
       [ 0.713028, -0.259905, -3.059838, -1.824971],
       [ 0.257634,  0.224982,  0.510233,  1.218742],
       [ 2.536384,  2.255618, -1.407619,  0.545119],
       [ 1.224132,  1.863338,  0.333345, -1.985052]])

索引（取下标）将数组降维，可以多次索引直至得到单个元素（标量）

In [None]:
# A single index selects row 1 and drops one dimension (2-D -> 1-D).
arr[1]

array([ 0.954527, -0.573931, -0.893817,  1.563767])

In [None]:
# Index twice: row 1 first, then element 2 of that row, which yields a scalar.
arr[1][2]

-0.893817

多次索引可以简化为多维索引：

In [None]:
# Multi-dimensional index: row 1, column 2 in a single step.
arr[1,2]

-0.893817

和Python list类似，可以通过切片，获取一段连续的片段。取出的切片没有降维。

In [None]:
# Slice rows 1 and 2 (the stop index 3 is excluded); the result is still 2-D.
arr[1:3]

array([[ 0.954527, -0.573931, -0.893817,  1.563767],
       [ 0.713028, -0.259905, -3.059838, -1.824971]])

In [None]:
# Slice two axes at once: rows 1-2 and columns 2-3.
arr[1:3, 2:4]

array([[-0.893817,  1.563767],
       [-3.059838, -1.824971]])

需要注意，因为切片不降维，多次切片和多维切片是不同的：`arr[1:3, 2:4]`分别对行和列切片；而`arr[1:3][2:4]`的第二次切片仍然作用在第0维（行）上，`arr[1:3]`只有2行，所以`[2:4]`得到的是空数组

In [None]:
# Multi-dimensional slice: 1:3 applies to the rows, 2:4 applies to the columns.
arr[1:3, 2:4]

array([[-0.893817,  1.563767],
       [-3.059838, -1.824971]])

In [None]:
# Chained slices both act on axis 0: arr[1:3] has only 2 rows, so [2:4] of it is empty.
arr[1:3][2:4]

array([], shape=(0, 4), dtype=float64)

## 排序

直接排序：`np.sort`

In [None]:
# Row 3 of arr, in its original (unsorted) order.
arr[3]

array([0.257634, 0.224982, 0.510233, 1.218742])

In [None]:
# np.sort returns the values in ascending order.
np.sort(arr[3])

array([0.224982, 0.257634, 0.510233, 1.218742])

排序维度：

对于多维数组，`axis`参数指定排序维度，即希望排序后哪个维度是有序的。

In [None]:
# Show the full array again before sorting it along each axis.
arr

array([[-1.734464,  1.246888,  0.92126 , -0.893499],
       [ 0.954527, -0.573931, -0.893817,  1.563767],
       [ 0.713028, -0.259905, -3.059838, -1.824971],
       [ 0.257634,  0.224982,  0.510233,  1.218742],
       [ 2.536384,  2.255618, -1.407619,  0.545119],
       [ 1.224132,  1.863338,  0.333345, -1.985052]])

In [None]:
# axis=1: sort within each row, so every row reads in ascending order left to right.
np.sort(arr, axis=1)

array([[-1.734464, -0.893499,  0.92126 ,  1.246888],
       [-0.893817, -0.573931,  0.954527,  1.563767],
       [-3.059838, -1.824971, -0.259905,  0.713028],
       [ 0.224982,  0.257634,  0.510233,  1.218742],
       [-1.407619,  0.545119,  2.255618,  2.536384],
       [-1.985052,  0.333345,  1.224132,  1.863338]])

In [None]:
# axis=0: sort within each column, so every column reads in ascending order top to bottom.
# Each column is sorted independently, so values from one original row end up in different rows.
np.sort(arr, axis=0)

array([[-1.734464, -0.573931, -3.059838, -1.985052],
       [ 0.257634, -0.259905, -1.407619, -1.824971],
       [ 0.713028,  0.224982, -0.893817, -0.893499],
       [ 0.954527,  1.246888,  0.333345,  0.545119],
       [ 1.224132,  1.863338,  0.510233,  1.218742],
       [ 2.536384,  2.255618,  0.92126 ,  1.563767]])

间接排序：

多维数组直接排序不是太有用（为什么？），因此实际数据处理中对于多维数组，或者有关联的多个数组，一般使用间接排序`argsort`。

恒等式（对一维数组成立）：`arr[np.argsort(arr)]` 与 `np.sort(arr)` 的元素逐一相等

In [None]:
# Keep row 3 under its own name for the argsort examples.
arr_row3 = arr[3]

In [None]:
# Direct sort of the row, as a reference result for the argsort-based approach.
np.sort(arr_row3)

array([0.224982, 0.257634, 0.510233, 1.218742])

In [None]:
# Indexing with the argsort result reorders the values into ascending order.
arr_row3[np.argsort(arr_row3)]

array([0.224982, 0.257634, 0.510233, 1.218742])

`argsort`的返回值是能使数据有序的下标数组。

In [None]:
# argsort returns positions, not values: element 1 is the smallest, followed by 0, 2 and 3.
np.argsort(arr_row3)

array([1, 0, 2, 3])

如：按第0列排序整个数组

In [None]:
# arr[:, 0] takes column 0 from every row; this column serves as the sort key.
arr_col0 = arr[:, 0]
arr_col0

array([-1.734464,  0.954527,  0.713028,  0.257634,  2.536384,  1.224132])

In [None]:
# Reorder whole rows by ascending column-0 value; each row stays intact.
arr[np.argsort(arr_col0)]

array([[-1.734464,  1.246888,  0.92126 , -0.893499],
       [ 0.257634,  0.224982,  0.510233,  1.218742],
       [ 0.713028, -0.259905, -3.059838, -1.824971],
       [ 0.954527, -0.573931, -0.893817,  1.563767],
       [ 1.224132,  1.863338,  0.333345, -1.985052],
       [ 2.536384,  2.255618, -1.407619,  0.545119]])

In [None]:
# Display arr again: its original row order is unchanged.
arr

array([[-1.734464,  1.246888,  0.92126 , -0.893499],
       [ 0.954527, -0.573931, -0.893817,  1.563767],
       [ 0.713028, -0.259905, -3.059838, -1.824971],
       [ 0.257634,  0.224982,  0.510233,  1.218742],
       [ 2.536384,  2.255618, -1.407619,  0.545119],
       [ 1.224132,  1.863338,  0.333345, -1.985052]])

## 排序技巧

### 逆序

`np.sort`后直接反向切片

In [None]:
# Ascending order on the first line, the same data reversed with [::-1] (descending) on the second.
print(np.sort(arr_row3), np.sort(arr_row3)[::-1], sep='\n')

[0.224982 0.257634 0.510233 1.218742]
[1.218742 0.510233 0.257634 0.224982]


### 名次

最大、最小值可以用`np.max`、`np.min`（或数组的`max`、`min`方法），对应的下标用`np.argmax`、`np.argmin`（或数组的`argmax`、`argmin`方法）

In [None]:
# Sorted row for reference, then one group per extreme: separator, value, index of that value.
print(np.sort(arr_row3))
print('-' * 20, arr_row3.max(), arr_row3.argmax(), sep='\n')
print('-' * 20, arr_row3.min(), arr_row3.argmin(), sep='\n')

[0.224982 0.257634 0.510233 1.218742]
--------------------
1.218742
3
--------------------
0.224982
1


升序排序后，第n名（n从0开始计数）可以直接取下标

In [None]:
# Position 3 of the ascending order: print the sorted row, the value at that position,
# and the index that value has in arr_row3.
print(
    np.sort(arr_row3),
    np.sort(arr_row3)[3],
    np.argsort(arr_row3)[3],
    sep='\n',
)

[0.224982 0.257634 0.510233 1.218742]
1.218742
3


注意重复数据：相同的值会占据多个名次。如有必要，可以先用`np.unique`去除重复（其返回值已按升序排列），再按下标取名次

In [None]:
# Sample data with repeated values; after sorting, equal values sit next to each other.
arr_with_dup = np.array([2, 3, 3, 5, 2, 1, 2, 9, 0, 9])
np.sort(arr_with_dup)

array([0, 1, 2, 2, 2, 3, 3, 5, 9, 9])

In [None]:
# With duplicates, position 3 of the sorted data is still a 2 (the value 2 fills positions 2-4).
print(np.sort(arr_with_dup), np.sort(arr_with_dup)[3], sep='\n')

[0 1 2 2 2 3 3 5 9 9]
2


In [None]:
# np.unique already returns the distinct values in ascending order,
# so wrapping it in np.sort is redundant. Position 3 is now the 4th distinct value.
print(np.unique(arr_with_dup))
print(np.unique(arr_with_dup)[3])

[0 1 2 3 5 9]
3
