# <span style="color:white">numpy是python中科学计算基础包</span>
## 1. ndarray

In [449]:
import numpy as np

arr = np.array(5)  # 创建0维ndarray
print(type(arr))
print(arr.ndim)  # ndarray维度

<class 'numpy.ndarray'>
0


In [450]:
arr = np.array([1, 2, 3, 4])  # 创建1维ndarray
print(arr.ndim)

1


In [451]:
arr = np.array([[1, 2, 3], [4, 5, 6]])  # 创建2维ndarray
print(arr.ndim)

2


### 1.1 同质性

In [452]:
arr = np.array([1, 'a'])  # 不同数据类型会强制转换为相同数据类型
print(arr)

['1' 'a']


In [453]:
arr = np.array([1, 2.5])
print(arr)

[1.  2.5]


### 1.2 ndarray属性

In [454]:
arr = np.array([[1, 2, 3], ['4', '5', '6']])
# arr = np.array([1,2,3])
print(arr)
print('number of dimensions: ', arr.ndim)  # 维度数量
print('shape: ', arr.shape)  # 数组形状(行列或高维度尺寸)
print('size: ', arr.size)  # 元素总个数
print('dtype: ', arr.dtype)  # 元素的类型
print('transposition: ', arr.T)  # 转置

[['1' '2' '3']
 ['4' '5' '6']]
number of dimensions:  2
shape:  (2, 3)
size:  6
dtype:  <U11
transposition:  [['1' '4']
 ['2' '5']
 ['3' '6']]


### 1.3 ndarray的创建

In [455]:
arr1 = np.array([1, 2, 3], dtype=np.float64)  # 指定元素类型
print(arr1)

[1. 2. 3.]


In [456]:
# copy
arr2 = np.copy(arr1)  # 深拷贝
print(arr1)
print(arr2)
arr2[0] = 4.
print(arr1)
print(arr2)

[1. 2. 3.]
[1. 2. 3.]
[1. 2. 3.]
[4. 2. 3.]


In [457]:
# 预定义形状
# 全0
arr_zero = np.zeros((2, 3))
print(arr_zero)
print(arr_zero.dtype)

[[0. 0. 0.]
 [0. 0. 0.]]
float64


In [458]:
# 全1
arr_one = np.ones((2,), dtype=int)
print(arr_one)
print(arr_one.dtype)

[1 1]
int32


In [459]:
# 未初始化(速度快)
arr_empty = np.empty((3, 3))
print(arr_empty)
print(arr_empty.dtype)

[[2.33419537e-313 3.39519327e-313 5.30498948e-313]
 [1.03977794e-312 1.14587773e-312 1.29441743e-312]
 [1.44295714e-312 1.86735630e-312 2.10077583e-312]]
float64


In [460]:
# 指定元素内容
arr_full = np.full((3, 3), 'A')
print(arr_full)
print(arr_full.dtype)

[['A' 'A' 'A']
 ['A' 'A' 'A']
 ['A' 'A' 'A']]
<U1


In [461]:
# 创建相同形状、类型的ndarray
arr_zero = np.zeros_like(arr1)
print(arr1)
print(arr_zero)
arr_one = np.ones_like(arr1)
print(arr_one)
arr_empty = np.empty_like(arr1)
print(arr_empty)
arr_full = np.full_like(arr1, 10.1)
print(arr_full)

[1. 2. 3.]
[0. 0. 0.]
[1. 1. 1.]
[1. 1. 1.]
[10.1 10.1 10.1]


In [462]:
# 等差数列
arr = np.arange(1, 10, 1)  # (start, end, step)
print(arr)

[1 2 3 4 5 6 7 8 9]


In [463]:
# 等间隔数列
arr = np.linspace(1, 10, 5, dtype=int)  # (start, end, num)
print(arr)

[ 1  3  5  7 10]


In [464]:
# 对数间隔数列
arr = np.logspace(0, 4, 3, base=2)
print(arr)

[ 1.  4. 16.]


In [465]:
# 特殊矩阵
# 单位矩阵 主对角线上元素为1，其他元素为0
arr = np.eye(3, 4, dtype=int)
print(arr)

[[1 0 0 0]
 [0 1 0 0]
 [0 0 1 0]]


In [466]:
# 对角矩阵 主对角线上元素不为0，其他元素为0
arr = np.diag([1, 2, 3, 4])
print(arr)

[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]


In [467]:
# 随机数组生成
# 生成0到1之间随机浮点数(均匀分布)
arr = np.random.rand(3, 3)
print(arr)

[[0.56229626 0.00581719 0.30742321]
 [0.95018431 0.12665424 0.07898787]
 [0.31135313 0.63238359 0.69935892]]


In [468]:
# 生成指定范围区间的随机浮点数
arr = np.random.uniform(3, 6, (3, 3))
print(arr)

[[4.92589484 5.76007134 3.89662906]
 [4.7062366  3.53587297 4.59772111]
 [4.94007441 3.42619613 4.74416689]]


In [469]:
# 生成指定范围区间的随机整数
arr = np.random.randint(1, 3, (4, 4))
print(arr)

[[2 1 1 2]
 [1 1 2 2]
 [2 1 1 1]
 [1 1 1 2]]


In [470]:
# 生成随机数列(正态分布)
arr = np.random.randn(3, 3)
print(arr)

[[ 0.56682064 -2.30337426  1.37700072]
 [-1.18738682  0.07939305  0.10796863]
 [ 0.5069511   1.12558936 -1.00805248]]


In [471]:
# 生成种子
np.random.seed(10)
print(np.random.randint(1, 10, (2, 2)))

[[5 1]
 [2 1]]


### 1.4 ndarray数据类型

|  类型   | 符号  |
|  ----  | ----  |
| 布尔型  | bool |
| 整型  | int8 int16 int32 int64 uint8 uint16 uint32 uint64 |
| 浮点型  | float16 float32 float64 |
| 复数  | complex64 complex128 |

In [472]:
print(np.array([1, 2, 3], dtype=np.bool))
print(np.array([1, 2, 3], dtype=np.uint64))
print(np.array([1, 2, 3], dtype=np.complex64))

[ True  True  True]
[1 2 3]
[1.+0.j 2.+0.j 3.+0.j]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.


### 1.5 索引与切片

In [473]:
arr = np.random.randint(1, 10, 10)
print(arr)

[2 9 1 9 7 5 4 1 5 7]


In [474]:
# 一维数组切片
print(arr[0])
print(arr[:])
print(arr[0:3])
print(arr[slice(0, 3)])
print(arr[0:5:2])
print(arr[arr > 2])
print(arr[(arr > 2) & (arr < 6)])

2
[2 9 1 9 7 5 4 1 5 7]
[2 9 1]
[2 9 1]
[2 1 7]
[9 9 7 5 4 5 7]
[5 4 5]


In [475]:
arr = np.random.randint(1, 10, (6, 6))
print(arr)

[[9 2 9 5 2 4]
 [7 6 4 7 2 5]
 [3 7 8 9 9 3]
 [1 7 8 9 2 8]
 [2 5 1 9 6 5]
 [8 9 9 3 7 3]]


In [476]:
# 二维数组切片
print(arr[0])
print(arr[1, 3])
print(arr[1, :])
print(arr[1])
print(arr[1, 0:3])
print(arr[slice(0, 2)])
print(arr[0][arr[0] > 5])
print(arr[:, 3])

[9 2 9 5 2 4]
7
[7 6 4 7 2 5]
[7 6 4 7 2 5]
[7 6 4]
[[9 2 9 5 2 4]
 [7 6 4 7 2 5]]
[9 9]
[5 7 9 9 9 3]


### 1.6 ndarray的运算

In [477]:
# 算术运算
# 一维数组
a = np.array([1, 2, 3])
b = np.array([3, 4, 5])
print(a + b)
print(a - b)
print(a * b)
print(a / b)
print(a ** 2)  # 幂运算

[4 6 8]
[-2 -2 -2]
[ 3  8 15]
[0.33333333 0.5        0.6       ]
[1 4 9]


In [478]:
# 矩阵
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[4, 5, 6], [7, 8, 9], [1, 2, 3]])
print(a + b)
print(a - b)
print(a * b)
print(a / b)
print(a ** 2)

[[ 5  7  9]
 [11 13 15]
 [ 8 10 12]]
[[-3 -3 -3]
 [-3 -3 -3]
 [ 6  6  6]]
[[ 4 10 18]
 [28 40 54]
 [ 7 16 27]]
[[0.25       0.4        0.5       ]
 [0.57142857 0.625      0.66666667]
 [7.         4.         3.        ]]
[[ 1  4  9]
 [16 25 36]
 [49 64 81]]


In [479]:
# 广播机制
# 条件 维度相同或其中一方为1
a = np.array([1, 2, 3])  # 3*1
b = np.array([[4], [5], [6]])  # 1*3
"""
  1 2 3        4 4 4
  1 2 3        5 5 5
  1 2 3        6 6 6
"""
print(a + b)
print(a - b)
print(a * b)
print(a / b)

[[5 6 7]
 [6 7 8]
 [7 8 9]]
[[-3 -2 -1]
 [-4 -3 -2]
 [-5 -4 -3]]
[[ 4  8 12]
 [ 5 10 15]
 [ 6 12 18]]
[[0.25       0.5        0.75      ]
 [0.2        0.4        0.6       ]
 [0.16666667 0.33333333 0.5       ]]


In [480]:
# 矩阵乘法运算
# 每个位置(x,y)结果为 a的x行与b的y列相乘的总和
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[4, 5, 6], [7, 8, 9], [1, 2, 3]])
print(a @ b)

[[ 21  27  33]
 [ 57  72  87]
 [ 93 117 141]]


### 1.7 ndarray常用函数

In [481]:
arr = np.array([1, 4, 9, 16])
# 计算平方根
print(np.sqrt(arr))
print(np.sqrt([1, 2, 3]))

[1. 2. 3. 4.]
[1.         1.41421356 1.73205081]


In [482]:
# 计算指数(e^x = y)
print(np.exp(1))
print(np.exp(2))

2.718281828459045
7.38905609893065


In [483]:
# 计算对数(lny = x)
print(np.log(np.e))
print(np.log(1))

1.0
0.0


In [484]:
# 正弦
print(np.sin(np.pi / 2))
# 余弦
print(np.cos(np.pi))

1.0
-1.0


In [485]:
# 幂运算
print(np.power(arr, 2))

[  1  16  81 256]


In [486]:
# 四舍五入(四舍六入五凑偶)
print(np.round([1.5, 2.4, 3.1, 4.5, 6.51, 9.6]))

[ 2.  2.  3.  4.  7. 10.]


In [487]:
# 绝对值
arr = np.array([-1, 0, 1, 2])
print(np.abs(arr))

[1 0 1 2]


In [488]:
# 向上取整
arr = np.array([1.5, 2.1, 3.5, 9.1])
print(np.ceil(arr))
# 向下取整
print(np.floor(arr))

[ 2.  3.  4. 10.]
[1. 2. 3. 9.]


In [489]:
# 检查NaN
print(np.isnan([1, 2, 3.4, 0]))
print(np.isnan([1, 2, np.nan, 0]))

[False False False False]
[False False  True False]


In [490]:
# 统计函数
# 平均数
arr = np.random.randint(1, 10, 10)
print(arr)
print(np.mean(arr))

[9 9 7 7 6 7 1 1 7 2]
5.6


In [491]:
# 中位数
print(np.median([1, 1, 1, 1, 1, 100]))

1.0


In [492]:
print(np.var(arr))  # 方差
print(np.sqrt(np.var(arr)))
print(np.std(arr))  # 标准差

8.64
2.939387691339814
2.939387691339814


In [493]:
print(np.sum(arr))  # 求和

56


In [494]:
# 最大值
arr = np.array([1, 1, 2, 4, 5, 7, 7])
print(np.max(arr))
# 最大值索引(第一次出现)
print(np.argmax(arr))
# 最小值、最小值索引
print(np.min(arr), np.argmin(arr))

arr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# 每列最大值
print(np.max(arr1, axis=0))  # axis=0 列 axis=1 行
# 每行最大值
print(np.max(arr1, axis=1))

7
5
1 0
[7 8 9]
[3 6 9]


In [495]:
# 分位数
arr = np.array([44, 47, 64, 67])
print(np.median(arr))
print(np.percentile(arr, 25))

55.5
46.25


### <span style="color:red">分位数计算公式</span>
(n−1)∗p=i+j    （其中n为数组元素的个数，将计算结果的整数部分用i表示，小数部分用j来表示，p是百分位数，如90%的话就是0.9）
res=(1−j)∗arr[i]+j∗arr[i+1]     （res就是我们所需要的百分位数）

In [496]:
percent = 0.25  # 百分位
n = len(arr)  # 数组长度
index = (n - 1) * percent
i = int(index)  # 整数位
j = round(index, 2) - i  # 小数位
print(index, i, j)
res = (1 - j) * arr[i] + j * arr[i + 1]  # 分位数
print(res)

0.75 0 0.75
46.25


In [497]:
# 累积和
arr = np.array([1, 2, 3])
print(np.cumsum(arr))
# 累积积
print(np.cumprod(arr))

[1 3 6]
[1 2 6]


In [498]:
# 比较函数
arr = np.array([3, 4, 5, 6, 6, 7, 8])
# 是否大于
print(np.greater(arr, 4))
# 是否小于
print(np.less(arr, 6))
# 是否等于
print(np.equal(arr, 6))
print(np.equal([[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 7]]))

[False False  True  True  True  True  True]
[ True  True  True False False False False]
[False False False  True  True False False]
[[ True  True  True]
 [ True  True False]]


In [499]:
# 逻辑与
print(np.logical_and([1, 0], [0, 1]))
# 逻辑或
print(np.logical_or([1, 0], [0, 1]))
# 逻辑非
print(np.logical_not([1, 0]))
# 逻辑异或
print(np.logical_xor([1, 0], [0, 1]))

[False False]
[ True  True]
[False  True]
[ True  True]


In [500]:
# 检查元素是否至少有一个为True
print(np.any([1, 0, 0, 0, 0]))
# 检查元素是否全为True
print(np.all([1, 0, 0, 0, 0]))

True
False


In [501]:
# 自定义条件
arr = np.array([1, 2, 3, 4, 5])
print(np.where(arr >= 3, 1, 0))  # where(condition, true, false)
print(np.where(arr >= 3, 'A', 'B'))
print(np.where(arr >= 3, 'A', np.where(arr >= 2, "B", "C")))
# 自定义条件、结果 select(condition,result,default)
print(np.select([arr < 1, (2 <= arr) & (arr <= 3), arr > 3], ['C', 'B', 'A'], default='D'))

[0 0 1 1 1]
['B' 'B' 'A' 'A' 'A']
['C' 'B' 'A' 'A' 'A']
['D' 'B' 'B' 'A' 'A']


In [502]:
# 排序函数
np.random.seed(100)
arr = np.random.randint(1, 100, 20)
print(arr)
# arr.sort()
print(np.sort(arr))  # 返回排序后的数组
print(np.argsort(arr))  # 返回排序数组排序前的元素索引
print(arr)

[ 9 25 68 88 80 49 11 95 53 99 54 67 99 15 35 25 16 61 59 17]
[ 9 11 15 16 17 25 25 35 49 53 54 59 61 67 68 80 88 95 99 99]
[ 0  6 13 16 19 15  1 14  5  8 10 18 17 11  2  4  3  7 12  9]
[ 9 25 68 88 80 49 11 95 53 99 54 67 99 15 35 25 16 61 59 17]


In [507]:
# 去重函数
print(np.unique(arr))  # 去重并排序
print(np.unique(arr, return_counts=True))  # 同时返回每个唯一值出现的次数

[ 1  2  3  4  5  6  7  8  9 10 11 12]
(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64))


In [504]:
# 数组的拼接
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
print(np.concatenate((arr1, arr2)))
print(np.concatenate((arr2, arr1)))

[1 2 3 4 5 6]
[4 5 6 1 2 3]


In [505]:
# 数组的分割
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
print(np.split(arr, 2))  # 只能等分
print(np.split(arr, 4))
print(np.split(arr, [5, 9]))  # 指定分割索引 支持不等分

[array([1, 2, 3, 4, 5, 6]), array([ 7,  8,  9, 10, 11, 12])]
[array([1, 2, 3]), array([4, 5, 6]), array([7, 8, 9]), array([10, 11, 12])]
[array([1, 2, 3, 4, 5]), array([6, 7, 8, 9]), array([10, 11, 12])]


In [506]:
# 调整数组形状
print(np.reshape(arr, [2, 6]))  # 元素数量要相同

[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]
