## 1. 引入 numpy 库：

In [1]:
import numpy as np

In [3]:
np.__version__

'1.21.5'

NumPy 要求同一个数组中的所有元素具有相同的数据类型（dtype），以此换取更高的存储与运算效率

In [4]:
nparr = np.array([1, 2, 3.0])
nparr.dtype

dtype('float64')

In [5]:
nparr

array([1., 2., 3.])

### 其他初始化 np.array 的方法：zeros, full

In [12]:
# Ten zeros stored explicitly as 64-bit integers.
arr = np.zeros(10, dtype="int64")
# End the cell with the bare name so Jupyter displays the array repr
# (including the dtype) instead of print()'s plain-text form.
arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

### 还可以初始化一个矩阵：

In [17]:
arr = np.full(shape = (3, 5), fill_value = "hello")
# 用关键字标记不同的参数，与顺序无关

print(arr)

[['hello' 'hello' 'hello' 'hello' 'hello']
 ['hello' 'hello' 'hello' 'hello' 'hello']
 ['hello' 'hello' 'hello' 'hello' 'hello']]


## 2. `arange` 方法与 `linspace` 方法

In [19]:
np.arange(0, 20, 2) # 与 range 语法一致

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [22]:
np.linspace(0,20,11, dtype=int) # 11 evenly spaced values from 0 to 20, both endpoints included

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

## 3. 创建随机数向量/矩阵

In [31]:
np.random.random(10)

array([0.62552398, 0.53788167, 0.25004949, 0.6616414 , 0.95263128,
       0.83929611, 0.20513773, 0.22583081, 0.35376718, 0.10724021])

In [29]:
np.random.randint(0, 100, size=(3,5))

array([[ 9, 81, 46, 33, 46],
       [15, 55, 17, 59, 26],
       [11, 57, 72, 54, 69]])

`normal` 方法可以生成一个满足正态分布的随机数组：

In [37]:
np.random.normal(loc=10,scale=1,size=10)
# 10 samples with mean 10 and standard deviation 1 (scale is the standard deviation, not the variance)

array([11.07860617, 12.41383159,  9.0800886 ,  9.68659177, 10.86520318,
        9.67078179,  9.58967842, 10.64501099, 10.75771145,  9.77126654])

In [44]:
np.random.normal? #可以直接查询方法

## 4. `Numpy` 基本操作

In [51]:
X = np.random.randint(0,20, size=(3,5))
# 创建一个矩阵

### - 常见属性：

In [52]:
print("Dimension: ", X.ndim) # number of axes of X
print("Shape: ", X.shape) # length of X along each axis, as a tuple
print("Size: ", X.size) # total number of elements in X

Dimension:  2
Shape:  (3, 5)
Size:  15


### - 数据访问的方式：

In [64]:
print(X[0][0], "or", X[0,0])  
# 更推荐后者

0 or 0


In [69]:
X[:2, :4] # Py切片

array([[ 0,  5, 15, 16],
       [ 4,  9,  3,  2]])

In [68]:
X[0,::-1] #倒序枚举

array([ 4, 16, 15,  5,  0])

In [70]:
X[0, :] # take the first row (row index 0, every column)

array([ 0,  5, 15, 16,  4])

### *Numpy的浅拷贝：

In [72]:
subX = X[:2, :3]
subX

array([[ 0,  5, 15],
       [ 4,  9,  3]])

In [73]:
subX[0, 0] = 100
subX

array([[100,   5,  15],
       [  4,   9,   3]])

In [75]:
X # X changed as well: the slice subX shares X's data instead of owning an independent copy

array([[100,   5,  15,  16,   4],
       [  4,   9,   3,   2,   0],
       [  4,   4,  14,   4,  13]])

In [80]:
subX = X[:2, :3].copy() # .copy() gives subX its own data, detached from X
X[0, 0] = 0 # write to X only; subX displayed below still holds 100
subX

array([[100,   5,  15],
       [  4,   9,   3]])

## 5. `Numpy` 常见方法

### reshape 方法：改写矩阵

In [83]:
X = np.random.randint(0, 20, 10)
X

array([12, 15,  3,  6,  8,  8,  2, 11,  0,  5])

In [88]:
X.reshape(2, -1)  # reshape X into 2 rows; -1 lets NumPy infer the column count (5 here)

array([[12, 15,  3,  6,  8],
       [ 8,  2, 11,  0,  5]])

In [90]:
X.reshape(-1, 2) #将X转化为2列的矩阵

array([[12, 15],
       [ 3,  6],
       [ 8,  8],
       [ 2, 11],
       [ 0,  5]])

### 数据合并方法

In [2]:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

In [18]:
np.concatenate((x.reshape(1,-1), y.reshape(1,-1)), axis=0)
# first reshape both vectors into 1x3 matrices, then stack them vertically (axis=0)

array([[1, 2, 3],
       [4, 5, 6]])

In [20]:
np.vstack([x,y])
# 可以直接竖向拼接，好用！

array([[1, 2, 3],
       [4, 5, 6]])

### 数据分割方法

In [23]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
x1, x2 = np.split(x, [4])
print(x1, x2) # 在位置 4 进行切割

[0 1 2 3] [4 5 6 7 8 9]


#### 对于高维矩阵，也可以进行分割：

In [27]:
A = np.arange(16).reshape(4,-1)
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [33]:
A1, A2 = np.split(A, [2]) # split along rows (the default axis) before row index 2
A1, A2 = np.split(A, [2], axis=1) # split along columns before column index 2; this overwrites the row split above
A1 

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [37]:
A1, A2 = np.vsplit(A,[2]) # vertical split: cuts A between rows, mirroring vstack
A1, A2 = np.hsplit(A,[2]) # horizontal split: cuts A between columns; overwrites the vsplit result above
A1

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

#### 分割方法可以抽出某个数据集的特征向量

In [38]:
x, y = np.hsplit(A, [-1]) # split off the last column: x gets the leading columns, y the final one
y # y is a 4x1 column holding the last entry of every row

array([[ 3],
       [ 7],
       [11],
       [15]])

In [39]:
y[: , 0] # 抽出每一行的唯一元素，便构成了特征向量

array([ 3,  7, 11, 15])

### 数据排序方法

In [71]:
x = np.arange(16)
np.random.shuffle(x)
x

array([11,  6, 15,  7, 13,  2,  1, 10,  9,  5, 14,  3,  4,  0,  8, 12])

In [73]:
np.sort(x.reshape(4,-1)) # returns a sorted copy; x itself is left unchanged
# With the default axis=-1 every row is sorted independently (values are ordered along the last axis)

array([[ 6,  7, 11, 15],
       [ 1,  2, 10, 13],
       [ 3,  5,  9, 14],
       [ 0,  4,  8, 12]])

In [76]:
np.argsort(x) # 返回排序前的下标

array([13,  6,  5, 11, 12,  9,  1,  3, 14,  8,  7,  0, 15,  4, 10,  2],
      dtype=int64)

## 6. 矩阵运算

### 1. 线性运算

In [1]:
X = np.arange(1,16).reshape((3,5))
X

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [2]:
X * 2 # 可以直接进行线性运算

array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30]])

In [3]:
X ** 2 

array([[  1,   4,   9,  16,  25],
       [ 36,  49,  64,  81, 100],
       [121, 144, 169, 196, 225]], dtype=int32)

In [4]:
X // X

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]], dtype=int32)

In [8]:
3 ** X

array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)

In [9]:
np.log10(X)

array([[0.        , 0.30103   , 0.47712125, 0.60205999, 0.69897   ],
       [0.77815125, 0.84509804, 0.90308999, 0.95424251, 1.        ],
       [1.04139269, 1.07918125, 1.11394335, 1.14612804, 1.17609126]])

### 2. 矩阵运算

In [11]:
A = np.arange(4).reshape(2,-1)
A

array([[0, 1],
       [2, 3]])

In [12]:
B = np.full((2,2), 2)
B

array([[2, 2],
       [2, 2]])

In [13]:
A * B # element-wise product, not matrix multiplication

array([[0, 2],
       [4, 6]])

In [14]:
A.dot(B) # 矩阵乘

array([[ 2,  2],
       [10, 10]])

In [15]:
A.T # 转置

array([[0, 2],
       [1, 3]])

In [17]:
v = np.array([1,2])
v + A # 每一行相应列的值相加

array([[1, 3],
       [3, 5]])

In [20]:
np.vstack([v] * A.shape[0]) + A # 等价于这个

array([[1, 3],
       [3, 5]])

In [42]:
# Import only the routines the notebook actually uses instead of `import *`,
# so it stays obvious where the names come from and nothing else is shadowed.
# pinv is imported here because a later cell calls it unqualified.
from numpy.linalg import inv, pinv
invA = inv(A)  # inverse of the square matrix A
invA

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [43]:
invA.dot(A) #单位矩阵

array([[1., 0.],
       [0., 1.]])

In [44]:
X = np.arange(6).reshape(2,-1)
X

array([[0, 1, 2],
       [3, 4, 5]])

In [47]:
pinvX = pinv(X) # 对于非方阵，也可以求其伪逆矩阵
pinvX

array([[-0.77777778,  0.27777778],
       [-0.11111111,  0.11111111],
       [ 0.55555556, -0.05555556]])

In [49]:
X.dot(pinvX) 

array([[ 1.00000000e+00, -4.16333634e-17],
       [ 5.55111512e-16,  1.00000000e+00]])

### 3. 矩阵复制与删除行&列

In [24]:
v = np.array([1,2])
np.tile(v, (2,3)) # 行方向重复2次，列方向重复3次

array([[1, 2, 1, 2, 1, 2],
       [1, 2, 1, 2, 1, 2]])

In [31]:
np.repeat(v, 3, axis=0) # 同元素复制三遍

array([1, 1, 1, 2, 2, 2])

In [81]:
K = np.tile(v,(2,3))
np.delete(K, np.s_[1:4], axis=1) # remove the columns at indices 1, 2 and 3 (axis=1 selects columns)

array([[1, 1, 2],
       [1, 1, 2]])

## 7. 矩阵聚合操作

In [51]:
T = np.random.random(100).reshape(10,-1)

In [53]:
np.sum(T) # 或者 T.sum()

46.619168883495384

In [54]:
np.min(T) # 或者 T.min()

0.023777790834744406

In [55]:
np.sum(T, axis=0) # sum down the rows (collapse axis 0), giving one total per column

array([4.96335513, 4.2709837 , 5.22286257, 5.92276381, 3.14957514,
       5.84858132, 4.03162505, 4.11443372, 5.45294598, 3.64204246])

In [56]:
np.prod(T)

3.2628126245671375e-48

In [57]:
np.mean(T) # 平均数

0.4661916888349538

In [58]:
np.median(T) # 中位数

0.4320878141695259

In [59]:
np.var(T) # 方差

0.08934490152973876

In [60]:
np.std(T) # 标准差

0.29890617512814743

## 8. Numpy 数组比较

In [85]:
x = np.arange(16)
np.random.shuffle(x)
x

array([ 5,  4,  9, 10,  1,  7,  6, 14, 13,  8, 15,  3, 11,  2,  0, 12])

In [87]:
x < 7 # 布尔运算符索引

array([ True,  True, False, False,  True, False,  True, False, False,
       False, False,  True, False,  True,  True, False])

In [89]:
x[x > 6] = 0 # 将对应布尔下标的元素置零
x

array([5, 4, 0, 0, 1, 0, 6, 0, 0, 0, 0, 3, 0, 2, 0, 0])

In [107]:
np.any(x>6) & np.all(x>0)

False

In [106]:
np.sum((x<=6) & (x>0)) # 返回区间内个数

6

In [109]:
np.sum(x % 2 == 1) # 判断奇数个数

3

In [96]:
x[(x>1) & (x<3)] = 114
x

array([  5,   4,   0,   0,   1,   0,   6,   0,   0,   0,   0,   3,   0,
       114,   0,   0])

In [97]:
index = [0,1,4]
x[index] # fancy indexing

array([5, 4, 1])

In [118]:
X = np.random.randint(1,16,16).reshape(4,-1)
X

array([[ 2,  3,  5,  4],
       [15, 10, 12,  1],
       [10, 13, 11, 14],
       [ 5,  2,  9, 13]])

In [125]:
X[X[:, 2] % 3 == 0, :] # 选出那些第三个元素是3倍数的行向量

array([[15, 10, 12,  1],
       [ 5,  2,  9, 13]])