# 常用方法

**数组拼接**

In [None]:
import numpy as np

# 2x6 integer matrix holding 0..11
a = np.reshape(np.arange(12), (2, 6))
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [None]:
# 2x6 integer matrix holding 12..23
b = np.reshape(np.arange(12, 24), (2, 6))
b

array([[12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [None]:
np.vstack([a, b])  # stack vertically: the rows of b are appended below the rows of a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [None]:
np.hstack([a, b])  # stack horizontally: the columns of b are appended to the right of a

array([[ 0,  1,  2,  3,  4,  5, 12, 13, 14, 15, 16, 17],
       [ 6,  7,  8,  9, 10, 11, 18, 19, 20, 21, 22, 23]])

**行列交换**

In [None]:
# 3x4 integer matrix holding 0..11
a = np.reshape(np.arange(12), (3, 4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
# Swap rows 1 and 2 in place. Indexing with a list (advanced indexing) on the
# right-hand side produces a copy, so both rows are read before either is overwritten.
a[[1,2],:] = a[[2,1],:]   # row swap
a

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [ 4,  5,  6,  7]])

In [None]:
# Swap columns 0 and 1 in place. Indexing with a list (advanced indexing) on the
# right-hand side produces a copy, so both columns are read before either is overwritten.
a[:,[0,1]] = a[:,[1,0]]   # column swap
a

array([[ 1,  0,  2,  3],
       [ 9,  8, 10, 11],
       [ 5,  4,  6,  7]])

**快速创建**

In [None]:
# Create a 3x4 array filled with zeros (floating point by default)
a = np.zeros(shape=(3, 4))
a

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
# Create a 3x4 array filled with ones (floating point by default)
b = np.ones(shape=(3, 4))
b

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [None]:
# Create a 3x3 square array with ones on the main diagonal and zeros elsewhere
c = np.eye(N=3)
c

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

**获取最大最小值**

In [None]:
# 4x4 identity matrix used as input for the argmax / argmin examples
a = np.eye(N=4)
a

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [None]:
np.argmax(a, axis=0) # axis=0 searches down the rows: one result per COLUMN, the row index of that column's maximum

array([0, 1, 2, 3])

In [None]:
np.argmin(a, axis=1) # axis=1 searches across the columns: one result per ROW, the column index of that row's minimum (first occurrence wins on ties)

array([1, 0, 0, 0])

**生成随机数**

In [None]:
# Uniformly distributed random floats in [0, 1); each argument is the length of one dimension

np.random.rand(1,2,3)   # result shape (1, 2, 3)

array([[[0.47014997, 0.75889007, 0.75323021],
        [0.24964146, 0.61134234, 0.96709655]]])

In [None]:
# Random floats from the standard normal distribution (mean 0, standard deviation 1);
# each argument is the length of one dimension

np.random.randn(1,2,3) # result shape (1, 2, 3)

array([[[-1.30080955, -1.47834642, -0.24659441],
        [ 1.82981256, -1.74659967,  1.71427025]]])

In [None]:
# Random integers drawn between a lower and an upper bound

np.random.randint(-2, 1, (3,4))   # values from [-2, 1) (upper bound excluded), result shape (3, 4)

array([[-2, -2, -2,  0],
       [ 0,  0, -1,  0],
       [-1, -1,  0, -1]])

In [None]:
# Random floats from a uniform distribution over a chosen interval

np.random.uniform(-2, 2, (3,4))  # values from [-2, 2), result shape (3, 4)

array([[-0.78303103,  0.09902573, -0.27221993, -0.83508344],
       [ 0.44741158, -1.44202456, -0.83142141, -0.53455263],
       [-0.17572006,  1.14070385, -1.20130487,  0.05693775]])

In [None]:
# Random samples from a normal distribution with the given parameters

np.random.normal(2, 1, (3,4))   # centre (mean) 2, standard deviation 1, result shape (3, 4)

array([[1.45561728, 2.11092259, 0.84900642, 2.37569802],
       [1.39936131, 1.70830625, 1.39829339, 3.85227818],
       [1.98650278, 0.94228907, 2.82254491, 0.77915635]])

In [None]:
# Seed the global random generator so that the draw below is identical on every run

np.random.seed(42)
np.random.randint(-2, 1, (3,4)) # integers from [-2, 1), result shape (3, 4)

array([[ 0, -2,  0,  0],
       [-2, -2,  0, -1],
       [ 0,  0,  0,  0]])

**关于拷贝**

* 不拷贝（直接赋值）与浅拷贝（视图）

In [None]:
# a = b     : no copy at all; both names refer to the same array object.
# a = b[:]  : creates a view, i.e. a new array object whose data is still owned by b.
# In both cases a change made through one name is visible through the other;
# the only difference is that the slice produces a distinct object.

b = np.zeros((3,4))
a = b          # plain assignment: a is just another name for b
b[1,:] = 1     # written through b ...
a              # ... and visible through a

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.]])

In [None]:
# Write the whole first row through a, then display b to see the effect on it
a[0] = 2
b

array([[2., 2., 2., 2.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.]])

* 深拷贝

In [None]:
# .copy() produces an independent array: a and b do not affect each other

b = np.zeros(shape=(3, 4))
a = b.copy()
b[1] = 1   # changes b only
a

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

**常用统计函数**

In [None]:
# 4x6 integer matrix holding 0..23, the input for the statistics examples
a = np.reshape(np.arange(24), (4, 6))
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [None]:
# Sum (overall / per axis) - way 1: the np.sum function

(
    np.sum(a),          # sum of all elements
    np.sum(a, axis=0),  # one sum per column
    np.sum(a, axis=1),  # one sum per row
)

(276, array([36, 40, 44, 48, 52, 56]), array([ 15,  51,  87, 123]))

In [None]:
# Sum (overall / per axis) - way 2: the ndarray.sum method

(
    a.sum(),        # sum of all elements
    a.sum(axis=0),  # one sum per column
    a.sum(axis=1),  # one sum per row
)

(276, array([36, 40, 44, 48, 52, 56]), array([ 15,  51,  87, 123]))

In [None]:
# Mean (overall / per axis) - function form and method form side by side

(
    np.mean(a),          # overall mean, function form
    a.mean(),            # overall mean, method form
    np.mean(a, axis=0),  # one mean per column
    a.mean(axis=1),      # one mean per row
)

(11.5,
 11.5,
 array([ 9., 10., 11., 12., 13., 14.]),
 array([ 2.5,  8.5, 14.5, 20.5]))

In [None]:
# Median (overall / per axis) - only the function form np.median is available

(
    np.median(a),          # overall median
    np.median(a, axis=0),  # one median per column
    np.median(a, axis=1),  # one median per row
)

(11.5, array([ 9., 10., 11., 12., 13., 14.]), array([ 2.5,  8.5, 14.5, 20.5]))

In [None]:
# Maximum / minimum (overall / per axis) - function form and method form side by side

(
    np.max(a),          # overall maximum, function form
    a.min(),            # overall minimum, method form
    np.max(a, axis=0),  # one maximum per column
    a.min(axis=1),      # one minimum per row
)

(23, 0, array([18, 19, 20, 21, 22, 23]), array([ 0,  6, 12, 18]))

In [None]:
# Peak-to-peak range (maximum minus minimum), overall and per axis.
# Way 1 is the np.ptp function; way 2 subtracts min from max explicitly,
# because the ndarray.ptp method was removed in NumPy 2.0.

(
    np.ptp(a),                      # overall range (way 1)
    a.max() - a.min(),              # overall range (way 2)
    np.ptp(a, axis=0),              # range of each column (way 1)
    a.max(axis=1) - a.min(axis=1),  # range of each row (way 2)
)

(23, 23, array([18, 18, 18, 18, 18, 18]), array([5, 5, 5, 5]))

In [None]:
# Standard deviation (overall / per axis) - function form and method form side by side

(
    np.std(a),          # overall standard deviation, function form
    a.std(),            # overall standard deviation, method form
    np.std(a, axis=0),  # one value per column
    a.std(axis=1),      # one value per row
)

(6.922186552431729,
 6.922186552431729,
 array([6.70820393, 6.70820393, 6.70820393, 6.70820393, 6.70820393,
        6.70820393]),
 array([1.70782513, 1.70782513, 1.70782513, 1.70782513]))

**NAN和INF**

* NAN: not a number（不是一个数字）。一般来说，当把本地文件读取为float类型时，如果数据有缺失，就会出现nan；做了不合适的计算时也会得到nan，例如 $\infty - \infty$ 或 $0/0$。

* INF: infinity，inf表示正无穷，-inf表示负无穷。比如一个非零数字除以0，Python中会直接报错（ZeroDivisionError），而numpy中得到的是inf或者-inf（同时给出RuntimeWarning）。

In [None]:
# Bind NaN to a name and show the value together with its type

a = np.nan
(a, type(a))

(nan, float)

In [None]:
# Bind positive infinity to a name and show the value together with its type

a = np.inf
(a, type(a))

(inf, float)

* 两个nan是不相等的
* 利用以上特性可以判断数组中nan的个数

In [None]:
# NaN compares unequal to every value, itself included, so this evaluates to True
np.nan != np.nan

True

In [None]:
# a != a is True only at NaN positions, so counting the True entries counts the NaNs
a = np.array([1.0, 2.0, 3.0, np.nan])
np.count_nonzero(a != a)

1

* 通过np.isnan()来判断是否为nan，会返回bool类型。也可以通过该方式替换nan。

In [None]:
# Boolean-mask assignment: overwrite every NaN entry of a with 0
a[np.isnan(a)] = 0.0
a

array([1., 2., 3., 0.])

* nan和任何数计算都会返回nan

In [None]:
# Multiplying by NaN turns every element of the result into NaN
b = np.nan
np.multiply(a, b)

array([nan, nan, nan, nan])

* 替换数据中nan值，一般会采用均值替代，具体例子如下所示:

In [None]:
# 3x4 float matrix holding 0..11, with the last two entries of row 1 set to NaN
a = np.arange(12, dtype=float).reshape((3, 4))
a[1, 2:] = np.nan
a

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5., nan, nan],
       [ 8.,  9., 10., 11.]])

In [None]:
def fill_nan(a):
  """Replace each NaN in a 2-D array with the mean of the non-NaN values of its column.

  The array is modified IN PLACE and the same object is returned, so the
  caller's array changes as well; pass ``a.copy()`` to keep the original.

  Args:
    a: 2-D NumPy array of a numeric dtype (NaN can only occur in floating-point data).

  Returns:
    The same array object ``a``, with NaNs filled column by column.
    A column that consists only of NaN has no mean to fill with and is left
    unchanged (no "mean of empty slice" warning is raised).
  """
  nan_mask = np.isnan(a)
  # Visit only the columns that actually contain at least one NaN.
  for i in np.flatnonzero(nan_mask.any(axis=0)):
    col_nan = nan_mask[:, i]
    valid = a[~col_nan, i]   # the non-NaN values of the current column
    if valid.size == 0:
      continue               # all-NaN column: nothing to average
    a[col_nan, i] = valid.mean()
  return a

fill_nan(a)  # fills the NaNs of a in place and displays the returned array

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])