## Numpy 聚合操作

- np.sum(x, axis=) 
- np.min(x, axis=)
- np.max(x, axis=)
- np.prod(x)
- np.mean(x)
- np.median(x)
- np.percentile(x, q=)
- np.var()
- np.std()
- np.argmin()
- np.argmax()
- np.random.shuffle()
- np.sort(x, axis=)
- np.argsort()
- np.partition(x, kth=) # 按 kth 分割
- np.argpartition()

In [1]:
import numpy as np

L = np.random.random(100)

In [2]:
L

array([0.06185388, 0.42809774, 0.79810793, 0.99739253, 0.7246074 ,
       0.02596476, 0.20511996, 0.53075721, 0.87828937, 0.20402702,
       0.79907092, 0.32990796, 0.72397725, 0.2323092 , 0.26977399,
       0.25782277, 0.96176898, 0.86615364, 0.68024625, 0.03662967,
       0.77854852, 0.15937785, 0.03239606, 0.27025811, 0.7591358 ,
       0.88891331, 0.60319532, 0.47798347, 0.07108007, 0.06057276,
       0.69799232, 0.87506401, 0.57090774, 0.36170016, 0.94585488,
       0.49502049, 0.14450824, 0.02806074, 0.79289687, 0.67475357,
       0.56639339, 0.20098045, 0.69597356, 0.73143357, 0.89504565,
       0.33836526, 0.86495312, 0.12717411, 0.53324154, 0.05411158,
       0.38907007, 0.52129127, 0.80612825, 0.2818077 , 0.13301471,
       0.53334633, 0.76703068, 0.92772338, 0.503884  , 0.78075206,
       0.92407445, 0.96247487, 0.99045568, 0.13753198, 0.94697151,
       0.7778074 , 0.08132458, 0.74631812, 0.89694233, 0.99938706,
       0.89157315, 0.58164433, 0.8929394 , 0.87749678, 0.55014

In [3]:
sum(L)  # Python 自带求和函数

53.46494594772232

In [4]:
np.sum(L) # Numpy 中的求和函数

53.4649459477223

In [5]:
np.min(L)
# L.min()

0.02596476304271944

In [6]:
np.max(L)
# L.max()

0.9993870646501801

In [7]:
x = np.arange(16).reshape(4, -1)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [8]:
np.sum(x)

120

In [9]:
np.sum(x, axis=0) # 求每一列的和

array([24, 28, 32, 36])

In [10]:
np.sum(x, axis=1) # 求每一行的和

array([ 6, 22, 38, 54])

In [11]:
np.prod(x) # 所有元素相乘

0

In [12]:
np.prod(x + 1) # 注意：下面的结果发生了 32 位整数溢出，16! 的真实值是 20922789888000；可用 np.prod(x + 1, dtype=np.int64) 避免

2004189184

#### 均值

In [13]:
np.mean(x) # 求平均值

7.5

#### 中位数

In [14]:
np.median(x) # 求中位数

7.5

In [15]:
np.percentile(L, q = 50) # 50 分位数（即中位数）：L 数组中约百分之五十的元素小于该返回值

0.5686505682533587

In [16]:
for percent in (0, 25, 50, 75, 100):
    print(np.percentile(L, q = percent))

0.02596476304271944
0.23192910059489927
0.5686505682533587
0.8105741573359128
0.9993870646501801


#### 方差

In [17]:
np.var(L) 

0.10060692478291414

#### 标准差

In [18]:
np.std(L)

0.31718594669832734

In [19]:
x = np.random.normal(0, 1, size=1000000)

In [20]:
np.mean(x) # 均值

-6.2175669789221e-05

In [21]:
np.std(x) # 标准差

0.9999444935068689

## 索引

In [22]:
x = np.random.random(1000000)

In [23]:
np.min(x)

3.461892073008954e-07

In [24]:
np.argmin(x) # 最小值的索引

764926

In [25]:
np.argmax(x)

473168

In [26]:
np.max(x)

0.9999977203069457

## 排序和使用索引

In [27]:
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 乱序处理

In [28]:
# 乱序处理后，数组本身会改变
np.random.shuffle(x)
x

array([14,  3, 10,  5,  1,  4,  7, 15,  9, 12,  6, 11,  8, 13,  2,  0])

In [29]:
np.sort(x)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [30]:
# 排序后，x 本身没有变化
x 

array([14,  3, 10,  5,  1,  4,  7, 15,  9, 12,  6, 11,  8, 13,  2,  0])

In [31]:
x.sort()
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 二维数组排序

In [32]:
X = np.random.randint(10, size=(4, 4))
X

array([[2, 6, 6, 3],
       [8, 6, 3, 4],
       [2, 3, 4, 4],
       [0, 6, 1, 0]])

In [33]:
np.sort(X) # 默认 axis=-1，沿最后一个轴排序，即对每一行内部的元素排序

array([[2, 3, 6, 6],
       [3, 4, 6, 8],
       [2, 3, 4, 4],
       [0, 0, 1, 6]])

In [34]:
np.sort(X, axis=1)

array([[2, 3, 6, 6],
       [3, 4, 6, 8],
       [2, 3, 4, 4],
       [0, 0, 1, 6]])

In [35]:
np.sort(X, axis=0)

array([[0, 3, 1, 0],
       [2, 6, 3, 3],
       [2, 6, 4, 4],
       [8, 6, 6, 4]])

In [36]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 按索引排序

In [37]:
np.random.shuffle(x)
x

array([ 9,  0,  3,  6,  2,  5,  1,  8, 12, 11, 10, 13, 15, 14,  4,  7])

In [38]:
np.argsort(x) # 返回能使 x 有序的索引数组（排序后各元素在原数组中的位置），x 本身不变

array([ 1,  6,  4,  2, 14,  5,  3, 15,  7,  0, 10,  9,  8, 11, 13, 12],
      dtype=int64)

#### partition()函数

In [39]:
np.partition(x, kth=3) 
# kth=3 是索引而不是元素值：排序后应位于索引 3 的元素会被放到该位置，比它小的元素都在它左边，大于等于它的元素都在它右边（两侧内部不保证有序）；此例中该元素恰好是 3

array([ 0,  1,  2,  3,  4,  5,  7,  6,  8, 11, 10, 13, 15, 14, 12,  9])

In [40]:
np.argpartition(x, 3)

array([ 1,  6,  4,  2, 14,  5, 15,  3,  7,  9, 10, 11, 12, 13,  8,  0],
      dtype=int64)