# Numpy 中的聚合操作

In [1]:
import numpy as np

In [2]:
L = np.random.random(100)
sum(L)

50.832660094432406

In [3]:
np.sum(L)

50.83266009443242

In [4]:
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

53.1 ms ± 88.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
252 µs ± 1.54 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Min, Max

In [5]:
np.min(big_array)

2.0750743298103558e-06

In [6]:
np.max(big_array)

0.9999971696442967

In [7]:
big_array.min()

2.0750743298103558e-06

In [8]:
big_array.max()

0.9999971696442967

In [9]:
big_array.sum()

499655.83983013383

### 多维度聚合

In [10]:
X = np.arange(16).reshape(4,-1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [11]:
np.sum(X)

120

In [12]:
np.sum(X, axis=0)

array([24, 28, 32, 36])

In [13]:
np.sum(X, axis=1)

array([ 6, 22, 38, 54])

### 其他聚合操作

In [14]:
np.prod(X)

0

In [15]:
np.prod(X + 1)

2004189184

In [16]:
np.mean(X)

7.5

In [17]:
np.median(X)

7.5

In [18]:
v = np.array([1, 1, 2, 2, 10])
np.mean(v)

3.2

In [19]:
np.median(v)

2.0

In [20]:
# percentile 百分值
np.percentile(big_array, q=50)

0.4996353714636223

In [21]:
np.median(big_array)

0.4996353714636223

In [22]:
np.percentile(big_array, q=100)

0.9999971696442967

In [23]:
np.max(big_array)

0.9999971696442967

In [24]:
for percent in [0, 25, 50, 75, 100]:
    print(np.percentile(big_array, q=percent))

2.0750743298103558e-06
0.2498819054281746
0.4996353714636223
0.7490236820825598
0.9999971696442967


In [25]:
# 计算沿指定轴的方差。
np.var(big_array)

0.08326700401679546

In [26]:
np.std(big_array)

0.2885602259785563

In [27]:
# 从正态（高斯）分布中抽取随机样本。
x = np.random.normal(0, 1, 1000000)

In [28]:
np.mean(x)

0.00025858747335631834

In [29]:
# 计算沿指定轴的标准偏差。
np.std(x)

1.000788639768486