## NumPy 聚合操作

In [1]:
import numpy as np

# 100 random floats in [0, 1), used as sample data for the aggregations below.
# NOTE(review): no random seed is set, so these values (and every recorded
# output derived from L) will differ on a fresh Restart & Run All.
L = np.random.random(100)

In [2]:
# Display the whole sample array.
L

array([0.44881941, 0.19441139, 0.66059954, 0.85608721, 0.50620364,
       0.76524587, 0.60218052, 0.10164346, 0.45645575, 0.27536548,
       0.96490206, 0.77113946, 0.71915715, 0.22815182, 0.09949439,
       0.80998222, 0.77087894, 0.83277105, 0.9723256 , 0.14958045,
       0.76309432, 0.32356079, 0.77857536, 0.39617647, 0.91605035,
       0.88815865, 0.16962625, 0.38336055, 0.83501089, 0.63553837,
       0.01963663, 0.59172167, 0.76518658, 0.09964763, 0.81320581,
       0.24331402, 0.54838058, 0.33861004, 0.35100481, 0.89131989,
       0.53033262, 0.70562867, 0.80192399, 0.01569478, 0.93880064,
       0.43391132, 0.44920887, 0.84554843, 0.62994582, 0.81983922,
       0.64591411, 0.18907585, 0.76377373, 0.63479534, 0.47057383,
       0.38195275, 0.71225033, 0.52974486, 0.85336775, 0.35942116,
       0.82026564, 0.00667711, 0.33103432, 0.11484439, 0.00127837,
       0.25550649, 0.91467901, 0.89401144, 0.39015486, 0.33000515,
       0.97050537, 0.41144639, 0.27925306, 0.36578635, 0.56057

In [3]:
sum(L)  # Python's built-in sum function

51.162671624312026

In [4]:
# The recorded result differs from the built-in sum only in the last digits
# (floating-point rounding; the two do not necessarily add in the same order).
np.sum(L) # NumPy's sum function

51.16267162431204

In [5]:
# Smallest value in L.
np.min(L)
# Equivalent method form: L.min()

0.0012783732338745857

In [7]:
# Largest value in L.
np.max(L)
# Equivalent method form: L.max()

0.9733059379643044

In [8]:
# Integers 0..15 arranged in 4 rows; -1 lets NumPy infer the column count (4).
x = np.arange(16).reshape(4, -1)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [9]:
# No axis given: sum over every element of the 2-D array.
np.sum(x)

120

In [10]:
np.sum(x, axis=0) # sum of each column (collapses the row axis)

array([24, 28, 32, 36])

In [11]:
np.sum(x, axis=1) # sum of each row (collapses the column axis)

array([ 6, 22, 38, 54])

In [12]:
np.prod(x) # product of all elements; 0 here because x contains 0

0

In [13]:
# Product of 1..16, i.e. 16! = 20922789888000.
# x has the platform-default integer dtype; where that is 32-bit (as in the
# originally recorded run) the product silently wraps modulo 2**32 and yields
# 2004189184. Accumulating in int64 gives the true value on every platform.
np.prod(x + 1, dtype=np.int64)

20922789888000

#### 均值

In [14]:
np.mean(x) # arithmetic mean of all elements

7.5

#### 中位数

In [15]:
np.median(x) # median of all elements

7.5

In [16]:
# 50th percentile of L: half of the values in L lie at or below the result
# (0.5179742491682362 in the recorded run).
np.percentile(L, q = 50)

0.5179742491682362

In [18]:
# Print the minimum (0), quartiles (25/50/75) and maximum (100) of L,
# one value per line.
for percent in (0, 25, 50, 75, 100):
    print(np.percentile(L, q = percent))

0.0012783732338745857
0.27040072876514787
0.5179742491682362
0.7729984363920717
0.9733059379643044


#### 方差

In [20]:
# Variance of L (called with NumPy's defaults, so no ddof correction is requested).
np.var(L) 

0.08495044283685793

#### 标准差

In [21]:
# Standard deviation of L: the square root of the variance shown above.
np.std(L)

0.2914625925172181

In [23]:
# One million draws from a normal distribution with mean 0 and standard
# deviation 1. Rebinds x, which previously held the 4x4 integer array.
x = np.random.normal(0, 1, size=1000000)

In [25]:
np.mean(x) # sample mean; close to the requested mean of 0

0.00023712235604809182

In [26]:
np.std(x) # sample standard deviation; close to the requested value of 1

0.9998044079676688

## 索引

In [27]:
# One million random floats in [0, 1); rebinds x again for the index examples.
x = np.random.random(1000000)

In [28]:
# Smallest value in x.
np.min(x)

4.0350880425865654e-07

In [29]:
np.argmin(x) # index of the minimum value

136816

In [30]:
# Index of the maximum value.
np.argmax(x)

292554

In [31]:
# Largest value in x, i.e. the element at the index returned by np.argmax(x).
np.max(x)

0.9999984049063082

## 排序和使用索引

In [32]:
# 1-D array of the integers 0..15, used for the sorting examples.
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 乱序处理

In [33]:
# Shuffle x in place (the call has no result to display), then show x.
np.random.shuffle(x)
x

array([ 2,  1, 14,  5, 15, 12,  3, 10, 11,  9,  6,  8,  7, 13,  0,  4])

In [34]:
# np.sort returns a sorted copy; x is not modified.
np.sort(x)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [35]:
x 
# x itself is unchanged: np.sort returned a sorted copy

array([ 2,  1, 14,  5, 15, 12,  3, 10, 11,  9,  6,  8,  7, 13,  0,  4])

In [36]:
# The ndarray.sort method sorts x in place, so x itself is now ordered.
x.sort()
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 二维数组排序

In [38]:
# 4x4 array of random integers in [0, 10).
X = np.random.randint(10, size=(4, 4))
X

array([[7, 9, 6, 0],
       [8, 6, 3, 0],
       [3, 1, 0, 6],
       [1, 0, 2, 2]])

In [39]:
np.sort(X) # by default sorts along the last axis, i.e. within each row

array([[0, 6, 7, 9],
       [0, 3, 6, 8],
       [0, 1, 3, 6],
       [0, 1, 2, 2]])

In [40]:
# Explicit axis=1: sort within each row (same result as the default above).
np.sort(X, axis=1)

array([[0, 6, 7, 9],
       [0, 3, 6, 8],
       [0, 1, 3, 6],
       [0, 1, 2, 2]])

In [41]:
# axis=0: sort within each column.
np.sort(X, axis=0)

array([[1, 0, 0, 0],
       [3, 1, 2, 0],
       [7, 6, 3, 2],
       [8, 9, 6, 6]])

In [43]:
# x is still the 1-D array that was sorted in place earlier.
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### 按索引排序

In [45]:
# Shuffle x in place again so there is something to sort.
np.random.shuffle(x)
x

array([ 6, 13,  1,  0,  5, 15, 14, 10,  9,  4,  8,  7,  3, 11,  2, 12])

In [46]:
# Returns the indices that would sort x, not the sorted values:
# the first entry is 3 because x[3] == 0 is the smallest element.
np.argsort(x)

array([ 3,  2, 14, 12,  9,  4,  0, 11, 10,  8,  7, 13, 15,  1,  6,  5],
      dtype=int64)

#### partition()函数

In [47]:
# Returns a rearranged copy in which the element at position 3 is the one that
# would be there after a full sort; smaller values come before it and the rest
# after it. The order within each side is not guaranteed.
np.partition(x, 3)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  9,  8, 10, 13, 14, 11, 15, 12])

In [48]:
# Index version of np.partition: returns indices into x rather than values.
np.argpartition(x, 3)

array([ 3,  2, 14, 12,  9,  4,  0, 11,  8, 10,  7,  1,  6, 13,  5, 15],
      dtype=int64)