In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small demo frame with missing values: row "a" has no B value and
# row "b" is NaN in both columns.
df = pd.DataFrame(
    {
        "A": [1.4, np.nan, 0.75, 7.1],
        "B": [np.nan, np.nan, -1.3, -4.5],
    },
    index=list("abcd"),
)
df

Unnamed: 0,A,B
a,1.4,
b,,
c,0.75,-1.3
d,7.1,-4.5


In [6]:
# With no arguments, sum() adds down each column, giving one total per column.
df.sum()

A    9.25
B   -5.80
dtype: float64

In [5]:
# Sum across the columns of each row, giving one total per row.
# NaN entries are left out, so the all-NaN row "b" sums to 0.0.
df.sum(axis="columns")

a    1.40
b    0.00
c   -0.55
d    2.60
dtype: float64

In [7]:
# skipna defaults to True: NaN entries are left out of the sum
# (a row with only NaN values therefore sums to 0).
# Warning: skipna=False lets NaN propagate, so every row that contains
# a NaN produces NaN as its result.
df.sum(axis="columns", skipna=False)

a     NaN
b     NaN
c   -0.55
d    2.60
dtype: float64

In [9]:
# idxmax() returns, for each column, the index label of its maximum value.
# Column A: the maximum is 7.1, at index label "d".
# Column B: the maximum is -1.3, at index label "c".
df.idxmax()

A    d
B    c
dtype: object

In [11]:
# cumsum() returns the running (cumulative) sum down each column.
# NaN entries are skipped: the result stays NaN at a NaN position,
# but the running total carries on past it.
#          column A                       column B
# row a:   1.4                = 1.4       NaN             -> NaN
# row b:   NaN (skipped)      -> NaN      NaN (skipped)   -> NaN
# row c:   1.4 + 0.75         = 2.15      -1.3            = -1.3
# row d:   1.4 + 0.75 + 7.1   = 9.25      -1.3 - 4.5      = -5.8
df.cumsum()

Unnamed: 0,A,B
a,1.4,
b,,
c,2.15,-1.3
d,9.25,-5.8


In [18]:
# describe() on numeric data: per-column summary statistics
# (count of non-NaN values, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,A,B
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [19]:
# describe() on non-numeric data reports count, number of unique values,
# the most frequent value (top) and how often it occurs (freq).
labels = list("aabc") * 4
s = pd.Series(labels)
s.describe()

count     16
unique     3
top        a
freq       8
dtype: object