In [1]:
# Print the notebook's provenance banner (file name, author, last editor,
# creation time, contact e-mail and a short description) as plain text.
print("""
@File         : Transformations.ipynb
@Author(s)    : Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime  : 2024-12-26 21:44:12
@Email        : cuixuanstephen@gmail.com
@Description  : Transformation
""")


@File         : Transformations.ipynb
@Author(s)    : Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime  : 2024-12-26 21:44:12
@Email        : cuixuanstephen@gmail.com
@Description  : Transformation



In [2]:
import pandas as pd
import numpy as np

与聚合不同，转换不会将一组值缩减为单个值，而是保持调用对象的形状，常用于计算诸如“占组总数的百分比”之类的结果。

In [3]:
# Nullable-integer Series holding one negative, one zero and one positive value.
ser = pd.Series([-1, 0, 1], dtype='Int64')

In [4]:
def adds_one(ser: pd.Series) -> pd.Series:
    """Return ``ser`` with one added to every value.

    Works on any operand that supports ``+ 1``; the input is not modified.

    Args:
        ser: The values to increment.

    Returns:
        The result of ``ser + 1``.
    """
    incremented = ser + 1
    return incremented

In [5]:
# Apply two transformations at once: the built-in 'abs' (by name) and the
# user-defined adds_one; each one produces its own column in the result.
ser.transform(func=['abs', adds_one], axis=0)

Unnamed: 0,abs,adds_one
0,1,0
1,0,1
2,1,2


就像 `pd.DataFrame.agg` 默认会聚合每列一样，`pd.DataFrame.transform` 默认会转换每列。

In [6]:
# 3x3 frame of the integers -5..3 (row-major), converted to pandas'
# nullable dtypes; the bare `df` at the end displays it.
df = (
    pd.DataFrame(np.arange(-5, 4).reshape(3, 3))
    .convert_dtypes(dtype_backend='numpy_nullable')
)
df

Unnamed: 0,0,1,2
0,-5,-4,-3
1,-2,-1,0
2,1,2,3


省去实现细节，调用类似 `df.transform('abs')` 的方法将把绝对值函数分别应用于每一列，然后将结果拼凑为 `pd.DataFrame`：

In [7]:
# Column-wise absolute value; the result keeps the shape of `df`.
df.transform(func='abs', axis=0)

Unnamed: 0,0,1,2
0,5,4,3
1,2,1,0
2,1,2,3


如果将多个转换函数传递给 `pd.DataFrame.transform`，最终会得到一个列索引为 `pd.MultiIndex` 的 `pd.DataFrame`（第一层是原列名，第二层是函数名）：

In [8]:
def add_42(ser: pd.Series) -> pd.Series:
    """Return ``ser`` with 42 added to every value.

    Works on any operand that supports ``+ 42``; the input is not modified.

    Args:
        ser: The values to shift.

    Returns:
        The result of ``ser + 42``.
    """
    return ser + 42

In [9]:
# Apply both 'abs' and add_42 to every column; the resulting columns are
# labelled by (original column, function name).
df.transform(func=['abs', add_42], axis=0)

Unnamed: 0_level_0,0,0,1,1,2,2
Unnamed: 0_level_1,abs,add_42,abs,add_42,abs,add_42
0,5,37,4,38,3,39
1,2,40,1,41,0,42
2,1,43,2,44,3,45
