In [3]:
# 1. 先從 preprocessing import 我們需要的東西
from preprocessing import MyScaler

# 2. 定義 fill_missing 函式
def fill_missing(df, columns=('age', 'income')):
    """Return a copy of *df* with missing values in *columns* mean-imputed.

    Each listed column has its NaN entries replaced by that column's own
    mean (computed over the non-missing values). The input frame is not
    modified.

    Args:
        df: pandas DataFrame containing every column named in *columns*.
        columns: Iterable of column labels to impute. Defaults to
            ``('age', 'income')``, so calling ``fill_missing(df)`` keeps
            the original two-column behaviour.

    Returns:
        A new DataFrame with the selected columns filled.

    Raises:
        KeyError: If a label in *columns* is not a column of *df*.
    """
    # Copy first so the caller's frame keeps its missing values.
    df_new = df.copy()
    for col in columns:
        df_new[col] = df_new[col].fillna(df_new[col].mean())
    return df_new

# 3. 定義 run_pipeline：依序呼叫 steps list
def run_pipeline(df, steps):
    """Feed a copy of *df* through every callable in *steps*, in order.

    Args:
        df: Object exposing ``.copy()`` (a DataFrame in this notebook).
        steps: Iterable of one-argument callables; each receives the
            previous step's return value and returns the next one.

    Returns:
        The value returned by the last step, or the copy of *df* when
        *steps* is empty.
    """
    # Start from a copy so no step is ever handed the caller's own object.
    result = df.copy()
    for transform in steps:
        # Each stage's output becomes the next stage's input.
        result = transform(result)
    return result

# 4. Build a small test frame with missing values in both numeric columns
import pandas as pd

df_raw = pd.DataFrame(
    {
        'age': [25, None, 37, 29, None, 41],
        'income': [500, 700, None, 650, 800, None],
        # Non-numeric column; expected to be left unscaled.
        'city': ['A', 'B', 'A', 'C', 'B', 'C'],
    }
)

# 5. Assemble the steps: impute first, then standardize
scaler = MyScaler()
steps = [fill_missing, scaler.fit_transform]

# 6. Run the pipeline
df_clean = run_pipeline(df_raw, steps)

# 7. Inspect the result
print(df_clean)
print("\n→ 標準化後各欄位 mean／std：")
numeric_view = df_clean[['age', 'income']]
print(numeric_view.mean(), numeric_view.std())

✅ 標準化檢查通過：mean = {'age': 0.0, 'income': 3.700743415417188e-17} std = {'age': 0.9999999999999999, 'income': 1.0}
        age    income city
0 -1.414214 -1.678293    A
1  0.000000  0.387298    B
2  0.707107  0.000000    A
3 -0.707107 -0.129099    C
4  0.000000  1.420094    B
5  1.414214  0.000000    C

→ 標準化後各欄位 mean／std：
age       0.000000e+00
income    3.700743e-17
dtype: float64 age       1.0
income    1.0
dtype: float64
