利用 apply 快速解决 DataFrame 中各行数据的加权和问题，对于其中的 NaN 值自动对加权系数进行再平衡

In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
# Sample frame: columns A, B and C each contain one missing cell; D is complete.
df = pd.DataFrame(
    {
        "A": [2, 6, 7, np.nan],
        "B": [34, np.nan, 1, 7],
        "C": [np.nan, 33, 21, 13],
        "D": [32, 15, 2, 65],
    }
)

In [3]:
df

Unnamed: 0,A,B,C,D
0,2.0,34.0,,32
1,6.0,,33.0,15
2,7.0,1.0,21.0,2
3,,7.0,13.0,65


In [4]:
# Weight assigned to each feature column.
weights = {
    "A": 0.4,
    "B": 0.2,
    "C": 0.1,
    "D": 0.3,
}
# Feature names, in the same order as the weight table.
features = list(weights)

In [5]:
# Distance value associated with each feature; a more elaborate function
# could be defined here instead of fixed constants.
values = {
    "A": 1,
    "B": 5,
    "C": 10,
    "D": 30,
}

In [9]:
# Weighted average computed one row at a time, re-balancing weights over
# the features that are actually present in that row.
def weight_add(series, weight_map=None, value_map=None):
    """Return the re-balanced weighted average of feature values for one row.

    Only features whose cell in ``series`` is present (not NaN/None) take
    part; their weights are rescaled so they sum to 1 before being applied.
    The row's cell contents are used solely to detect missing entries -- the
    numbers being averaged come from ``value_map``.

    Args:
        series: One row, indexable by feature name (for example the row that
            ``DataFrame.apply(..., axis=1)`` passes in).
        weight_map: Optional mapping of feature name to weight. When omitted,
            the global ``weights`` is used, iterated in ``features`` order.
        value_map: Optional mapping of feature name to the value being
            averaged. When omitted, the global ``values`` is used.

    Returns:
        The weighted average over the present features, or NaN when no
        feature is present or the present weights sum to zero.

    Raises:
        KeyError: If a present feature is missing from either mapping.
    """
    if weight_map is None:
        names, weight_map = features, weights
    else:
        names = list(weight_map)
    if value_map is None:
        value_map = values

    # pd.isna also recognises None/NaT, which math.isnan rejects with TypeError.
    present = [name for name in names if not pd.isna(series[name])]

    present_weights = np.array(
        [weight_map[name] for name in present], dtype=float
    )
    total = present_weights.sum()
    if total == 0:
        # Nothing to average (all cells missing, or only zero weights left):
        # report "undefined" rather than a misleading 0.0.
        return np.nan

    present_values = np.array(
        [value_map[name] for name in present], dtype=float
    )
    # Re-balance the surviving weights so they sum to 1, then apply them.
    return (present_values * (present_weights / total)).sum()

In [10]:
# Score every row (axis=1); the function is passed directly because wrapping
# it in a lambda that only forwards its argument adds nothing.
df.apply(weight_add, axis=1)

0    11.555556
1    13.000000
2    11.400000
3    18.333333
dtype: float64

In [11]:
df

Unnamed: 0,A,B,C,D
0,2.0,34.0,,32
1,6.0,,33.0,15
2,7.0,1.0,21.0,2
3,,7.0,13.0,65


In [17]:
# Column-wise shift then scale: A_c = (A - 1) * 3 and B_c = (B - 4) * 7.
# Missing cells propagate as NaN.
df[["A_c", "B_c"]] = (
    df[["A", "B"]].sub(np.array([1, 4])).mul(np.array([3, 7]))
)

In [18]:
df

Unnamed: 0,A,B,C,D,A_c,B_c
0,2.0,34.0,,32,3.0,210.0
1,6.0,,33.0,15,15.0,
2,7.0,1.0,21.0,2,18.0,-21.0
3,,7.0,13.0,65,,21.0
