In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [3]:
### Entropy weight method
def entropy_weight_calc(data,scale=True):
    '''
    Compute indicator weights with the entropy weight method.

    input:
    data: dataframe, indicator values (rows are samples, columns are indicators)
    scale: bool, whether to min-max normalize every column first, default True
    output:
    weight: dataframe indexed by indicator name with a single 'weight' column

    Degenerate inputs:
    - A column whose values sum to zero (e.g. a constant column after min-max
      scaling) has no defined proportions; it is treated as carrying no
      information (entropy 1) and therefore receives weight 0.
    - If no column carries information (all divergences are zero, or there
      are fewer than two samples so the entropy normalizer 1/ln(n) is
      undefined), every indicator receives the same weight.
    '''
    data_std = data.copy()
    if scale:
        scaler = MinMaxScaler()
        data_std = scaler.fit_transform(data_std)
        data_std = pd.DataFrame(data_std,index=data.index,columns=data.columns)

    n_samples = len(data_std)
    n_indicators = data_std.shape[1]

    col_sum = data_std.sum(axis=0)
    zero_sum = col_sum == 0

    if n_samples < 2:
        # 1/ln(n) is undefined for n < 2, so no column can be told apart.
        divergence = pd.Series(0.0,index=data_std.columns)
    else:
        # Information entropy of every column.
        k = 1/np.log(n_samples)
        proportion = data_std/col_sum
        # p*ln(p) is taken as 0 where p == 0; silence the log(0) / 0*inf
        # warnings and let nan_to_num map the resulting NaN to 0.
        with np.errstate(divide='ignore',invalid='ignore'):
            plogp = proportion*np.log(proportion)
        plogp = pd.DataFrame(np.nan_to_num(plogp.to_numpy()),
                             index=data_std.index,columns=data_std.columns)
        entropy = -k*(plogp.sum(axis=0))
        # Zero-sum columns would otherwise end up with entropy 0 and hence
        # the largest weight; they carry no information, so force entropy 1.
        entropy = entropy.mask(zero_sum,1.0)
        divergence = 1-entropy

    # Normalize divergences into weights.
    total = divergence.sum()
    if np.isclose(total,0.0,atol=1e-12):
        # Nothing discriminates between indicators: fall back to equal weights.
        weight = pd.Series(1.0/max(n_indicators,1),index=data_std.columns,dtype=float)
    else:
        weight = divergence/total
    return weight.to_frame('weight')

In [4]:
### Principal component analysis method
def pca_weight_calc(data,scale=True,threshold=0.8,random_state=0):
    '''
    Compute indicator weights with principal component analysis.

    input:
    data: dataframe, indicator values (rows are samples, columns are indicators)
    scale: bool, whether to min-max normalize every column first, default True
    threshold: float, cumulative explained-variance ratio that the retained
               components must exceed, default 0.8
    random_state: int, random seed passed to PCA, default 0
    output:
    weight: dataframe indexed by indicator name with a single 'weight' column

    If no prefix of components has a cumulative ratio strictly greater than
    `threshold` (e.g. threshold >= 1), all components are retained.
    '''
    data_std = data.copy()
    if scale:
        scaler = MinMaxScaler()
        data_std = scaler.fit_transform(data_std)
        data_std = pd.DataFrame(data_std,index=data.index,columns=data.columns)

    # Decide how many components to keep: the smallest count whose
    # cumulative explained-variance ratio exceeds the threshold.
    pca = PCA(random_state=random_state)
    pca.fit(data_std)
    cumulative_ratio = pca.explained_variance_ratio_.cumsum()
    exceeding = np.flatnonzero(cumulative_ratio>threshold)
    if exceeding.size:
        component_num = int(exceeding[0])+1
    else:
        # Threshold is never exceeded; keep every component instead of failing.
        component_num = len(cumulative_ratio)

    # Coefficients of each indicator in the retained components.
    pca = PCA(n_components=component_num,random_state=random_state)
    pca.fit(data_std)
    data_pca = pd.DataFrame(pca.components_,columns=data.columns).T.abs()
    # NOTE(review): coefficients are divided by sqrt(singular value); confirm
    # this (rather than sqrt of the explained variance) is the intended scaling.
    singular_value_root = np.sqrt(pca.singular_values_)
    data_pca = data_pca/singular_value_root

    # Variance-ratio-weighted average of the coefficients, normalized to sum to 1.
    variance_ratio = pca.explained_variance_ratio_
    weight = (data_pca*variance_ratio).sum(axis=1)/np.sum(variance_ratio)
    weight = weight/np.sum(weight)
    return weight.to_frame('weight')