# 秩和比综合评价法(Rank Sum Ratio, RSR)

In [9]:
# 编秩函数
def get_rank(data, columns, ascending=True, R=None):
    """Assign average (tie-aware) ranks to selected columns of ``data``.

    Within each requested column the values are ordered and numbered from 1;
    equal values share the mean of the positions they occupy.

    Parameters
    ----------
    data : array-like, 1-D or 2-D
        Values to rank. A 1-D input is treated as a single column (use
        ``columns=[0]``).
    columns : iterable of int
        Column indices to rank. Columns not listed are left as they are in
        ``R``.
    ascending : bool, default True
        If True the smallest value gets rank 1; if False the largest does.
    R : ndarray, optional
        Existing rank matrix to fill in. When omitted, or when it contains
        only zeros, a fresh zero matrix shaped like ``data`` is used.
        Otherwise the array is written to in place, so several calls can
        build up one rank matrix column group by column group.

    Returns
    -------
    ndarray
        Rank matrix with the same shape as ``data``.
    """
    values = np.asarray(data)
    original_shape = values.shape
    if values.ndim == 1:
        # Work on a column vector so the per-column loop below is uniform.
        values = values[:, None]

    if R is None or not np.any(R):
        R = np.zeros(values.shape)
    else:
        # Accept a 1-D R for 1-D data; reshape keeps writing into the
        # caller's buffer whenever a view is possible.
        R = np.asarray(R).reshape(values.shape)

    n = values.shape[0]
    for col in columns:
        order = values[:, col].argsort(axis=0)
        if not ascending:
            order = order[::-1]
        ordered = values[order, col]

        begin = 0
        while begin < n:
            # [begin, end) is the run of equal values starting at begin.
            # Starting end at begin + 1 guarantees progress even for NaN,
            # which never compares equal to itself.
            end = begin + 1
            while end < n and ordered[end] == ordered[begin]:
                end += 1
            # 1-based positions begin+1 .. end average to (begin + end + 1) / 2.
            R[order[begin:end], col] = (begin + end + 1) / 2
            begin = end
    return R.reshape(original_shape)

In [10]:
# 例14.7

import numpy as np
import pandas as pd
from scipy.stats import norm
from sklearn import linear_model

data = np.loadtxt('14.D 医院工作质量统计指标.txt')
# The last row of the file holds the indicator weights; the rows above it are
# the observations.
w = data[-1]
data = data[:-1]
# Number of evaluated objects (rows); used instead of a hard-coded 10.
n = data.shape[0]

# Ranking
R = np.zeros(data.shape)
# Columns 1 and 5 are ranked in descending order (largest value gets rank 1).
# NOTE(review): presumably these are the cost-type (lower-is-better)
# indicators; the original comments labelled both passes "benefit-type".
R = get_rank(data, columns=[1,5], ascending=False, R=R)
# Columns 0, 2, 3 and 4 are ranked in ascending order (largest value gets the
# largest rank) -- presumably the benefit-type indicators.
R = get_rank(data, columns=[0,2,3,4], ascending=True, R=R)

# Weighted rank-sum ratio: weighted sum of ranks divided by the number of rows.
WRSR = np.matmul(R, w) / n

# Column labels of the R/WRSR table
R_WRSR_columns = ['x' + str(i+1) for i in range(R.shape[1])] + ['WRSR']
# Row labels of the R/WRSR table: consecutive years starting at 1983
R_WRSR_index = [str(year) for year in range(1983, 1983 + n)]
# Build a DataFrame (textbook table 14.19)
R_WRSR = pd.DataFrame(np.c_[R, WRSR], columns=R_WRSR_columns, index=R_WRSR_index)
R_WRSR_sorted = R_WRSR.sort_values(by='WRSR', ascending=True)
print(R_WRSR_sorted, '\n')

# Column labels of the frequency table
f_columns = ['f', 'cf', 'p', 'Probit', 'WRSRfit', '排序']
f = pd.DataFrame(0.0, index=R_WRSR_sorted.index, columns=f_columns)
# Every row is given frequency 1, so the cumulative frequency is 1..n.
# NOTE(review): this assumes no two rows share the same WRSR -- confirm.
f['f'] = 1.0
f['cf'] = np.arange(1, n + 1, dtype=float)
# Cumulative relative frequency
f['p'] = f['cf'] / n
# Replace the last cumulative frequency (which would be 1) by 1 - 1/(4n) so
# its probit is finite. Assigned through a single indexer: chained
# f['p'].iloc[-1] = ... may write to a temporary copy instead of f.
f.iloc[-1, f.columns.get_loc('p')] = 1 - 1 / (4 * n)
# Probit = standard-normal quantile of p, shifted by 5
f['Probit'] = norm.ppf(f['p']) + 5
# Regress RSR/WRSR (dependent) on Probit (independent)
reg = linear_model.LinearRegression()
reg.fit(f[['Probit']].to_numpy(), R_WRSR_sorted['WRSR'])
# reg.coef_ holds the slope and reg.intercept_ the constant; use them to
# compute the fitted values f['WRSRfit']
f['WRSRfit'] = f['Probit'] * reg.coef_[0] + reg.intercept_
# Position of each row when ordered by fitted value, 1 = largest. rank() is
# used rather than argsort(), which returns sort indices, not ranks.
f['排序'] = f['WRSRfit'].rank(ascending=False, method='first').astype(int)
print(f)

        x1    x2    x3    x4    x5    x6     WRSR
1984   8.0   2.0   4.5   6.0   5.0   2.0  0.35820
1985  10.0   3.5   1.0   1.0   1.0   5.5  0.35975
1983   6.0   1.0   8.0   7.5   9.0   5.5  0.45385
1986   9.0   3.5   3.0   7.5   8.0   3.0  0.47070
1988   4.0   5.0   4.5   2.0   2.0  10.0  0.50420
1992   1.5  10.0   2.0   4.0   4.0   1.0  0.55345
1989   5.0   7.0   7.0   3.0   3.0   9.0  0.63400
1987   7.0   6.0   9.0   5.0   7.0   8.0  0.68050
1991   3.0   8.0   6.0   9.0  10.0   5.5  0.71695
1990   1.5   9.0  10.0  10.0   6.0   5.5  0.76840 

        f    cf      p    Probit   WRSRfit  排序
1984  1.0   1.0  0.100  3.718448  0.337123  10
1985  1.0   2.0  0.200  4.158379  0.400506   9
1983  1.0   3.0  0.300  4.475599  0.446209   8
1986  1.0   4.0  0.400  4.746653  0.485261   7
1988  1.0   5.0  0.500  5.000000  0.521762   6
1992  1.0   6.0  0.600  5.253347  0.558263   5
1989  1.0   7.0  0.700  5.524401  0.597315   4
1987  1.0   8.0  0.800  5.841621  0.643018   3
1991  1.0   9.0  0.900  6