In [38]:
import numpy as np
import pandas as pd

# Print NumPy floats without scientific notation, using 3 digits of precision.
np.set_printoptions(suppress=True,precision=3)

In [39]:
# Read the headerless GDP table and label its four positional columns in one chain.
data = (
    pd.read_csv('gdp.csv', header=None)
    .rename(columns={0: 'GDP', 1: '第一产业', 2: '第二产业', 3: '第三产业'})
)
data

Unnamed: 0,GDP,第一产业,第二产业,第三产业
0,1988,386,839,763
1,2061,408,846,808
2,2335,422,960,953
3,2750,482,1258,1010
4,3356,511,1577,1268
5,3806,561,1893,1352


In [40]:
# Plain NumPy copy of the table; gray_analysis slices its input positionally.
x_mat = np.array(data)

In [71]:
## Grey relational analysis: relational degree of each sub-sequence to the mother sequence
def gray_analysis(x,rho=0.5):
    """Compute the grey relational degree of every sub-sequence.

    Parameters
    ----------
    x : array-like, 2-D
        Column 0 is the mother (reference) sequence; every remaining column
        is a sub-sequence. Each column is divided by its own mean before
        comparison.
    rho : float, default 0.5
        Distinguishing coefficient used in the relational-coefficient formula.

    Returns
    -------
    numpy.ndarray
        One relational degree per sub-sequence column (mean of that column's
        relational coefficients). The result is also printed.
    """
    # Accept lists / DataFrames as well as arrays and force float arithmetic.
    x = np.asarray(x, dtype=float)
    x = x / x.mean(axis=0)
    X = x[:, 1:]    # all sub-sequences
    Y = x[:, [0]]   # mother sequence kept as a column so it broadcasts over X
    abs_x0_xi = np.abs(X - Y)
    a = np.min(abs_x0_xi)   # global minimum difference
    b = np.max(abs_x0_xi)   # global maximum difference
    if b == 0:
        # Every sub-sequence coincides with the mother sequence; the formula
        # below would evaluate 0/0, so report full relation explicitly.
        corre_degree = np.ones(X.shape[1])
    else:
        gamma_mat = (a + rho * b) / (abs_x0_xi + rho * b)
        corre_degree = np.mean(gamma_mat, axis=0)
    print("子序列中各个指标的灰色关联度分别为：",corre_degree)
    return corre_degree

In [72]:
# Relational degree of each remaining column to column 0 (GDP) of the table.
gray_analysis(x_mat)

子序列中各个指标的灰色关联度分别为： [0.508 0.624 0.757]


array([0.508, 0.624, 0.757])

### 用于确定权重

In [47]:
class Topsis:
    """Positivize selected indicator columns of a DataFrame.

    Keyword arguments name the columns (by position) to convert:

    * ``cmin``    -- smaller-is-better columns.
    * ``cmedian`` -- columns whose best value is a single number; the matching
      best values are given, in the same order, by ``best_median``.
    * ``crange``  -- columns whose best values lie in an interval; the matching
      ``[low, high]`` pairs are given, in the same order, by ``best_range``.

    The converted copy is stored in ``self.x_mat``; the input frame is not
    modified.
    """

    def __init__(self,X,**typ):
        x_mat = X.copy()
        # All convertible indicator types, in processing order.
        ctype = ['cmin','cmedian','crange']
        # Keyword that carries the extra "best" parameter for a given type.
        best_key = {'cmedian': 'best_median', 'crange': 'best_range'}
        if typ:
            # Only look at the type keywords; scanning every keyword would let
            # a value in best_median/best_range be mistaken for a column index.
            type_dic = {t: typ[t] for t in ctype if t in typ}
            for convert_type, columns in type_dic.items():
                for current_index, col_wait_for_convert in enumerate(columns):
                    best = None
                    if convert_type in best_key:
                        best = typ[best_key[convert_type]][current_index]
                    # Read by position and write back through the label found
                    # at that position, so the column dtype can change freely.
                    label = x_mat.columns[col_wait_for_convert]
                    converted = self.positivization(
                        x_mat.iloc[:, col_wait_for_convert], convert_type, best)
                    x_mat[label] = np.asarray(converted)
        else:
            print('无需正向化')

        self.x_mat = x_mat

    def positivization(self, col ,t, best=None):
        """Return a positivized version of one indicator column.

        Parameters
        ----------
        col : pandas.Series or 1-D numpy.ndarray
            Values of the indicator. It is never modified.
        t : {'cmin', 'cmedian', 'crange'}
            Indicator type.
        best : number or pair, optional
            Best value for ``'cmedian'``; ``(low, high)`` for ``'crange'``.
            Unused for ``'cmin'``.

        Returns
        -------
        Same container type as ``col`` with the converted values.

        Raises
        ------
        ValueError
            If ``t`` is not one of the supported types.
        """
        if t == 'cmin':
            return col.max() - col
        if t == 'cmedian':
            deviation = abs(col - best)
            m = deviation.max()
            if m == 0:
                # Every value already equals the best value; avoid 0/0.
                return 1 - deviation
            return 1 - deviation / m
        if t == 'crange':
            a, b = best
            # Distance of each value to the interval [a, b]; 0 when inside.
            outside = np.maximum(a - col, 0) + np.maximum(col - b, 0)
            m = max(a - col.min(), col.max() - b)
            if m <= 0:
                # No value lies outside the interval, so all scores are 1.
                return 1.0 - outside
            return 1 - outside / m
        raise ValueError("unknown indicator type: %r" % (t,))

In [60]:
# Load the headerless river data set; columns keep their positional labels 0..3.
river_data = pd.read_csv('river.csv',header=None)
# Seed the preview so the same five rows are shown on every run.
river_data.sample(5, random_state=0)

Unnamed: 0,0,1,2,3
14,2.04,6.4,23.0,17.91
5,2.39,6.77,38.0,24.62
3,8.61,7.05,46.0,26.43
17,8.29,8.41,39.0,12.02
13,2.01,5.55,47.0,26.31


In [61]:
# Positivize river_data by column position: column 2 is smaller-is-better (cmin),
# column 1 is best at the value 7 (cmedian / best_median), and column 3 is best
# inside the interval [10, 20] (crange / best_range).
tp = Topsis(X=river_data,cmin=[2],cmedian=[1],best_median=[7],crange=[3],best_range=[[10,20]])

In [103]:
def score(processed_x, raw_data):
    """Score each row using weights derived from grey relational degrees.

    The processed frame is scaled column-wise by its means, the row-wise
    maximum is prepended as the mother sequence, and ``gray_analysis`` is run
    on the result. The relational degrees are normalized into column weights,
    the weighted row sums of ``raw_data`` are computed, and those sums are
    divided by their total.

    NOTE(review): the weights are applied to ``raw_data`` rather than to
    ``processed_x`` -- confirm this is intended.
    """
    # Column-wise mean scaling produces a new frame, leaving processed_x intact.
    scaled = processed_x / processed_x.mean(axis=0)
    # Mother sequence: the largest scaled value in each row, placed first.
    reference = scaled.max(axis=1)
    scaled.insert(0, column='母序列', value=reference)
    relation = gray_analysis(np.array(scaled))
    weights = relation / relation.sum()
    weighted_rows = (raw_data * weights).sum(axis=1)
    return weighted_rows / sum(weighted_rows)

In [104]:
# Weights come from the positivized matrix; the row scores are computed on river_data.
score(tp.x_mat,river_data)

子序列中各个指标的灰色关联度分别为： [0.646 0.607 0.525 0.647]


0     0.057808
1     0.028172
2     0.055168
3     0.071119
4     0.069670
5     0.057809
6     0.045839
7     0.062283
8     0.031623
9     0.030902
10    0.025951
11    0.045317
12    0.043471
13    0.064533
14    0.040179
15    0.064298
16    0.056917
17    0.053891
18    0.051634
19    0.043416
dtype: float64