## 习题1：
### 一个公司底下有 5 个煤矿，存在 9 个评价标准。对该公司综合效益进行打分评价。

# 数据预处理

In [2]:
import numpy as np
import pandas as pd

# ===== Example: 5 coal mines x 9 indicators =====
# (Replace this DataFrame with real data as needed.)
data = pd.DataFrame({
    "Profit":[120, 150, 130, 160, 140],
    "Production":[300, 400, 350, 380, 370],
    "Sales":[280, 390, 340, 360, 365],
    "Efficiency":[10, 12, 11, 13, 12],
    "Turnover":[2.5, 3.0, 2.8, 3.1, 3.0],
    "Recovery":[0.80, 0.85, 0.83, 0.88, 0.86],
    "Cost":[100, 98, 99, 101, 97],
    "Ash":[20, 18, 19, 21, 17],
    "Fatality":[50, 40, 60, 70, 30]
}, index=["Mine_A","Mine_B","Mine_C","Mine_D","Mine_E"])

# Indicator direction (True = benefit type, False = cost type)
benefit_flags = [True, True, True, True, True, True, False, False, False]


def rank_descending(scores):
    """Return 1-based dense ranks where the largest score gets rank 1.

    ``argsort()[::-1] + 1`` yields the *positions* of the sorted items, not
    the rank of each item, so it is only right by coincidence. Ranking via
    pandas assigns every score its own rank; tied scores share a rank.

    Args:
        scores: 1-D array-like of numeric scores.

    Returns:
        Integer ndarray of the same length holding each score's rank.
    """
    series = pd.Series(np.asarray(scores, dtype=float))
    return series.rank(ascending=False, method="dense").astype(int).to_numpy()


# ===== Step 1: direction alignment + min-max normalisation =====
X = data.values.astype(float)
m, n = X.shape
X_norm = np.zeros_like(X)

for j in range(n):
    col = X[:, j]
    span = col.max() - col.min()
    if span == 0:
        # A constant column carries no ranking information; leave it at 0
        # instead of dividing by zero and filling the column with NaN.
        continue
    if benefit_flags[j]:  # benefit type: larger is better
        X_norm[:, j] = (col - col.min()) / span
    else:  # cost type: smaller is better
        X_norm[:, j] = (col.max() - col) / span

# Wrap in a DataFrame for easier inspection
X_norm_df = pd.DataFrame(X_norm, index=data.index, columns=data.columns)

# ===== Step 2: CRITIC weights =====
stds = X_norm.std(axis=0, ddof=1)  # per-column sample standard deviation
with np.errstate(invalid="ignore", divide="ignore"):
    corr_matrix = np.corrcoef(X_norm.T)  # n x n correlation matrix
# Correlations involving a constant column are NaN; nansum skips them so a
# single constant indicator does not turn every weight into NaN.
C = stds * np.nansum(1 - corr_matrix, axis=1)  # information content
weights = C / np.sum(C)

weights_df = pd.DataFrame({"Weight":weights}, index=data.columns)

# ===== Step 3: TOPSIS composite evaluation =====
V = X_norm * weights  # weighted normalised matrix
v_pos = V.max(axis=0)  # positive ideal solution
v_neg = V.min(axis=0)  # negative ideal solution

D_pos = np.sqrt(((V - v_pos)**2).sum(axis=1))
D_neg = np.sqrt(((V - v_neg)**2).sum(axis=1))

C_scores = D_neg / (D_pos + D_neg)

result = pd.DataFrame({
    "C_score": C_scores,
    "Rank": rank_descending(C_scores)
}, index=data.index)

print("=== 权重 ===")
print(weights_df)
print("\n=== 综合评价结果 ===")
print(result.sort_values("Rank"))


=== 权重 ===
              Weight
Profit      0.110831
Production  0.074299
Sales       0.068793
Efficiency  0.096390
Turnover    0.082406
Recovery    0.094838
Cost        0.146774
Ash         0.146774
Fatality    0.178896

=== 综合评价结果 ===
         C_score  Rank
Mine_E  0.806816     1
Mine_B  0.750313     2
Mine_D  0.430955     3
Mine_C  0.395738     4
Mine_A  0.267913     5


In [3]:
import numpy as np
import pandas as pd

# -----------------------
# Inputs: `data` is an m x n DataFrame; `benefit_flags` is a length-n list of
# booleans (True = benefit type, False = cost type).
# Paste the real data here.
# -----------------------
data = pd.DataFrame(
    {
        "Profit": [120, 150, 130, 160, 140],
        "Production": [300, 400, 350, 380, 370],
        "Sales": [280, 390, 340, 360, 365],
        "Efficiency": [10, 12, 11, 13, 12],
        "Turnover": [2.5, 3.0, 2.8, 3.1, 3.0],
        "Recovery": [0.80, 0.85, 0.83, 0.88, 0.86],
        "Cost": [100, 98, 99, 101, 97],
        "Ash": [20, 18, 19, 21, 17],
        "Fatality": [50, 40, 60, 70, 30],
    },
    index=["Mine_A", "Mine_B", "Mine_C", "Mine_D", "Mine_E"],
)
benefit_flags = [True] * 6 + [False] * 3

# ========== 1) Direction alignment ==========
X = data.to_numpy(dtype=float)
m, n = X.shape

# Benefit columns are kept as-is (larger is better); cost columns are
# flipped with `column max - value` so that larger also means better.
is_benefit = np.asarray(benefit_flags, dtype=bool)
X_pos = np.where(is_benefit, X, X.max(axis=0) - X)

# ========== 2) Normalisation (pick one) ==========

def minmax_scale(A):
    """Rescale every column of ``A`` to [0, 1] using its min and max.

    Columns whose max is not greater than their min (constant columns) are
    set to 0.0. The input array is not modified.
    """
    values = A.astype(float)
    lo = values.min(axis=0)
    hi = values.max(axis=0)
    varying = hi > lo  # columns with a non-zero range

    scaled = np.zeros_like(values)
    scaled[:, varying] = (values[:, varying] - lo[varying]) / (hi[varying] - lo[varying])
    return scaled

def vector_scale(A):
    """Divide every column of ``A`` by its Euclidean norm.

    Columns with a zero norm are divided by 1.0 instead, so they stay all
    zero. The input array is not modified.
    """
    values = A.astype(float)
    col_norms = np.linalg.norm(values, axis=0)
    safe_norms = np.where(col_norms == 0, 1.0, col_norms)
    return values / safe_norms

def zscore_scale(A):
    """Standardise every column of ``A`` to zero mean and unit sample std.

    The standard deviation uses ``ddof=1``. Columns with zero deviation are
    divided by 1.0 instead, so they become all zero. The input array is not
    modified.
    """
    values = A.astype(float)
    center = np.mean(values, axis=0)
    spread = np.std(values, axis=0, ddof=1)
    safe_spread = np.where(spread == 0, 1.0, spread)
    return (values - center) / safe_spread

# Pick one scaler: min-max is the most intuitive; vector normalisation is
# closest to classic TOPSIS; z-score suits roughly normal data.
X_std = minmax_scale(X_pos)   # alternatives: vector_scale(X_pos) / zscore_scale(X_pos)

# ========== 3) Ideal points ==========
# Column-wise best and worst values of the scaled matrix.
v_pos, v_neg = X_std.max(axis=0), X_std.min(axis=0)

# ========== 4) Distance functions ==========
def euclidean(A, b):
    """Return the Euclidean distance from each row of ``A`` (m x n) to ``b`` (n)."""
    diff = A - b
    return np.sqrt(np.sum(np.square(diff), axis=1))

def manhattan(A, b):
    """Return the Manhattan (L1) distance from each row of ``A`` to ``b``."""
    offsets = np.absolute(A - b)
    return np.sum(offsets, axis=1)

def mahalanobis(A, b):
    """Return the Mahalanobis distance from each row of ``A`` to ``b``.

    The covariance is estimated from the rows of ``A`` (columns are the
    variables) and regularised with a small multiple of the identity before
    inversion, because the estimate can be singular when there are few rows
    relative to columns.

    Args:
        A: m x n array of observations.
        b: length-n reference point.

    Returns:
        Length-m array of distances.
    """
    # np.cov collapses to a 0-d array for a single column; force 2-D so the
    # regularisation and inversion below also work when n == 1.
    S = np.atleast_2d(np.cov(A, rowvar=False))
    # Stability: add a small ridge on the diagonal.
    eps = 1e-6
    S_reg = S + eps * np.eye(S.shape[0])
    invS = np.linalg.inv(S_reg)
    diff = A - b
    quad = np.einsum('ij,jk,ik->i', diff, invS, diff)
    # Rounding on an ill-conditioned inverse can push the quadratic form
    # marginally below zero; clamp so sqrt does not produce NaN.
    return np.sqrt(np.maximum(quad, 0.0))

# Choose the distance metric (alternatives: manhattan / mahalanobis)
dist = euclidean

# Distance of every alternative to the positive and negative ideal points
D_pos, D_neg = (dist(X_std, ideal) for ideal in (v_pos, v_neg))

# Relative closeness: nearer to 1 means closer to the positive ideal
C = D_neg / (D_pos + D_neg)

score_series = pd.Series(C, index=data.index, name="C_score")
rank = score_series.rank(ascending=False, method="dense").astype(int)

res = pd.DataFrame({"C_score": C, "Rank": rank})
res = res.sort_values("Rank")
print("=== 无权TOPSIS（到理想点距离）结果 ===")
print(res)


=== 无权TOPSIS（到理想点距离）结果 ===
         C_score  Rank
Mine_B  0.767644     1
Mine_E  0.761391     2
Mine_D  0.562993     3
Mine_C  0.421003     4
Mine_A  0.184002     5
