In [119]:
import pandas as pd
import numpy as np
import math
from numpy import array
import matplotlib.pyplot as plt

In [120]:
# Original data
data = np.array([
    [61.0, 66.0, 62.0, 61.0, 82.0, 81.0, 70.0, 70.0, 78.0, 78.0, 79.0, 50.0, 52.0],
    [60.0, 63.0, 62.0, 62.0, 62.0, 69.0, 61.0, 74.0, 88.0, 77.0, 86.0, 72.0, 64.0],
    [62.0, 69.0, 65.0, 62.0, 69.0, 68.0, 63.0, 60.0, 75.0, 66.0, 75.0, 62.0, 63.0],
    [80.0, 68.0, 82.0, 61.0, 66.0, 85.0, 83.0, 81.0, 69.0, 88.0, 69.0, 60.0, 66.0],
    [220.0, 4953.5, 6048.2, 62.9, 0.6, 7715.0, 0.7, 36.1, 79.5, 238.0, 6.0, 17920.0, 3.8],
    [71.0, 76.0, 71.0, 48.0, 68.0, 67.0, 71.0, 72.0, 79.0, 77.0, 76.0, 43.0, 70.0],
    [40.0, 77.0, 44.0, 70.0, 68.0, 96.0, 66.0, 65.0, 95.0, 74.0, 77.0, 43.0, 60.0],
    [42.0, 86.0, 43.0, 55.0, 87.0, 84.0, 51.0, 53.0, 89.0, 85.0, 89.0, 51.0, 51.0],
    [53.0, 88.0, 68.0, 50.0, 89.0, 88.0, 52.0, 54.0, 85.0, 89.0, 69.0, 52.0, 55.0],
    [13.0, 0.0, 0.0, 13.0, 0.0, 8.0, 0.0, 8.0, 0.0, 0.0, 6.5, 27.0, 8.0],
    [71.0, 100.0, 73.0, 71.0, 85.0, 79.0, 73.0, 70.0, 100.0, 88.0, 76.0, 71.0, 70.0],
    [30866.00004, 1676760.0, 7275.84, 5837.12, 1116.06, 20059.0, 63.84, 350.17, 1240.2, 51884.0, 45.6, 5254144.0, 162.64],
    [63.0, 66.0, 65.0, 64.0, 64.0, 66.0, 64.0, 51.0, 66.0, 65.0, 69.0, 60.0, 61.0],
    [62.0, 78.0, 64.0, 68.0, 87.0, 73.0, 66.0, 60.0, 94.0, 97.0, 87.0, 61.0, 64.0],
    [80.0, 62.0, 90.0, 52.0, 55.0, 87.0, 81.0, 60.0, 75.0, 68.0, 79.0, 60.0, 65.0],
    [60.0, 79.0, 82.0, 62.0, 69.0, 65.0, 63.0, 65.0, 66.0, 89.0, 88.0, 65.0, 72.0],
    [60.0, 67.0, 66.0, 62.0, 68.0, 62.0, 65.0, 62.0, 61.0, 69.0, 64.0, 60.0, 61.0]
])

In [121]:
def standardize_column(row, mode="max"):
    row = np.array(row, dtype=float)
    if mode == "max":
        return (row - row.min()) / (row.max() - row.min())
    elif mode == "min":
        return (row.max() - row) / (row.max() - row.min())
    else:
        raise ValueError("Mode must be 'max' or 'min'.")

In [122]:
def calculate_entropy(row):
    row = np.array(row, dtype=float)
    probabilities = row / row.sum()
    probabilities = probabilities[probabilities > 0]  # Avoid log(0)
    return -np.sum(probabilities * np.log(probabilities))

In [123]:
# Transpose the data to make rows represent features转置数据
data_transposed = np.array(data).T

# Initialize standardized data初始化标准化数据
standardized_data_by_row = np.zeros_like(data_transposed)

# Apply standardization row-wise逐行应用标准化
for i, row in enumerate(data_transposed):
    if i == 4:  # Fifth row (now feature 5): minimize
        standardized_data_by_row[i] = standardize_column(row, mode="min")
    else:  # Other rows: maximize
        standardized_data_by_row[i] = standardize_column(row, mode="max")

# Transpose back to original structure (features in rows)转置回原始结构
standardized_data_by_row = standardized_data_by_row.T

# Recalculate entropy for each row (now features)重新计算每行（现在是特征）的熵值
rechecked_entropies_by_row = np.array([calculate_entropy(row) for row in standardized_data_by_row])

# Normalize entropies so they sum to 1归一化熵值使其总和为1
normalized_entropies_by_row = rechecked_entropies_by_row / rechecked_entropies_by_row.sum()

In [124]:
rechecked_entropies_by_row

array([1.57118794, 1.59168976, 1.57854349, 1.5921362 , 1.32410475,
       1.59441674, 1.58788919, 1.54101037, 1.57956106, 0.15481391,
       1.61013041, 2.46938038, 1.56157745, 1.57955807, 1.56510734,
       1.57731717, 1.57597246])

In [125]:
normalized_entropies_by_row

array([0.06030414, 0.06109102, 0.06058645, 0.06110816, 0.05082078,
       0.06119569, 0.06094515, 0.05914589, 0.06062551, 0.00594195,
       0.0617988 , 0.09477788, 0.05993528, 0.06062539, 0.06007076,
       0.06053939, 0.06048777])

In [126]:
import pandas as pd
import numpy as np
import math
from numpy import array
import matplotlib.pyplot as plt
# 读取表格文件
file_path = 'predictions100.xlsx'  # 替换为你的文件路径
df = pd.read_excel(file_path, header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,61.000000,60.000000,62.000000,80.000000,220.0,71.000000,40.000000,42.000000,53.000000,13.0,71.000000,3.086600e+04,63.000000,62.000000,80.000000,60.000000,60.000000
1,66.000000,63.000000,69.000000,68.000000,4953.5,76.000000,77.000000,86.000000,88.000000,0.0,100.000000,1.676760e+06,66.000000,78.000000,62.000000,79.000000,67.000000
2,62.000000,62.000000,65.000000,82.000000,6048.2,71.000000,44.000000,43.000000,68.000000,0.0,73.000000,7.275840e+03,65.000000,64.000000,90.000000,82.000000,66.000000
3,61.000000,62.000000,62.000000,61.000000,62.9,48.000000,70.000000,55.000000,50.000000,13.0,71.000000,5.837120e+03,64.000000,68.000000,52.000000,62.000000,62.000000
4,82.000000,62.000000,69.000000,66.000000,0.6,68.000000,68.000000,87.000000,89.000000,0.0,85.000000,1.116060e+03,64.000000,87.000000,55.000000,69.000000,68.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,66.130035,67.876639,65.689118,69.976517,16.7,67.886190,72.802979,67.858612,67.833403,13.0,79.554705,7.720410e+03,62.493128,71.775844,64.805158,70.150586,63.011153
96,71.419302,69.657646,67.398855,68.816412,800.0,67.117347,77.448140,75.627754,69.973423,8.0,79.453299,8.336000e+04,64.031528,76.655959,64.400920,69.643842,63.364231
97,70.608997,69.664561,66.615888,75.680403,774.5,69.404772,76.338856,69.858487,71.357286,2.4,80.411351,6.118550e+03,63.493094,72.201452,73.244626,70.422228,63.292906
98,74.680543,72.098536,66.661905,79.446729,489.4,73.364194,72.929852,67.062761,68.113389,0.0,78.311734,1.874402e+04,62.022157,73.104389,76.365896,69.096859,63.750512


In [127]:
data = df
data = np.array(data)

In [128]:
import numpy as np


# 定义标准化函数
def standardize_column(column, mode="max"):
    column = np.array(column, dtype=float)
    if mode == "max":
        return (column - column.min()) / (column.max() - column.min())
    elif mode == "min":
        return (column.max() - column) / (column.max() - column.min())
    else:
        raise ValueError("Mode must be 'max' or 'min'.")

# 定义计算熵的函数
def calculate_entropy(column):
    column = np.array(column, dtype=float)
    probabilities = column / column.sum()
    probabilities = probabilities[probabilities > 0]  # 避免 log(0)
    return -np.sum(probabilities * np.log(probabilities))

# 初始化标准化数据
standardized_data = np.zeros_like(data)

# 应用标准化列-wise
for i in range(data.shape[1]):
    if i == 4:  # 第五列（索引为4）：极小值
        standardized_data[:, i] = standardize_column(data[:, i], mode="min")
    else:  # 其他列：极大值
        standardized_data[:, i] = standardize_column(data[:, i], mode="max")

# 计算每列的熵值
entropies = np.array([calculate_entropy(col) for col in standardized_data.T])

# 归一化熵值使其总和为1
weights = entropies / entropies.sum()

# weights 即为每列的权重，形状为 (n_features,)

In [129]:
weights

array([0.06294601, 0.06190906, 0.06263205, 0.06233453, 0.06330322,
       0.06316696, 0.0629336 , 0.06258831, 0.06241387, 0.05405816,
       0.06167129, 0.00692863, 0.06316386, 0.06225412, 0.06267545,
       0.06248615, 0.06253473])

In [130]:
def standardize_column(row, mode="max"):
    row = np.array(row, dtype=float)
    if mode == "max":
        return (row - row.min()) / (row.max() - row.min())
    elif mode == "min":
        return (row.max() - row) / (row.max() - row.min())
    else:
        raise ValueError("Mode must be 'max' or 'min'.")

In [131]:
def calculate_entropy(row):
    row = np.array(row, dtype=float)
    probabilities = row / row.sum()
    probabilities = probabilities[probabilities > 0]  # Avoid log(0)
    return -np.sum(probabilities * np.log(probabilities))

In [138]:
# Transpose the data to make rows represent features转置数据
data_transposed = np.array(data).T

# Initialize standardized data初始化标准化数据
standardized_data_by_row = np.zeros_like(data_transposed)

# Apply standardization row-wise逐行应用标准化
for i, row in enumerate(data_transposed):
    if i == 4:  # Fifth row (now feature 5): minimize
        standardized_data_by_row[i] = standardize_column(row, mode="min")
    else:  # Other rows: maximize
        standardized_data_by_row[i] = standardize_column(row, mode="max")

# Recalculate entropy for each row (now features)重新计算每行（现在是特征）的熵值
rechecked_entropies_by_row = np.array([calculate_entropy(row) for row in standardized_data_by_row])

# Normalize entropies so they sum to 1归一化熵值使其总和为1
normalized_entropies_by_row = rechecked_entropies_by_row / rechecked_entropies_by_row.sum()

In [139]:
rechecked_entropies_by_row

array([4.56918979, 4.49391884, 4.5463998 , 4.52480306, 4.59511975,
       4.5852284 , 4.56828928, 4.54322461, 4.53056242, 3.92402916,
       4.47665937, 0.50294222, 4.5850035 , 4.51896657, 4.54954991,
       4.53580899, 4.53933514])

In [140]:
normalized_entropies_by_row

array([0.06294601, 0.06190906, 0.06263205, 0.06233453, 0.06330322,
       0.06316696, 0.0629336 , 0.06258831, 0.06241387, 0.05405816,
       0.06167129, 0.00692863, 0.06316386, 0.06225412, 0.06267545,
       0.06248615, 0.06253473])