# Hello RPSR!

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.stats import norm
import itertools

In [2]:
# Debug aid: print the kernel's current working directory.
!pwd

/Users/zhiyuzhang/rspr


In [3]:
# 1. Load the Iris dataset and split it into a training and a test set.
iris = load_iris()
X, y = iris.data, iris.target  # X: four attributes per sample; y: class labels (0, 1, 2)
num_classes = np.unique(y).size
num_attributes = X.shape[1]

# Shuffled 80/20 split; the fixed random_state keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

## RPS generation method

### Step 1: 
Establish a Gaussian discriminant model (GDM), then construct the membership vector (MV) of the test sample from the GDM.

In [4]:
# 2. For every class and every attribute, estimate the mean and the
#    unbiased (ddof=1) standard deviation from the training samples.
#    Resulting layout: [class, attribute, (mean, std)].
mean_std_by_class = np.array([
    [(column.mean(), column.std(ddof=1)) for column in X_train[y_train == class_label].T]
    for class_label in np.unique(y_train)
])
print("每个类中每个属性的均值和标准差:\n", mean_std_by_class)
print("Shape of mean_std_by_class:\n", mean_std_by_class.shape)

每个类中每个属性的均值和标准差:
 [[[4.99       0.3564785 ]
  [3.4525     0.39547926]
  [1.45       0.18397324]
  [0.245      0.10609623]]

 [[5.9195122  0.54231887]
  [2.77073171 0.32034661]
  [4.24146341 0.4811318 ]
  [1.32195122 0.20556288]]

 [[6.53333333 0.65386838]
  [2.96666667 0.31898963]
  [5.52051282 0.5415278 ]
  [2.         0.2901905 ]]]
Shape of mean_std_by_class:
 (3, 4, 2)


In [5]:
# 3. Build one Gaussian per (class, attribute) and evaluate a test sample under each.

# Object array holding the frozen scipy distributions. It is sized from
# num_classes / num_attributes (not a hard-coded (3, 4)) so it always agrees
# with the loops that fill and read it.
gaussian_functions = np.empty((num_classes, num_attributes), dtype=object)
for class_label in range(num_classes):
    for i in range(num_attributes):
        mean, std = mean_std_by_class[class_label, i]
        gaussian_functions[class_label, i] = norm(loc=mean, scale=std)

# Draw one test sample with a seeded generator so that "Restart & Run All"
# always selects the same sample and every downstream output is reproducible.
sample_rng = np.random.default_rng(42)
test_sample = X_test[sample_rng.integers(len(X_test))]

# gaussian_results[c, i] is the density of the sample's attribute i under the
# Gaussian fitted for class c.
gaussian_results = np.array([
    [gaussian_functions[class_label, i].pdf(test_sample[i]) for i in range(num_attributes)]
    for class_label in range(num_classes)
])
print("\n测试集中选取的样本:", test_sample)
print("\n每个类中每个属性的高斯分布函数值:\n", gaussian_results)


测试集中选取的样本: [5.1 3.8 1.5 0.3]

每个类中每个属性的高斯分布函数值:
 [[1.06708843e+00 6.85695585e-01 2.08985483e+00 3.28741763e+00]
 [2.34854656e-01 7.13875079e-03 7.38947675e-08 8.33764969e-06]
 [5.52049953e-02 4.12255042e-02 7.90310636e-13 4.85294087e-08]]


### Step 2: 
Perform weight analysis for the test sample.

In [6]:
# Normalize every attribute column so that its class memberships sum to 1.
column_sums = gaussian_results.sum(axis=0)
normalized_results = np.divide(gaussian_results, column_sums)
print("\n每个属性针对所有类的归一化后的MV (归一化后的高斯分布值):\n", normalized_results)


每个属性针对所有类的归一化后的MV (归一化后的高斯分布值):
 [[7.86272659e-01 9.34114016e-01 9.99999965e-01 9.99997449e-01]
 [1.73050133e-01 9.72502567e-03 3.53588029e-08 2.53622428e-06]
 [4.06772083e-02 5.61609585e-02 3.78165314e-13 1.47621295e-08]]


In [7]:
# Rank the classes of every attribute by descending normalized membership and
# keep the ranking indices so other per-class arrays can be reordered alike.
sorted_indices = (-normalized_results).argsort(axis=0)  # negate to get descending order
sorted_nmv = np.take_along_axis(normalized_results, sorted_indices, axis=0)  # memberships in ranked order
sorted_gaussian_functions = np.take_along_axis(gaussian_functions, sorted_indices, axis=0)  # Gaussians in ranked order

# Show the results.
print("\n归一化后的MV降序排序的结果:\n", sorted_nmv)
print("\n每个元素排序前的原始类索引:\n", sorted_indices)



归一化后的MV降序排序的结果:
 [[7.86272659e-01 9.34114016e-01 9.99999965e-01 9.99997449e-01]
 [1.73050133e-01 5.61609585e-02 3.53588029e-08 2.53622428e-06]
 [4.06772083e-02 9.72502567e-03 3.78165314e-13 1.47621295e-08]]

每个元素排序前的原始类索引:
 [[0 0 0 0]
 [1 2 1 1]
 [2 1 2 2]]


In [8]:
# Reorder the class means and standard deviations with the per-attribute
# ranking stored in sorted_indices. mean_std_by_class is indexed as
# [class, attribute, 0] for the mean and [class, attribute, 1] for the
# standard deviation. take_along_axis yields
# result[rank, attr] = source[sorted_indices[rank, attr], attr], so the output
# shape follows the data instead of a hard-coded (3, 4).
x_mean_ord = np.take_along_axis(mean_std_by_class[:, :, 0], sorted_indices, axis=0)
std_ord = np.take_along_axis(mean_std_by_class[:, :, 1], sorted_indices, axis=0)

print("\n排序后的 x_mean_ord:\n", x_mean_ord)
print("\n排序后的 std_ord:\n", std_ord)


排序后的 x_mean_ord:
 [[4.99       3.4525     1.45       0.245     ]
 [5.9195122  2.96666667 4.24146341 1.32195122]
 [6.53333333 2.77073171 5.52051282 2.        ]]

排序后的 std_ord:
 [[0.3564785  0.39547926 0.18397324 0.10609623]
 [0.54231887 0.31898963 0.4811318  0.20556288]
 [0.65386838 0.32034661 0.5415278  0.2901905 ]]


In [9]:
# Supporting degree exp(-|x - mean|) for every ranked class and attribute;
# test_sample is broadcast against the rows of x_mean_ord.
distance_to_mean = np.abs(test_sample - x_mean_ord)
supporting_degree = np.exp(-distance_to_mean)

print("\nSupporting degree (支持度):\n", supporting_degree)


Supporting degree (支持度):
 [[0.89583414 0.70645201 0.95122942 0.94648515]
 [0.44064655 0.43459821 0.06447592 0.35989203]
 [0.23851255 0.35726828 0.01794376 0.18268352]]


In [10]:
# Enumerate the permutations of the leading ranks, one prefix size at a time.
def get_ordered_permutations(num_classes):
    """Return all permutations of range(1), range(2), ..., range(num_classes).

    The permutations are listed by increasing size; within one size they follow
    the order produced by ``itertools.permutations``. Each entry is a tuple.
    """
    return [
        permutation
        for size in range(1, num_classes + 1)
        for permutation in itertools.permutations(range(size))
    ]

# Build the list of ordered permutations for the current number of classes and display it.
all_combinations = get_ordered_permutations(num_classes)
all_combinations

[(0,),
 (0, 1),
 (1, 0),
 (0, 1, 2),
 (0, 2, 1),
 (1, 0, 2),
 (1, 2, 0),
 (2, 0, 1),
 (2, 1, 0)]

In [11]:
# supporting_degree: support matrix of shape (3, 4) (3 classes, 4 attributes)
# num_classes: number of classes (3)
# num_attributes: number of attributes (4)


In [12]:
# Weight matrix: one row per ordered permutation, one column per attribute.
num_combinations = len(all_combinations)  # 9 permutations for the three Iris classes
weight_matrix = np.zeros((num_combinations, num_attributes))  # (9, 4) here


def _permutation_weight(support, combination):
    """Weight of one ordered permutation according to formula (19).

    Multiplies, for each position u, the support of the element at u divided by
    the summed support of the elements from position u to the end.
    """
    weight = 1.0
    for position, rank in enumerate(combination):
        remaining = list(combination[position:])
        weight *= support[rank] / np.sum(support[remaining])
    return weight


for attr_idx in range(num_attributes):
    s = supporting_degree[:, attr_idx]  # supports of this attribute, shape (num_classes,)
    for comb_idx, combination in enumerate(all_combinations):
        weight_matrix[comb_idx, attr_idx] = _permutation_weight(s, combination)

# Show the weight matrix.
print("\n权重矩阵 (Weight matrix):\n", weight_matrix)


权重矩阵 (Weight matrix):
 [[1.         1.         1.         1.        ]
 [0.67029336 0.61912438 0.93652104 0.72451139]
 [0.32970664 0.38087562 0.06347896 0.27548861]
 [0.36903517 0.25877034 0.71991093 0.42161243]
 [0.19975085 0.21272622 0.20035245 0.2140132 ]
 [0.22094973 0.19263669 0.06122211 0.20258851]
 [0.05882706 0.0974206  0.00115488 0.03910213]
 [0.10150734 0.14762783 0.01625765 0.08888576]
 [0.04992985 0.09081833 0.00110197 0.03379797]]


### Step 3: 
Construct the weighted PMF from the weight vector and the ordered normalized membership vector (ONMV), then generate the weighted RPS.

In [13]:
# Compute the weighted PMF.
def calculate_weighted_pmf(weight_matrix, sorted_nmv):
    """Scale every permutation weight by the ordered membership value of its size.

    The rows of ``weight_matrix`` are read in consecutive blocks of 1!, 2!, ...,
    n! rows (n = ``sorted_nmv.shape[0]``), i.e. all permutations of size 1,
    then all of size 2, and so on. Every row in the size-``i`` block is
    multiplied element-wise by ``sorted_nmv[i - 1]``.

    Parameters
    ----------
    weight_matrix : np.ndarray, shape (num_combinations, num_attributes)
        Permutation weights, one column per attribute.
    sorted_nmv : np.ndarray, shape (num_classes, >= num_attributes)
        Membership values per rank (row) and attribute (column).

    Returns
    -------
    np.ndarray
        Array with the shape and dtype of ``weight_matrix``. Rows beyond the
        last block, if any, stay zero.
    """
    _, num_attributes = weight_matrix.shape
    num_classes = sorted_nmv.shape[0]

    weighted_pmf = np.zeros_like(weight_matrix)

    block_start = 0
    block_size = 1  # running factorial: equals size! inside the loop
    for size in range(1, num_classes + 1):
        block_size *= size
        block_end = block_start + block_size
        # The membership row broadcasts over all rows of the current block.
        weighted_pmf[block_start:block_end] = (
            weight_matrix[block_start:block_end] * sorted_nmv[size - 1, :num_attributes]
        )
        block_start = block_end

    return weighted_pmf


A small hand-made example used to check that `calculate_weighted_pmf` is implemented correctly.

In [14]:
# Worked example
# weight_matrix is a (9, 4) matrix and sorted_nmv is a (3, 4) matrix.
# Example sorted_nmv data
sorted_nmv_FORTEST = np.array([
    [0.5, 0.6, 0.7, 0.6189],
    [0.4, 0.5, 0.6, 0.3811],
    [0.3, 0.4, 0.5, 1.57e-32]
])

# Example weight_matrix data
weight_FORTEST = np.array([
    [1.0, 0.9, 0.8, 1],
    [0.6, 0.5, 0.4, 0.5374],
    [0.2, 0.1, 0.3, 0.4626],
    [0.5, 0.7, 0.6, 0.0828],
    [0.4, 0.6, 0.5, 0.0713],
    [0.3, 0.2, 0.4, 0.1284],
    [0.2, 0.3, 0.5, 0.3262],
    [0.1, 0.2, 0.3, 0.0990],
    [0.2, 0.1, 0.4, 0.2923]
])

# Compute the weighted PMF for the example.
weighted_FORTEST = calculate_weighted_pmf(weight_FORTEST, sorted_nmv_FORTEST)

# Actually verify the result instead of only printing it: row 0 holds the
# size-1 permutation, rows 1-2 the size-2 ones and rows 3-8 the size-3 ones,
# so they must be scaled by the first, second and third membership row.
np.testing.assert_allclose(weighted_FORTEST[0], weight_FORTEST[0] * sorted_nmv_FORTEST[0])
np.testing.assert_allclose(weighted_FORTEST[1:3], weight_FORTEST[1:3] * sorted_nmv_FORTEST[1])
np.testing.assert_allclose(weighted_FORTEST[3:], weight_FORTEST[3:] * sorted_nmv_FORTEST[2])
print("\n测试用 PMF:\n", weighted_FORTEST)


测试用 PMF:
 [[5.0000000e-01 5.4000000e-01 5.6000000e-01 6.1890000e-01]
 [2.4000000e-01 2.5000000e-01 2.4000000e-01 2.0480314e-01]
 [8.0000000e-02 5.0000000e-02 1.8000000e-01 1.7629686e-01]
 [1.5000000e-01 2.8000000e-01 3.0000000e-01 1.2999600e-33]
 [1.2000000e-01 2.4000000e-01 2.5000000e-01 1.1194100e-33]
 [9.0000000e-02 8.0000000e-02 2.0000000e-01 2.0158800e-33]
 [6.0000000e-02 1.2000000e-01 2.5000000e-01 5.1213400e-33]
 [3.0000000e-02 8.0000000e-02 1.5000000e-01 1.5543000e-33]
 [6.0000000e-02 4.0000000e-02 2.0000000e-01 4.5891100e-33]]


In [15]:
# Weighted PMF of the selected test sample, one column per attribute.
weighted_pmf = calculate_weighted_pmf(weight_matrix, sorted_nmv)
print("\nWeighted PMF:\n", weighted_pmf)


Weighted PMF:
 [[7.86272659e-01 9.34114016e-01 9.99999965e-01 9.99997449e-01]
 [1.15994356e-01 3.47706187e-02 3.31142627e-08 1.83752339e-06]
 [5.70557772e-02 2.13903398e-02 2.24454016e-09 6.98700891e-07]
 [1.50113206e-02 2.51654817e-03 2.72245342e-13 6.22389732e-09]
 [8.12530677e-03 2.06876794e-03 7.57663489e-14 3.15929060e-09]
 [8.98761830e-03 1.87339677e-03 2.31520798e-14 2.99063780e-09]
 [2.39292042e-03 9.47417844e-04 4.36735206e-16 5.77230666e-10]
 [4.12903541e-03 1.43568440e-03 6.14808075e-15 1.31214313e-09]
 [2.03100679e-03 8.83210545e-04 4.16727205e-16 4.98930016e-10]]


In [16]:
def get_acc_permutations(num):
    """Count the ordered arrangements of 1..num items drawn from ``num`` items.

    Equals sum_{r=1}^{num} num! / (num - r)!. The count is accumulated with a
    running falling factorial, so no permutation is ever materialized.
    Returns 0 when ``num`` is 0 or negative.
    """
    total = 0
    arrangements = 1  # becomes num * (num - 1) * ... * (num - r + 1) for length r
    for r in range(1, num + 1):
        arrangements *= num - r + 1
        total += arrangements
    return total


assert get_acc_permutations(3) == 15

In [17]:
# Weighted RPS per attribute: a set of (ordered class tuple, mass) pairs.
RPS_w = []
for j in range(num_attributes):
    thetas = sorted_indices[:, j]  # class labels of attribute j in ranked order
    RPS_w.append({
        # Map the rank positions of the permutation back to class labels.
        (tuple(thetas[list(combination)]), weighted_pmf[idx, j])
        for idx, combination in enumerate(all_combinations)
    })

RPS_w

[{((0,), 0.7862726588470111),
  ((0, 1), 0.11599435566876512),
  ((0, 1, 2), 0.015011320588306972),
  ((0, 2, 1), 0.00812530677259959),
  ((1, 0), 0.0570557772020016),
  ((1, 0, 2), 0.00898761830076162),
  ((1, 2, 0), 0.0023929204185468445),
  ((2, 0, 1), 0.0041290354087611305),
  ((2, 1, 0), 0.0020310067932459656)},
 {((0,), 0.934114015867886),
  ((0, 1, 2), 0.002068767936625385),
  ((0, 2), 0.034770618707499874),
  ((0, 2, 1), 0.0025165481725448417),
  ((1, 0, 2), 0.0014356843992384418),
  ((1, 2, 0), 0.000883210544506705),
  ((2, 0), 0.021390339755573005),
  ((2, 0, 1), 0.0018733967720819976),
  ((2, 1, 0), 0.0009474178440438135)},
 {((0,), 0.9999999646408189),
  ((0, 1), 3.311426271283788e-08),
  ((0, 1, 2), 2.7224534237667426e-13),
  ((0, 2, 1), 7.576634887970788e-14),
  ((1, 0), 2.2445401620240143e-09),
  ((1, 0, 2), 2.3152079820483157e-14),
  ((1, 2, 0), 4.3673520622196196e-16),
  ((2, 0, 1), 6.14808074823924e-15),
  ((2, 1, 0), 4.167272053875363e-16)},
 {((0,), 0.99999744901358

In [18]:
# Show the weighted RPS of the attribute at index 3.
RPS_w[3]

{((0,), 0.9999974490135878),
 ((0, 1), 1.8375233913452928e-06),
 ((0, 1, 2), 6.2238973216287e-09),
 ((0, 2, 1), 3.159290601134572e-09),
 ((1, 0), 6.98700891485313e-07),
 ((1, 0, 2), 2.990637801270799e-09),
 ((1, 2, 0), 5.772306664111778e-10),
 ((2, 0, 1), 1.3121431316970016e-09),
 ((2, 1, 0), 4.989300164510114e-10)}

## RPSR rule of combination


### Step 1, 2
Set fusion order and reliability vector   
**Default is descending order**

In [19]:
# Default: fuse the sources in attribute order, with a reliability that falls
# linearly from 1.0 (first source) to 0.5 (last source).
default_fusion_order = list(range(num_attributes))
# max(..., 1) avoids a division by zero when there is only one attribute;
# that single source then gets reliability 1.0.
reliability_steps = max(num_attributes - 1, 1)
default_reliability_vector = [(1 - 0.5 * i / reliability_steps) for i in range(num_attributes)]
print("Default fusion order: ", default_fusion_order)
print("Default reliability vector: ", default_reliability_vector)

Default fusion order:  [0, 1, 2, 3]
Default reliability vector:  [1.0, 0.8333333333333334, 0.6666666666666667, 0.5]


In [20]:
def shuffle_by_defusion_order(data, order):
    """Return a new list with the items of ``data`` picked in the sequence given by ``order``.

    ``order`` is an iterable of indices into ``data``; ``data`` itself is not modified.
    """
    reordered_data = []
    for position in order:
        reordered_data.append(data[position])
    return reordered_data

# Reorder the weighted RPS sources according to the default fusion order and display them.
RPS_wv = shuffle_by_defusion_order(RPS_w, default_fusion_order)
RPS_wv

[{((0,), 0.7862726588470111),
  ((0, 1), 0.11599435566876512),
  ((0, 1, 2), 0.015011320588306972),
  ((0, 2, 1), 0.00812530677259959),
  ((1, 0), 0.0570557772020016),
  ((1, 0, 2), 0.00898761830076162),
  ((1, 2, 0), 0.0023929204185468445),
  ((2, 0, 1), 0.0041290354087611305),
  ((2, 1, 0), 0.0020310067932459656)},
 {((0,), 0.934114015867886),
  ((0, 1, 2), 0.002068767936625385),
  ((0, 2), 0.034770618707499874),
  ((0, 2, 1), 0.0025165481725448417),
  ((1, 0, 2), 0.0014356843992384418),
  ((1, 2, 0), 0.000883210544506705),
  ((2, 0), 0.021390339755573005),
  ((2, 0, 1), 0.0018733967720819976),
  ((2, 1, 0), 0.0009474178440438135)},
 {((0,), 0.9999999646408189),
  ((0, 1), 3.311426271283788e-08),
  ((0, 1, 2), 2.7224534237667426e-13),
  ((0, 2, 1), 7.576634887970788e-14),
  ((1, 0), 2.2445401620240143e-09),
  ((1, 0, 2), 2.3152079820483157e-14),
  ((1, 2, 0), 4.3673520622196196e-16),
  ((2, 0, 1), 6.14808074823924e-15),
  ((2, 1, 0), 4.167272053875363e-16)},
 {((0,), 0.99999744901358

### Step 3
For each RPS source RPSjv, produce RPS with reliability RPSjrv

In [None]:
def F_RPS_reliability(x):
    """Return sum_{k=0}^{x} x! / (x - k)! as an exact integer.

    The k-th term x! / (x - k)! is the falling factorial
    x * (x - 1) * ... * (x - k + 1). It is built incrementally, so the function
    needs no ``math`` import and cannot overflow a float for large ``x``.
    Returns 0 for negative ``x`` (the sum is empty).
    """
    total = 0
    arrangements = 1  # x! / (x - k)! for the current k, starting at k = 0
    for k in range(x + 1):
        total += arrangements
        arrangements *= x - k
    return total

def get_PMF_with_reliability(RPS_wv, reliability_vector, num_classes):
    """Discount every RPS source by its reliability.

    For the source at position ``v`` with reliability ``r_v``:
    a single-element set keeps ``M(A) * r_v``; any other set gets
    ``M(A) * r_v + (1 - r_v) / (F_RPS_reliability(num_classes) - num_classes - 1)``.

    Parameters
    ----------
    RPS_wv : sequence of iterables of (A, M_A) pairs
        One collection per source; ``A`` is an ordered tuple, ``M_A`` its mass.
    reliability_vector : sequence of float
        Reliability of each source, indexed like ``RPS_wv``.
    num_classes : int
        Number of classes, passed to ``F_RPS_reliability``.

    Returns
    -------
    list of set
        One set of ``(A, discounted mass)`` pairs per source, in input order.
    """
    # Loop-invariant, so computed once: F counts the arrangements of every
    # length 0..num_classes; subtracting the single length-0 arrangement and
    # the num_classes length-1 arrangements leaves those with >= 2 elements.
    multi_element_count = F_RPS_reliability(num_classes) - num_classes - 1

    RPS_wv_r = []
    for v, RPS_wv_j in enumerate(RPS_wv):
        r_v = reliability_vector[v]
        RPS_wv_r_j = set()

        for A, MA in RPS_wv_j:
            MA_r = MA * r_v
            if len(A) != 1:
                # NOTE(review): only sets already present in the source receive
                # this share; confirm whether absent permutations should be
                # added too so that the discounted masses still sum to 1.
                MA_r += (1 - r_v) / multi_element_count
            RPS_wv_r_j.add((A, MA_r))

        RPS_wv_r.append(RPS_wv_r_j)

    return RPS_wv_r

# Apply the default reliability vector to the fusion-ordered sources and display the result.
RPS_wv_r = get_PMF_with_reliability(RPS_wv, default_reliability_vector, num_classes)
RPS_wv_r

### Step 4
Combine the K PMFs with reliability using the left orthogonal sum (LOS) or the right orthogonal sum (ROS).

In [None]:
def right_intersection(A, B):
    """
    Right intersection (RI) of the ordered sets A and B: B with every element
    that does not occur in A removed. The order of B is preserved.
    :param A: tuple A
    :param B: tuple B
    :return: tuple with the elements of B that also occur in A, in B's order
    """
    # A direct membership filter replaces the former "collect the rejects, then
    # filter against the rejects" double pass; the result is the same.
    return tuple(item for item in B if item in A)

def left_intersection(A, B):
    """
    Left intersection (LI) of the ordered sets A and B: A with every element
    that does not occur in B removed. The order of A is preserved.
    :param A: tuple A
    :param B: tuple B
    :return: tuple with the elements of A that also occur in B, in A's order
    """
    # A direct membership filter replaces the former "collect the rejects, then
    # filter against the rejects" double pass; the result is the same.
    return tuple(item for item in A if item in B)

# Example: two single-element ordered sets. The trailing commas matter:
# ('R') is just the string 'R', whereas ('R',) is a one-element tuple.
A_test_tuple = ('R',)
B_test_tuple = ('G',)

# Compute the right and the left intersection.
ri_test_result = right_intersection(A_test_tuple, B_test_tuple)
li_test_result = left_intersection(A_test_tuple, B_test_tuple)

print("右正交 (RI) 结果:", ri_test_result)
print("左正交 (LI) 结果:", li_test_result)