In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Read the Excel file
file_path = 'data_example.xlsx'  # Replace with your file path
xlsx = pd.ExcelFile(file_path)

# Get all sheet names
sheet_names = xlsx.sheet_names

# Create a dictionary to store all DataFrames
dataframes = {}

# Iterate over all sheets and save each as a DataFrame
for sheet in sheet_names:
    dataframes[sheet] = pd.read_excel(xlsx, sheet_name=sheet)

# Now the dataframes dictionary contains all sheets, each sheet corresponds to a DataFrame
# You can access a specific DataFrame using dataframes['sheet_name']
df_grey = dataframes["grey relational matrix"]

# Iterate through every two columns for compression
# Get the list of column names
columns = df_grey.columns.tolist()

# Create a new DataFrame to store the compressed data
new_df = pd.DataFrame()

# Iterate through every two columns for compression
for i in range(1, len(columns), 2):
    low_col = columns[i]
    high_col = columns[i + 1]
    new_col_name = low_col.split()[0]  # Assuming column names are in the format 'I1 Low Value', 'I1 High Value', take the first part as the new column name
    new_df[new_col_name] = df_grey[[low_col, high_col]].values.tolist()

new_df = new_df.iloc[1:, :]
print(new_df)


              I1            I2            I3            I4            I5  \
1         [0, 0]  [0.41, 0.49]  [0.78, 0.97]     [0.91, 1]   [0.8, 0.98]   
2   [0.47, 0.55]        [0, 0]  [0.26, 0.33]  [0.55, 0.72]  [0.88, 0.95]   
3         [0, 0]        [0, 0]        [0, 0]   [0.02, 0.1]        [0, 0]   
4         [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
5         [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
6         [0, 0]        [0, 0]        [0, 0]        [0, 0]  [0.92, 0.99]   
7         [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
8         [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
9         [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
10        [0, 0]        [0, 0]        [0, 0]        [0, 0]  [0.75, 0.89]   
11        [0, 0]        [0, 0]        [0, 0]        [0, 0]   [0.2, 0.25]   
12        [0, 0]        [0, 0]        [0, 0]        [0, 0]        [0, 0]   
13     [0.82

Using the grey relational matrix, calculate the grey direct influence matrix.

In [2]:
# Assuming your DataFrame is named new_df
# Expand the values in the list to a new DataFrame
expanded_df = new_df.applymap(lambda x: x if isinstance(x, list) else [x])
expanded_df = pd.DataFrame(expanded_df.values.flatten().tolist())

# Find the maximum and minimum values
max_value = expanded_df.max().max()
min_value = expanded_df.min().min()

print(f"Maximum value: {max_value}")
print(f"Minimum value: {min_value}")

# Standardize the grey relational matrix
# Define the normalization function
def normalize(val, min_val, max_val):
    return [(val[0] - min_val) / (max_val - min_val), (val[1] - min_val) / (max_val - min_val)]

# Apply the normalization function
normalized_df = new_df.applymap(lambda x: normalize(x, min_value, max_value))
print(normalized_df)


Maximum value: 1.0
Minimum value: 0.0
              I1            I2            I3            I4            I5  \
1     [0.0, 0.0]  [0.41, 0.49]  [0.78, 0.97]   [0.91, 1.0]   [0.8, 0.98]   
2   [0.47, 0.55]    [0.0, 0.0]  [0.26, 0.33]  [0.55, 0.72]  [0.88, 0.95]   
3     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   [0.02, 0.1]    [0.0, 0.0]   
4     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   
5     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   
6     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]  [0.92, 0.99]   
7     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   
8     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   
9     [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   
10    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]  [0.75, 0.89]   
11    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]   [0.2, 0.25]   
12    [0.0, 0.0]    [0.0, 0.0]    [0.0, 0.0]    [0

In [3]:
# (1) Using the grey relational matrix from 2.1 to calculate the grey direct influence matrix in 2.2
def direct(val_n, min_val, max_val):
    y_k = (val_n[0] * (1 - val_n[0]) + val_n[1] * val_n[1]) / (1 - val_n[0] + val_n[1])
    z_k = min_val + y_k * (max_val - min_val)
    return z_k

grey_direct_df = normalized_df.applymap(lambda x: direct(x, min_value, max_value))
print(grey_direct_df)


          I1        I2        I3        I4        I5        I6        I7  \
1   0.000000  0.446296  0.934874  0.992569  0.949492  0.994074  0.346557   
2   0.510741  0.000000  0.281589  0.654615  0.942150  0.997619  0.966667   
3   0.000000  0.000000  0.000000  0.027407  0.000000  0.000000  0.994074   
4   0.000000  0.000000  0.000000  0.000000  0.000000  0.445455  0.043874   
5   0.000000  0.000000  0.000000  0.000000  0.000000  0.132609  0.281589   
6   0.000000  0.000000  0.000000  0.000000  0.984766  0.000000  0.918889   
7   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
8   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
9   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
10  0.000000  0.000000  0.000000  0.000000  0.859298  0.000000  0.000000   
11  0.000000  0.000000  0.000000  0.000000  0.211905  0.000000  0.958067   
12  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.230721   
13  0.972542

Using the grey direct influence matrix, calculate the grey comprehensive influence matrix.

In [4]:
# Find the maximum column sum
coloum_sum_max=grey_direct_df.sum().max()
# Normalization
def guiyi_process(x,coloum_sum_max):
    return (1/coloum_sum_max)*x
grey_direct_df_guiyi = grey_direct_df.applymap(lambda x: guiyi_process(x, coloum_sum_max))
grey_direct_df_guiyi


Unnamed: 0,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,D1,D2,D3,D4,D5,D6,D7
1,0.0,0.03759,0.078741,0.083601,0.079973,0.083728,0.029189,0.058776,0.083601,0.083051,0.0,0.0,0.0,0.055136,0.017848,0.00533,0.0,0.0,0.0
2,0.043018,0.0,0.023717,0.055136,0.079354,0.084026,0.081419,0.055136,0.081149,0.051698,0.083601,0.031084,0.017848,0.075684,0.070158,0.006409,0.007721,0.011169,0.0
3,0.0,0.0,0.0,0.002308,0.0,0.0,0.083728,0.060806,0.082944,0.0,0.0,0.072376,0.011169,0.046708,0.0,0.055136,0.02098,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.037519,0.003695,0.070158,0.081914,0.03413,0.0,0.070791,0.029189,0.081914,0.0,0.0,0.005022,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.011169,0.023717,0.014548,0.009954,0.002394,0.0,0.0,0.0,0.017848,0.084026,0.061696,0.070597,0.028126,0.006409
6,0.0,0.0,0.0,0.0,0.082944,0.0,0.077395,0.024925,0.032287,0.082944,0.0,0.052369,0.0,0.023717,0.0,0.075684,0.048952,0.006409,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083461,0.082944,0.0,0.052369,0.082944,0.0,0.081914,0.0,0.024925,0.029189,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.082944,0.0,0.0,0.077765,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083941,0.011169,0.0,0.082944,0.0,0.0
10,0.0,0.0,0.0,0.0,0.072376,0.0,0.0,0.030212,0.009954,0.0,0.0,0.079973,0.0,0.0747,0.0,0.017848,0.082944,0.0,0.0


In [5]:
all_zhuanjia_matrix = [grey_direct_df_guiyi, grey_direct_df_guiyi, grey_direct_df_guiyi]

# Concatenate all DataFrames together
combined_df = pd.concat(all_zhuanjia_matrix)

# Calculate the average value at each position
average_df = combined_df.groupby(combined_df.index).mean()

# Print the result
print(average_df)


          I1        I2        I3        I4        I5        I6        I7  \
1   0.000000  0.037590  0.078741  0.083601  0.079973  0.083728  0.029189   
2   0.043018  0.000000  0.023717  0.055136  0.079354  0.084026  0.081419   
3   0.000000  0.000000  0.000000  0.002308  0.000000  0.000000  0.083728   
4   0.000000  0.000000  0.000000  0.000000  0.000000  0.037519  0.003695   
5   0.000000  0.000000  0.000000  0.000000  0.000000  0.011169  0.023717   
6   0.000000  0.000000  0.000000  0.000000  0.082944  0.000000  0.077395   
7   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
8   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
9   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
10  0.000000  0.000000  0.000000  0.000000  0.072376  0.000000  0.000000   
11  0.000000  0.000000  0.000000  0.000000  0.017848  0.000000  0.080695   
12  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.019433   
13  0.081914

In [6]:
L = average_df.values

# Create an identity matrix of the same size
I = np.eye(L.shape[0])

# Calculate the inverse of (I - L)
I_minus_L_inv = np.linalg.inv(I - L)

# Calculate L * (I - L)^-1
result = np.dot(L, I_minus_L_inv)

# Convert the result back to a DataFrame to obtain the grey comprehensive influence matrix
df_T = pd.DataFrame(result)
grey_total_df = df_T
grey_total_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0.002017,0.037728,0.07984231,0.086107,0.099799,0.09455,0.052285,0.08419,0.113198,0.096375,0.006274,0.031892,0.004112,0.105351,0.030864,0.032698,0.055228,0.003948,0.000695
1,0.044789,0.001764,0.0275102,0.059393,0.103146,0.100696,0.109192,0.088465,0.121327,0.066894,0.091173,0.06443,0.019956,0.135191,0.081978,0.04407,0.07456,0.014872,0.000736
2,0.000929,5.1e-05,0.0002000973,0.002587,0.000822,0.001187,0.088524,0.07021,0.092709,0.000677,0.005582,0.080614,0.011251,0.07551,0.001609,0.061337,0.054279,9.1e-05,6e-05
3,0.002407,0.000125,0.0005190323,0.000723,0.007647,0.040315,0.011052,0.076129,0.087632,0.038781,0.003033,0.078102,0.029223,0.108964,0.002372,0.009121,0.040328,0.000523,9e-05
4,0.000593,0.000572,7.603389e-05,9.6e-05,0.008463,0.020734,0.030766,0.019372,0.020654,0.004589,0.002048,0.00871,0.000326,0.031024,0.085291,0.073013,0.092955,0.029146,0.006556
5,6e-05,5.8e-05,7.751205e-06,1e-05,0.09042,0.002468,0.084345,0.03687,0.043013,0.083411,0.004462,0.066853,3.3e-05,0.051967,0.008424,0.088512,0.08852,0.009113,0.000668
6,6e-06,6e-06,8.077824e-07,1e-06,0.00104,0.000164,0.007015,0.088512,0.088311,2.1e-05,0.052741,0.085162,3e-06,0.107198,0.001762,0.033369,0.062723,9.9e-05,7e-05
7,8e-06,7e-06,9.848082e-07,1e-06,4.2e-05,5.4e-05,9.7e-05,3.4e-05,7.9e-05,1e-05,1.1e-05,4.1e-05,4e-06,0.082999,0.000538,0.002345,0.08429,9.3e-05,8.5e-05
8,9e-06,8e-06,1.098561e-06,1e-06,0.000565,0.000968,0.00055,0.000183,0.000949,8.8e-05,3.5e-05,0.00067,5e-06,0.084549,0.011771,0.003428,0.09072,0.000121,9.5e-05
9,5.2e-05,5e-05,6.682343e-06,8e-06,0.073042,0.001567,0.004478,0.031962,0.011927,0.000345,0.000273,0.080833,2.9e-05,0.087159,0.00684,0.026806,0.107646,0.002221,0.000576



Using the grey comprehensive influence matrix, calculate the values of each indicator.

In [7]:
df = pd.DataFrame(grey_total_df)

# Calculate the comprehensive influence degree of each column (f)
f = df.sum(axis=1)

# Calculate the degree of influence on each row (m)
m = df.sum(axis=0)

columns_set = ['I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11', 'I12', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7']

# Create the result DataFrame
result_matrix = pd.DataFrame({
    'Factors': columns_set,
    'Comprehensive Influence Degree D': f,
    'Degree of Being Influenced C': m,
    'Centrality D+C': f + m,
    'Causality D-C': f - m
})
result_matrix


Unnamed: 0,Factors,Comprehensive Influence Degree D,Degree of Being Influenced C,Centrality D+C,Causality D-C
0,I1,1.017155,0.223893,1.241048,0.793262
1,I2,1.250142,0.132014,1.382156,1.118127
2,I3,0.548229,0.137551,0.68578,0.410678
3,I4,0.537085,0.1884,0.725486,0.348685
4,I5,0.434985,0.709874,1.144859,-0.274889
5,I6,0.659217,0.550923,1.21014,0.108294
6,I7,0.528204,0.7109,1.239104,-0.182696
7,I8,0.170739,0.779915,0.950654,-0.609177
8,I9,0.194716,0.963201,1.157916,-0.768485
9,I10,0.435821,0.429513,0.865334,0.006307


Using the grey comprehensive influence matrix, calculate the overall influence matrix.

In [8]:
# Calculate the overall influence matrix H
df_H = df_T + I
df_H.round(3)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,1.002,0.038,0.08,0.086,0.1,0.095,0.052,0.084,0.113,0.096,0.006,0.032,0.004,0.105,0.031,0.033,0.055,0.004,0.001
1,0.045,1.002,0.028,0.059,0.103,0.101,0.109,0.088,0.121,0.067,0.091,0.064,0.02,0.135,0.082,0.044,0.075,0.015,0.001
2,0.001,0.0,1.0,0.003,0.001,0.001,0.089,0.07,0.093,0.001,0.006,0.081,0.011,0.076,0.002,0.061,0.054,0.0,0.0
3,0.002,0.0,0.001,1.001,0.008,0.04,0.011,0.076,0.088,0.039,0.003,0.078,0.029,0.109,0.002,0.009,0.04,0.001,0.0
4,0.001,0.001,0.0,0.0,1.008,0.021,0.031,0.019,0.021,0.005,0.002,0.009,0.0,0.031,0.085,0.073,0.093,0.029,0.007
5,0.0,0.0,0.0,0.0,0.09,1.002,0.084,0.037,0.043,0.083,0.004,0.067,0.0,0.052,0.008,0.089,0.089,0.009,0.001
6,0.0,0.0,0.0,0.0,0.001,0.0,1.007,0.089,0.088,0.0,0.053,0.085,0.0,0.107,0.002,0.033,0.063,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.083,0.001,0.002,0.084,0.0,0.0
8,0.0,0.0,0.0,0.0,0.001,0.001,0.001,0.0,1.001,0.0,0.0,0.001,0.0,0.085,0.012,0.003,0.091,0.0,0.0
9,0.0,0.0,0.0,0.0,0.073,0.002,0.004,0.032,0.012,1.0,0.0,0.081,0.0,0.087,0.007,0.027,0.108,0.002,0.001


Using the grey comprehensive influence matrix (where each value represents a degree of greyness, but the algorithm does not consider generating greyscale images), calculate the threshold based on the OTSU algorithm.
Using the overall influence matrix and the threshold, obtain the reachable matrix.

In [9]:
df_H.shape

(19, 19)

In [10]:
import cv2
import numpy as np

# Convert to 8-bit grayscale image
image = (df_H.values * 255).astype(np.uint8)

# OTSU algorithm
def otsu_threshold(image):
    # Calculate histogram
    hist, bins = np.histogram(image.flatten(), bins=64, range=[0, 256])
    
    # Calculate cumulative sum of probabilities, foreground probability (w0_t)
    w0_t = np.cumsum(hist / (df_H.shape[0]*df_H.shape[1]))
    
    # Calculate cumulative sum of probabilities, background probability (w1_t)
    w1_t = -w0_t + 1
    
    miu_0 = w0_t.mean()
    miu_1 = w1_t.mean()

    # Cumulative variance
    threshold = w0_t * w1_t * (miu_0 - miu_1) ** 2

    return threshold.max()

# Calculate optimal threshold
threshold = otsu_threshold(image)
print(threshold)

# Binarization
binary_image = (image > threshold).astype(np.uint8)

# Convert the binarized image back to DataFrame
binary_df = pd.DataFrame(binary_image)

# Use the rename function to replace column names and index
df_renamed = binary_df.rename(columns=dict(zip(df.columns, columns_set)), index=dict(zip(df.index, columns_set)))
df_renamed


0.18617831498495047


Unnamed: 0,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,D1,D2,D3,D4,D5,D6,D7
I1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
I2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
I3,0,0,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,0,0
I4,0,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0
I5,0,0,0,0,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1
I6,0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0
I7,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,1,0,0
I8,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
I9,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,1,0,0
I10,0,0,0,0,1,0,1,1,1,1,0,1,0,1,1,1,1,0,0
