### 肯德尔相关系数（与SIR模型对比）

In [None]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [None]:
import pandas as pd

# Number of top-ranked nodes used as the initial infected set.
TOP_N = 200

# Load the node table; it must provide 'Node' and 'Centrality' columns.
df = pd.read_csv('custom_sorted.csv')

# Order the nodes from highest to lowest centrality.
df_sorted = df.sort_values(by='Centrality', ascending=False)

# Keep the TOP_N highest-ranked nodes.  The variable keeps its original name
# (`top_20_nodes`) in case other cells refer to it, even though it holds
# TOP_N rows rather than 20.
top_20_nodes = df_sorted.head(TOP_N)

# Report the number of rows actually selected (the file may hold fewer than
# TOP_N nodes), so the label always matches the data shown.
print(f"Top {len(top_20_nodes)} nodes by centrality:")
print(top_20_nodes[['Node', 'Centrality']])

# Node identifiers of the selected rows, used to seed the SIR simulation.
initial_infected_nodes = top_20_nodes['Node'].tolist()

print("Initial infected nodes:")
print(initial_infected_nodes)

In [None]:
# SIR model parameters.

# Infection rate.
beta = 0.05

# Recovery rate.
gamma = 0.01

# Number of simulation runs.
iterations = 100


In [None]:
import random


# Node states: S = susceptible, I = infected, R = removed (recovered).
def run_SIR(G, beta, gamma, initial_infected, iterations):
    """Run repeated discrete-time SIR simulations on ``G`` and average them.

    Every run starts with all nodes susceptible except the seeds in
    ``initial_infected``, which start infected.  In each time step every
    infected node tries to infect each susceptible neighbour with probability
    ``beta * weight`` (``weight`` is the edge's ``'weight'`` attribute, 1 when
    absent) and then recovers with probability ``gamma``.  Nodes infected
    during a step only start spreading in the following step.  A run ends
    when no infected node is left.

    Random draws come from the module-level ``random`` generator.

    Args:
        G: Graph exposing ``nodes()``, ``neighbors(node)`` and
            ``G[node][neighbor]`` returning a mapping of edge attributes.
        beta: Base infection probability per contact and time step.
        gamma: Recovery probability per infected node and time step.
        initial_infected: Iterable of seed nodes; each must be a node of ``G``.
        iterations: Number of independent runs to average over (at least 1).

    Returns:
        dict mapping every node of ``G`` to the average, over all runs, of
        the number of time steps at whose end the node was infected or
        removed.

    Raises:
        ValueError: If ``iterations`` is smaller than 1, if a seed is not a
            node of ``G``, or if seeds are given while ``gamma`` is not
            positive (the simulation could then never terminate).
    """
    nodes = list(G.nodes())
    seeds = list(initial_infected)

    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # A seed that is not in the graph would stay 'I' forever because the
    # simulation loop only visits graph nodes, so reject it up front.
    known_nodes = set(nodes)
    unknown_seeds = [node for node in seeds if node not in known_nodes]
    if unknown_seeds:
        raise ValueError(
            f"initial_infected contains nodes that are not in G: {unknown_seeds}"
        )

    # Without recovery an infected node never leaves state 'I' and the run
    # would loop forever.
    if seeds and gamma <= 0:
        raise ValueError("gamma must be positive when initial_infected is not empty")

    final_infection_scale = dict.fromkeys(nodes, 0)

    for _ in range(iterations):
        # Fresh state for this run: everyone susceptible except the seeds.
        status = dict.fromkeys(nodes, 'S')
        for node in seeds:
            status[node] = 'I'

        infected_counts = dict.fromkeys(nodes, 0)

        # Infected nodes are always processed in graph order so the sequence
        # of random draws is the same as a full scan over G.nodes().
        infected = [node for node in nodes if status[node] == 'I']

        while infected:
            new_infected = []
            for node in infected:
                for neighbor in G.neighbors(node):
                    if status[neighbor] == 'S':
                        # Edge weight scales the infection probability.
                        edge_weight = G[node][neighbor].get('weight', 1)
                        if random.random() < beta * edge_weight:
                            new_infected.append(neighbor)
                if random.random() < gamma:
                    status[node] = 'R'

            # Apply new infections only after the sweep so they cannot
            # spread within the step in which they were created.
            for node in new_infected:
                status[node] = 'I'

            # One pass both updates the per-step counters and collects the
            # nodes that are still infected for the next step.
            infected = []
            for node in nodes:
                state = status[node]
                if state != 'S':
                    infected_counts[node] += 1
                    if state == 'I':
                        infected.append(node)

        for node, count in infected_counts.items():
            final_infection_scale[node] += count

    # Average the accumulated counts over all runs.
    for node in final_infection_scale:
        final_infection_scale[node] /= iterations

    return final_infection_scale


In [None]:
# Run the SIR simulation seeded with `initial_infected_nodes`.
# NOTE(review): `G` is not created in the cells shown here; it has to exist
# in the session already (presumably a networkx graph) - confirm.
infection_result = run_SIR(
    G,
    beta=beta,
    gamma=gamma,
    initial_infected=initial_infected_nodes,
    iterations=iterations,
)

In [None]:
# Show the simulation result as the cell's output.
infection_result

In [None]:
# Turn the {node: average infection scale} mapping into a two-column table.
output_df = pd.DataFrame(list(infection_result.items()), columns=['Node', 'AverageInfectionScale'])

# Order the rows from the largest to the smallest average infection scale.
sorted_output_df = output_df.sort_values(by='AverageInfectionScale', ascending=False)

# Keep the output path in one variable so the confirmation message always
# names the file that was actually written.
infection_results_path = 'sorted_infection_results200.csv'
sorted_output_df.to_csv(infection_results_path, index=False)

print(f"感染结果已保存到 '{infection_results_path}'.")

In [None]:



# Attach a rank column to the saved infection-scale table.
infection_df = pd.read_csv('sorted_infection_results200.csv')

# Rank 1 is the largest AverageInfectionScale; rows with equal values share
# the smallest rank of their group (method='min').
scale_ranks = infection_df['AverageInfectionScale'].rank(ascending=False, method='min')
infection_df = infection_df.assign(Rank=scale_ranks.astype(int))

# Write the ranked table to a new file.
infection_df.to_csv('sorted_infection_results_with_ranks200.csv', index=False)

print("感染规模数据文件已添加排名并保存。")


In [None]:
import pandas as pd

# Input file for each centrality measure; every file needs a 'Centrality'
# column.
files = {
    'Degree': 'degree_sorted.csv',
    'Closeness': 'closeness_sorted.csv',
    'Betweenness': 'betweenness_sorted.csv',
    'Eigenvector': 'eigenvector_sorted.csv',
    'Custom': 'custom_sorted1.csv',
    'Pagerank':'pagerank_sorted1.csv'
}

# For every measure: sort by score, add a rank column and write the result
# to '<measure>_with_ranks1.csv'.
for centrality_name, file_name in files.items():
    centrality_df = pd.read_csv(file_name)

    # Highest centrality first.
    sorted_by_centrality = centrality_df.sort_values(by='Centrality', ascending=False)

    # Ranks start at 1; equal scores share the smallest rank of their group.
    centrality_ranks = sorted_by_centrality['Centrality'].rank(method='min', ascending=False)
    sorted_by_centrality = sorted_by_centrality.assign(Rank=centrality_ranks.astype(int))

    output_name = f'{centrality_name.lower()}_with_ranks1.csv'
    sorted_by_centrality.to_csv(output_name, index=False)


##### beta = 0.05

In [None]:
import pandas as pd
from scipy.stats import kendalltau

# Ranked centrality tables, keyed by the name shown in the result table.
# Every entry uses the '<measure>_with_ranks1.csv' name that this notebook's
# ranking cell writes; four of them previously pointed at
# '<measure>_with_ranks.csv', which no cell shown here produces.
centrality_dict = {
    'Degree': 'degree_with_ranks1.csv',
    'Closeness': 'closeness_with_ranks1.csv',
    'Betweenness': 'betweenness_with_ranks1.csv',
    'Eigenvector': 'eigenvector_with_ranks1.csv',
    'Custom': 'custom_with_ranks1.csv',
    'Pagerank': 'pagerank_with_ranks1.csv',
}

# Ranked SIR infection-scale results ('Node' and 'Rank' columns are used).
infection_results = pd.read_csv('sorted_infection_results_with_ranks200.csv')

# Kendall's tau between each centrality ranking and the SIR ranking.
results = {}

for method, filename in centrality_dict.items():
    # Ranked centrality table for this measure.
    centrality_data = pd.read_csv(filename)

    # Align both rankings on the node id; nodes missing from either table
    # are dropped by the default inner join.
    merged_df = pd.merge(
        centrality_data[['Node', 'Rank']],
        infection_results[['Node', 'Rank']],
        on='Node',
        suffixes=('_Centrality', '_Infection'),
    )

    # Only the coefficient is kept; the p-value is not reported.
    tau, p_value = kendalltau(merged_df['Rank_Centrality'], merged_df['Rank_Infection'])
    results[method] = tau

# One row per centrality measure.
results_df = pd.DataFrame(list(results.items()), columns=['CentralityMethod', 'KendallTau'])
# Optional: persist the table.
#results_df.to_csv('kendall_tau_results.csv', index=False)
results_df



In [None]:
def run_SIR(G, beta, gamma, initial_infected, iterations):
    """Run repeated discrete-time SIR simulations on ``G`` and average them.

    Node states are 'S' (susceptible), 'I' (infected) and 'R' (removed).
    Every run starts with all nodes susceptible except the seeds in
    ``initial_infected``, which start infected.  In each time step every
    infected node tries to infect each susceptible neighbour with probability
    ``beta * weight`` (``weight`` is the edge's ``'weight'`` attribute, 1 when
    absent) and then recovers with probability ``gamma``.  Nodes infected
    during a step only start spreading in the following step.  A run ends
    when no infected node is left.

    Random draws come from the module-level ``random`` generator.

    Args:
        G: Graph exposing ``nodes()``, ``neighbors(node)`` and
            ``G[node][neighbor]`` returning a mapping of edge attributes.
        beta: Base infection probability per contact and time step.
        gamma: Recovery probability per infected node and time step.
        initial_infected: Iterable of seed nodes; each must be a node of ``G``.
        iterations: Number of independent runs to average over (at least 1).

    Returns:
        dict mapping every node of ``G`` to the average, over all runs, of
        the number of time steps at whose end the node was infected or
        removed.

    Raises:
        ValueError: If ``iterations`` is smaller than 1, if a seed is not a
            node of ``G``, or if seeds are given while ``gamma`` is not
            positive (the simulation could then never terminate).
    """
    nodes = list(G.nodes())
    seeds = list(initial_infected)

    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # A seed that is not in the graph would stay 'I' forever because the
    # simulation loop only visits graph nodes, so reject it up front.
    known_nodes = set(nodes)
    unknown_seeds = [node for node in seeds if node not in known_nodes]
    if unknown_seeds:
        raise ValueError(
            f"initial_infected contains nodes that are not in G: {unknown_seeds}"
        )

    # Without recovery an infected node never leaves state 'I' and the run
    # would loop forever.
    if seeds and gamma <= 0:
        raise ValueError("gamma must be positive when initial_infected is not empty")

    final_infection_scale = dict.fromkeys(nodes, 0)

    for _ in range(iterations):
        # Fresh state for this run: everyone susceptible except the seeds.
        status = dict.fromkeys(nodes, 'S')
        for node in seeds:
            status[node] = 'I'

        infected_counts = dict.fromkeys(nodes, 0)

        # Infected nodes are always processed in graph order so the sequence
        # of random draws is the same as a full scan over G.nodes().
        infected = [node for node in nodes if status[node] == 'I']

        while infected:
            new_infected = []
            for node in infected:
                for neighbor in G.neighbors(node):
                    if status[neighbor] == 'S':
                        # Edge weight scales the infection probability.
                        edge_weight = G[node][neighbor].get('weight', 1)
                        if random.random() < beta * edge_weight:
                            new_infected.append(neighbor)
                if random.random() < gamma:
                    status[node] = 'R'

            # Apply new infections only after the sweep so they cannot
            # spread within the step in which they were created.
            for node in new_infected:
                status[node] = 'I'

            # One pass both updates the per-step counters and collects the
            # nodes that are still infected for the next step.
            infected = []
            for node in nodes:
                state = status[node]
                if state != 'S':
                    infected_counts[node] += 1
                    if state == 'I':
                        infected.append(node)

        for node, count in infected_counts.items():
            final_infection_scale[node] += count

    # Average the accumulated counts over all runs.
    for node in final_infection_scale:
        final_infection_scale[node] /= iterations

    return final_infection_scale

In [None]:
# Settings for the infection-rate sweep.

# Five evenly spaced beta values from 0 to 0.2, both ends included.
beta_values = np.linspace(start=0, stop=0.2, num=5)

# Assumed recovery rate.
gamma = 0.01

# Number of simulation runs.
iterations = 100

In [None]:
# Simulate once per beta value and write each sorted result table to its own
# CSV file (the beta value, with two decimals, is part of the file name).
for beta in beta_values:
    infection_result = run_SIR(G, beta, gamma, initial_infected_nodes, iterations)

    # Two-column table: node id and its average infection scale.
    output_df = pd.DataFrame(
        data=list(infection_result.items()),
        columns=['Node', 'AverageInfectionScale'],
    )

    # Largest average infection scale first.
    sorted_output_df = output_df.sort_values('AverageInfectionScale', ascending=False)

    output_path = f'sorted_infection_results_beta_{beta:.2f}1.csv'
    sorted_output_df.to_csv(output_path, index=False)

print("感染结果已保存为不同 β 值的 CSV 文件。")

In [None]:
import pandas as pd

# Beta grid: five evenly spaced values from 0 to 0.2.
beta_values = np.linspace(0, 0.2, 5)

# Add a rank column to the infection-scale file of every beta value.
for beta in beta_values:
    # Input and output names both embed the beta value with two decimals.
    input_file = f'sorted_infection_results_beta_{beta:.2f}1.csv'
    output_file = f'sorted_infection_results_with_ranks_beta_{beta:.2f}1.csv'

    infection_df = pd.read_csv(input_file)

    # Rank 1 is the largest AverageInfectionScale; equal values share the
    # smallest rank of their group (method='min').
    scale_ranks = infection_df['AverageInfectionScale'].rank(ascending=False, method='min')
    infection_df = infection_df.assign(Rank=scale_ranks.astype(int))

    infection_df.to_csv(output_file, index=False)

    print(f"感染规模数据文件（β={beta:.2f}）已添加排名并保存为 '{output_file}'。")


In [None]:
import pandas as pd
from scipy.stats import kendalltau

# Beta values and the ranked infection-scale file belonging to each of them.
beta_values = [0.00, 0.05, 0.10, 0.15, 0.20]
infection_files = [f'sorted_infection_results_with_ranks_beta_{beta:.2f}1.csv' for beta in beta_values]

# Ranked centrality tables, keyed by the label used in the result table.
# Every entry uses the '<measure>_with_ranks1.csv' name that this notebook's
# ranking cell writes; four of them previously pointed at
# '<measure>_with_ranks.csv', which no cell shown here produces.
centrality_dict = {
    'DC': 'degree_with_ranks1.csv',
    'CC': 'closeness_with_ranks1.csv',
    'BC': 'betweenness_with_ranks1.csv',
    'EC': 'eigenvector_with_ranks1.csv',
    'CT-PR': 'custom_with_ranks1.csv'
}

# The centrality rankings do not depend on beta, so load each file once
# instead of once per beta value.
centrality_ranks = {
    method: pd.read_csv(centrality_file)[['Node', 'Rank']]
    for method, centrality_file in centrality_dict.items()
}

# {beta: {label: Kendall's tau}}
kendall_tau_results = {}

for beta, file_name in zip(beta_values, infection_files):
    # Ranked SIR infection-scale results for this beta value.
    infection_results = pd.read_csv(file_name)

    beta_results = {}

    for method, centrality_data in centrality_ranks.items():
        # Align both rankings on the node id; nodes missing from either
        # table are dropped by the default inner join.
        merged_df = pd.merge(
            centrality_data,
            infection_results[['Node', 'Rank']],
            on='Node',
            suffixes=('_Centrality', '_Infection'),
        )

        # Only the coefficient is kept; the p-value is not reported.
        tau, p_value = kendalltau(merged_df['Rank_Centrality'], merged_df['Rank_Infection'])
        beta_results[method] = tau

    kendall_tau_results[beta] = beta_results

# One row per beta value, one column per centrality label.
results_df = pd.DataFrame(kendall_tau_results).T
results_df.index.name = 'Beta'
results_df.to_csv('kendall_tau_results_by_beta.csv')

print("Kendall 的 Tau 相关系数结果已保存到 'kendall_tau_results_by_beta.csv'。")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the table written by the per-beta Kendall's tau cell.  This used to
# load 'kendall_tau_results_by_beta111.csv', a file no cell shown here writes.
results_path = 'kendall_tau_results_by_beta.csv'
results_df = pd.read_csv(results_path, index_col='Beta')

# One line per centrality label, beta on the x axis.
plt.figure(figsize=(12, 8))

for method in results_df.columns:
    plt.plot(results_df.index, results_df[method], marker='o', label=method)

plt.xlabel('Beta Value')
plt.ylabel('Kendall\'s Tau')
plt.title('Kendall\'s Tau for Different Beta Values')
plt.legend(title='Centrality Method')
plt.grid(True)
# Put a tick on every beta value present in the table.
plt.xticks(ticks=results_df.index)
plt.tight_layout()

# Keep the output path in one variable so the confirmation message always
# names the file that was actually written.
plot_path = 'kendall_tau_results_line_plot1.png'
plt.savefig(plot_path)
# Close the figure so it is not also rendered inline or kept in memory.
plt.close()

print(f"Kendall 的 Tau 相关系数折线图已保存为 '{plot_path}'。")
