In [None]:
import pandas as pd
import networkx as nx
import numpy as np
import community as community_louvain

# Load the edge list; the columns 'Source', 'Target' and 'Weight' are read below.
edges_df = pd.read_csv('statistics_edges.csv')

# Build the undirected weighted graph.
# Iterating column-wise keeps each column's own dtype. DataFrame.iterrows()
# packs every row into a single Series, so a row mixing integer ids with a
# float weight is upcast to float and the node ids silently become 1.0, 2.0, ...
G = nx.Graph()
G.add_weighted_edges_from(
    zip(
        edges_df['Source'].tolist(),
        edges_df['Target'].tolist(),
        edges_df['Weight'].tolist(),
    )
)

# Louvain community detection.
# The algorithm is randomized; the seed is fixed so that community ids are
# reproducible after a kernel restart (later cells refer to communities by id).
LOUVAIN_SEED = 42
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

# Local structural entropy of every node of a (sub)graph
def calculate_local_structural_entropy(subgraph):
    """Compute, per node, the Shannon entropy of its neighbours' degrees.

    For every node, the degrees of its neighbours (as measured inside
    ``subgraph``) are tallied into an empirical distribution and the
    natural-log entropy of that distribution is returned.

    Args:
        subgraph: Graph-like object providing ``nodes()``, ``neighbors(node)``
            and ``degree(node)`` (degrees must be non-negative integers).

    Returns:
        dict mapping each node to its entropy as a non-negative float.
        Nodes without neighbours map to 0.0.
    """
    entropy_dict = {}

    for node in subgraph.nodes():
        degrees = [subgraph.degree(neighbor) for neighbor in subgraph.neighbors(node)]
        if not degrees:
            # No neighbours: there is no distribution to measure.
            entropy_dict[node] = 0.0
            continue

        degree_counts = np.bincount(degrees)
        # Keep only the degree values that actually occur. Empty bins contribute
        # nothing to the entropy, and dropping them removes the need to add an
        # epsilon inside the log; that epsilon made a single-valued distribution
        # come out as log(1 + eps) and therefore slightly *negative*.
        occupied = degree_counts[degree_counts > 0]
        degree_prob = occupied / occupied.sum()

        entropy = float(-np.sum(degree_prob * np.log(degree_prob)))
        # max() also turns a -0.0 result into 0.0 so it never prints as "-0.0000".
        entropy_dict[node] = max(0.0, entropy)

    return entropy_dict

# Distinct community ids present in the partition.
communities = set(partition.values())

# Report the local structural entropy of every node, one community at a time.
for community_id in communities:
    # Members of the community currently being processed.
    nodes_in_community = [
        member for member, label in partition.items() if label == community_id
    ]
    subgraph = G.subgraph(nodes_in_community)

    # Entropy is evaluated on the community's induced subgraph.
    local_entropy = calculate_local_structural_entropy(subgraph)

    # Emit the header followed by one line per node.
    report_lines = [f"Community {community_id}:"]
    report_lines.extend(
        f"  Node {node}: Local Structural Entropy = {entropy:.4f}"
        for node, entropy in local_entropy.items()
    )
    print("\n".join(report_lines))

In [None]:
# Collect one record per node so the table can be built in a single step.
results = []

# Single source of truth for the output path, used for both writing and
# reporting (the message previously named a file that was never written).
entropy_output_path = 'local_structural_entropy1.csv'

# Compute the local structural entropy inside every community's subgraph.
for community_id in communities:
    # Members of the current community.
    nodes_in_community = [node for node, comm in partition.items() if comm == community_id]
    subgraph = G.subgraph(nodes_in_community)

    local_entropy = calculate_local_structural_entropy(subgraph)

    results.extend(
        {'Node': node, 'Local Structural Entropy': entropy}
        for node, entropy in local_entropy.items()
    )

# Explicit columns keep the CSV header intact even if no records were collected.
results_df = pd.DataFrame(results, columns=['Node', 'Local Structural Entropy'])
results_df.to_csv(entropy_output_path, index=False)

print(f"Results have been saved to '{entropy_output_path}'.")

In [None]:
# Collapse the node-level graph into a graph whose vertices are communities.
community_graph = nx.Graph()

# Walk every edge of the original graph and fold it into the community graph.
for u, v, data in G.edges(data=True):
    community_u, community_v = partition[u], partition[v]

    # Edges inside a single community do not appear in the community graph.
    if community_u == community_v:
        continue

    if not community_graph.has_edge(community_u, community_v):
        # First edge seen between this pair of communities.
        community_graph.add_edge(community_u, community_v, weight=data['weight'])
    else:
        # Further edges between the same pair accumulate their weight.
        community_graph[community_u][community_v]['weight'] += data['weight']


In [None]:
# Export the community graph's edge list to CSV (the row index is not written).
community_edges = nx.to_pandas_edgelist(community_graph)
community_edges.to_csv('community_edges.csv', index=False)


In [None]:
# Centrality measures of the community graph. No `weight` argument is passed
# to any of these calls, so each one uses its networkx default.

# Degree centrality
degree_centrality = nx.degree_centrality(community_graph)

# Closeness centrality
closeness_centrality = nx.closeness_centrality(community_graph)

# Betweenness centrality (currently disabled)
#betweenness_centrality = nx.betweenness_centrality(community_graph)

# Eigenvector centrality
# NOTE(review): power iteration may fail to converge within the default
# iteration limit on some graphs - confirm for this data.
eigenvector_centrality = nx.eigenvector_centrality(community_graph)

# Load centrality (currently disabled)
#load_centrality = nx.load_centrality(community_graph)

# Information centrality
# NOTE(review): networkx documents this measure for connected graphs -
# confirm that community_graph is connected.
information_centrality = nx.information_centrality(community_graph)


In [None]:
import pandas as pd

# Gather the centrality measures into one DataFrame, one column per measure.
# Each value is the mapping returned by the corresponding networkx call;
# presumably keyed by community id, so rows correspond to communities - verify.
# The betweenness and load columns are disabled together with their computation.
centralities_df = pd.DataFrame({
    'Degree Centrality': degree_centrality,
    'Closeness Centrality': closeness_centrality,
    #'Betweenness Centrality': betweenness_centrality,
    'Eigenvector Centrality': eigenvector_centrality,
    #'Load Centrality': load_centrality,
    'Information Centrality': information_centrality
})


In [None]:
# Min-max scale every centrality column to the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
normalized_matrix = scaler.fit_transform(centralities_df)

# Carry the original index over so that the rows of normalized_df stay aligned
# with centralities_df instead of being relabelled 0..n-1.
normalized_df = pd.DataFrame(
    normalized_matrix,
    columns=centralities_df.columns,
    index=centralities_df.index,
)

In [None]:
import numpy as np

# 计算每个指标的熵
def calculate_entropy(normalized_matrix):
    """Return the normalised Shannon entropy of each column (entropy-weight method).

    Every column is turned into proportions that sum to 1, and its entropy is
    divided by ``log(n_rows)`` so that a perfectly uniform column scores ~1.

    Args:
        normalized_matrix: Array-like of non-negative values, rows being the
            alternatives and columns the indicators.

    Returns:
        numpy array with one entropy value per column. A column whose values
        sum to 0 (e.g. a constant column after min-max scaling) is treated as
        uniform and gets entropy ~1 rather than NaN. With fewer than two rows
        every column gets entropy 1, because ``log(n_rows)`` would be 0.
    """
    epsilon = 1e-10  # keeps log() finite for zero proportions
    matrix = np.asarray(normalized_matrix, dtype=float)
    n_rows = matrix.shape[0]

    if n_rows < 2:
        # A single alternative cannot discriminate; avoid dividing by log(1) == 0.
        return np.ones(matrix.shape[1:])

    column_sums = matrix.sum(axis=0)
    # Start from the uniform distribution and overwrite only the columns whose
    # sum is non-zero, so a zero-sum column never triggers 0/0.
    P = np.full(matrix.shape, 1.0 / n_rows)
    np.divide(matrix, column_sums, out=P, where=column_sums != 0)

    log_P = np.log(P + epsilon)
    entropy = -np.sum(P * log_P, axis=0) / np.log(n_rows)
    return entropy

# Entropy weights: each indicator's share of the total (1 - entropy).
# NOTE(review): if every entropy equals 1 the denominator is 0 - confirm this
# cannot happen for the data at hand.
entropy = calculate_entropy(normalized_matrix)
weights = (1 - entropy) / np.sum(1 - entropy)  # derive the weights from the entropy values


In [None]:
# Positive / negative ideal solutions: column-wise maximum and minimum.
positive_ideal_solution = normalized_matrix.max(axis=0)
negative_ideal_solution = normalized_matrix.min(axis=0)

# Distance of every row to each ideal: squared differences are multiplied by
# the entropy weights, summed over the columns, then square-rooted.
positive_distance = np.sqrt(np.sum((normalized_matrix - positive_ideal_solution) ** 2 * weights, axis=1))
negative_distance = np.sqrt(np.sum((normalized_matrix - negative_ideal_solution) ** 2 * weights, axis=1))

# Relative closeness: the share of the negative distance in the total distance.
topsis_score = negative_distance / (positive_distance + negative_distance)

# Attach the score to the centrality table.
centralities_df['TOPSIS Score'] = topsis_score

# Save to CSV, keeping the index. This comment line was missing its leading
# '#', which made the whole cell a syntax error.
centralities_df.to_csv('community_centralities_with_topsis.csv', index=True)


In [None]:
# All distinct community ids present in the partition.
partition_communities = set(partition.values())
print("Partition 中的社区编号:", partition_communities)

# Community ids present in centralities_df (taken from its index).
centralities_communities = set(centralities_df.index)
print("centralities_df 中的社区编号:", centralities_communities)

# Ids that are in the partition but have no row in centralities_df.
missing_in_centralities = partition_communities - centralities_communities
print("在 partition 中但缺失于 centralities_df 的社区编号:", missing_in_centralities)

# Ids that are in centralities_df but not in the partition.
extra_in_centralities = centralities_communities - partition_communities
print("在 centralities_df 中但不存在于 partition 的社区编号:", extra_in_centralities)

# Inspect the nodes of the missing communities. The set computed above is used
# instead of a hard-coded id, so the cell stays correct when the partition changes.
missing_communities = set(missing_in_centralities)
missing_nodes = [node for node, community in partition.items() if community in missing_communities]
print("缺失社区中的节点:", missing_nodes)

# Show the neighbourhood of each of those nodes in the original graph.
for node in missing_nodes:
    print(f"节点 {node} 的邻居:", list(G.neighbors(node)))

# No default score is written into `community_topsis_scores` here: that dict
# is only created further down (from centralities_df), so assigning to it at
# this point raised NameError on a fresh kernel. The default score for missing
# communities is supplied when they are appended to centralities_df.

In [None]:
# Rows for the missing communities, with a default TOPSIS score of 0 (or np.nan).
# Communities that already have a row are skipped, so re-running this cell does
# not append duplicates.
missing_communities_df = pd.DataFrame(
    [
        {'Community': community, 'TOPSIS Score': 0}
        for community in missing_communities
        if community not in centralities_df.index
    ],
    columns=['Community', 'TOPSIS Score'],
)

# Expose the community id as a regular 'Community' column. Naming the index
# first works whether it is still unnamed or already called 'Community'.
centralities_df = centralities_df.rename_axis('Community').reset_index()

# Append the missing communities; their other columns are left as NaN.
if not missing_communities_df.empty:
    centralities_df = pd.concat([centralities_df, missing_communities_df], ignore_index=True)

# Restore the community id as the index.
centralities_df = centralities_df.set_index('Community')

In [None]:
# The partition already is the {node_id: community_id} lookup.
node_community_mapping = partition

# {community_id: topsis_score}, read off the community-level table.
community_topsis_scores = centralities_df['TOPSIS Score'].to_dict()

# Every node inherits the TOPSIS score of the community it belongs to.
node_topsis_scores = {}
for node, community in node_community_mapping.items():
    node_topsis_scores[node] = community_topsis_scores[community]

# Two-column table: node id and inherited score.
node_topsis_df = pd.DataFrame(
    list(node_topsis_scores.items()),
    columns=['Node', 'TOPSIS Score'],
)

# Persist the node-level scores.
node_topsis_df.to_csv('node_topsis_scores1.csv', index=False)

In [None]:
import pandas as pd

# Read the two node-level tables written by the earlier cells.
topsis_df = pd.read_csv('node_topsis_scores1.csv')
entropy_df = pd.read_csv('local_structural_entropy1.csv')

# Show the first rows of each input as a sanity check.
print("TOPSIS Data:")
print(topsis_df.head())
print("\nLocal Structural Entropy Data:")
print(entropy_df.head())

# Join the two tables on the 'Node' column.
merged_df = pd.merge(topsis_df, entropy_df, on='Node')

# Show the first rows of the merged table.
print("\nMerged Data:")
print(merged_df.head())

# Save the merged table. One variable drives both the write and the message,
# which previously reported a file name that was never written.
merged_output_path = 's_node_data1.csv'
merged_df.to_csv(merged_output_path, index=False)

print(f"Merged data has been saved to '{merged_output_path}'.")


In [None]:
import pandas as pd
import numpy as np

# Read the two node-level tables written by the earlier cells.
topsis_df = pd.read_csv('node_topsis_scores1.csv')
entropy_df = pd.read_csv('local_structural_entropy1.csv')

# Join the two tables on the 'Node' column.
merged_df = pd.merge(topsis_df, entropy_df, on='Node')

# Final score = (TOPSIS Score + 1) * Local Structural Entropy.
# The earlier comment described this as e^TOPSIS, which is not what is computed.
merged_df['Final_Score'] = (merged_df['TOPSIS Score'] + 1)* merged_df['Local Structural Entropy']

# Rank the nodes by Final_Score, highest first.
sorted_df = merged_df.sort_values(by='Final_Score', ascending=False)

# Show the top of the ranking as a sanity check.
print("Sorted Data:")
print(sorted_df.head())

# Save the ranking. One variable drives both the write and the message, which
# previously reported a file name that was never written.
final_scores_path = 's_final_scores1.csv'
sorted_df.to_csv(final_scores_path, index=False)

print(f"Sorted final scores have been saved to '{final_scores_path}'.")

In [None]:
# Use Final_Score as the PageRank *personalization* vector. It is passed via
# `personalization=`, not as a starting vector.
# NOTE(review): 'Node' values come back from a CSV round-trip - confirm their
# type still matches the node ids of G; also confirm the scores do not all
# sum to zero.
initial_pagerank = merged_df.set_index('Node')['Final_Score'].to_dict()

# Personalized PageRank on the original graph.
pagerank_scores = nx.pagerank(G, personalization=initial_pagerank)

# Two-column table: node id and PageRank value.
pagerank_df = pd.DataFrame(list(pagerank_scores.items()), columns=['Node', 'PageRank'])

# Show the first rows as a sanity check.
print("PageRank Results:")
print(pagerank_df.head())

# Save the result. One variable drives both the write and the message, which
# previously reported a file name that was never written.
pagerank_output_path = 'pagerank_results1.csv'
pagerank_df.to_csv(pagerank_output_path, index=False)

print(f"PageRank results have been saved to '{pagerank_output_path}'.")

In [None]:
import pandas as pd
from scipy.stats import kendalltau
import networkx as nx

# Baseline centrality measures on the original graph.
degree_centrality = nx.degree_centrality(G)
closeness_centrality = nx.closeness_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G)

# Scores of the custom method: the personalized PageRank values.
# The file read here must be the one the PageRank cell writes
# ('pagerank_results1.csv'); the un-suffixed name is never written by this
# notebook, so it failed on a clean run or picked up a stale file.
custom_scores_df = pd.read_csv('pagerank_results1.csv')
custom_centrality = custom_scores_df.set_index('Node')['PageRank'].to_dict()

# All rankings, keyed by method name.
centrality_dict = {
    'Degree': degree_centrality,
    'Closeness': closeness_centrality,
    'Betweenness': betweenness_centrality,
    'Eigenvector': eigenvector_centrality,
    'Custom': custom_centrality
}




In [None]:

# Write one descending ranking per method to its own CSV file.
for method, centrality in centrality_dict.items():
    # Output name is derived from the lower-cased method name.
    filename = f'{method.lower()}_sorted1.csv'

    # Node/value table, highest centrality first, with a fresh 0..n-1 index.
    df = (
        pd.DataFrame(list(centrality.items()), columns=['Node', 'Centrality'])
        .sort_values(by='Centrality', ascending=False)
        .reset_index(drop=True)
    )

    df.to_csv(filename, index=False)
    print(f'Saved {method} centrality results to {filename}')
