In [1]:
import pandas as pd
import networkx as nx

In [2]:
# Define the function to calculate assortativity coefficient
def calculate_assortativity(nodes_file, edges_file):
    """Compute the attribute assortativity coefficient of a labelled network.

    Builds an undirected graph from an edge list, stores each node's
    ``label`` column as the node attribute ``'label'``, and passes the graph
    to ``nx.attribute_assortativity_coefficient``.

    Parameters
    ----------
    nodes_file : str or path-like
        CSV file containing at least the columns ``id`` and ``label``.
    edges_file : str or path-like
        CSV file containing at least the columns ``source``, ``target`` and
        ``weight``.

    Returns
    -------
    float
        The value returned by ``nx.attribute_assortativity_coefficient`` for
        the ``'label'`` attribute.

    Raises
    ------
    KeyError
        If one of the required columns is missing from either CSV file.
    """
    # Load the nodes and edges data
    nodes_df = pd.read_csv(nodes_file)
    edges_df = pd.read_csv(edges_file)

    # Create the graph
    G = nx.Graph()

    # Walk the columns directly instead of DataFrame.iterrows(): iterrows()
    # builds one Series per row and coerces mixed-dtype rows to a common
    # dtype (e.g. integer ids become floats next to a float weight).
    for source, target, weight in zip(
        edges_df['source'], edges_df['target'], edges_df['weight']
    ):
        G.add_edge(source, target, weight=weight)

    # add_node() creates the node if it is absent and only updates its
    # attributes if it already exists. Indexing G.nodes[node_id] instead
    # raised KeyError for every node listed in the nodes file that has no
    # edge in the edges file.
    for node_id, label in zip(nodes_df['id'], nodes_df['label']):
        G.add_node(node_id, label=label)

    # Calculate and return assortativity coefficient
    return nx.attribute_assortativity_coefficient(G, 'label')

In [3]:
# Assortativity coefficient of each community's network, one call per
# community; every call reads that community's node table and edge list.
assortativity_c1 = calculate_assortativity(
    nodes_file='G:/共病/数据/community1_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community1_edges.csv',
)
assortativity_c2 = calculate_assortativity(
    nodes_file='G:/共病/数据/community2_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community2_edges.csv',
)
assortativity_c3 = calculate_assortativity(
    nodes_file='G:/共病/数据/community3_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community3_edges.csv',
)

In [4]:
# Report the three coefficients, one line per community.
# `!s` applies str() to each value, which is what print() does to its arguments.
print(
    f"Assortativity Coefficient for Community 1: {assortativity_c1!s}",
    f"Assortativity Coefficient for Community 2: {assortativity_c2!s}",
    f"Assortativity Coefficient for Community 3: {assortativity_c3!s}",
    sep="\n",
)

Assortativity Coefficient for Community 1: 0.04874856005864494
Assortativity Coefficient for Community 2: 0.037480940392184685
Assortativity Coefficient for Community 3: 0.04383031156486531


In [5]:
def calculate_homophily(nodes_file, edges_file):
    """Compute the share of edges whose two endpoints have the same label.

    Builds an undirected graph from an edge list, stores each node's
    ``label`` column as the node attribute ``'category'``, and returns the
    number of edges joining two nodes of equal category divided by the total
    number of edges in the graph.

    Parameters
    ----------
    nodes_file : str or path-like
        CSV file containing at least the columns ``id`` and ``label``.
    edges_file : str or path-like
        CSV file containing at least the columns ``source``, ``target`` and
        ``weight``.

    Returns
    -------
    float or int
        Intra-category edges divided by all edges, or ``0`` when the graph
        has no edges.

    Raises
    ------
    KeyError
        If a required column is missing, or if an edge endpoint does not
        appear in the nodes file and therefore has no category.
    """
    # Load the nodes and edges data
    nodes_df = pd.read_csv(nodes_file)
    edges_df = pd.read_csv(edges_file)

    # Create the graph
    G = nx.Graph()

    # Walk the columns directly instead of DataFrame.iterrows(): iterrows()
    # builds one Series per row and coerces mixed-dtype rows to a common
    # dtype (e.g. integer ids become floats next to a float weight).
    for source, target, weight in zip(
        edges_df['source'], edges_df['target'], edges_df['weight']
    ):
        G.add_edge(source, target, weight=weight)

    # add_node() creates the node if it is absent and only updates its
    # attributes if it already exists. Indexing G.nodes[node_id] instead
    # raised KeyError for every node listed in the nodes file that has no
    # edge in the edges file.
    for node_id, label in zip(nodes_df['id'], nodes_df['label']):
        G.add_node(node_id, category=label)

    # Any node still lacking a category came from the edge list only. The
    # comparison below would fail on it with a bare KeyError('category'), so
    # fail up front with a message that names the offending nodes.
    unlabeled = [
        node for node, attrs in G.nodes(data=True) if 'category' not in attrs
    ]
    if unlabeled:
        raise KeyError(
            f"{len(unlabeled)} edge endpoint(s) are missing from the nodes "
            f"file and have no category, e.g. {unlabeled[:5]}"
        )

    total_edges = G.number_of_edges()
    if total_edges == 0:
        # No edges: the ratio is undefined; keep the original fallback value.
        return 0

    intra_category_edges = sum(
        1
        for u, v in G.edges()
        if G.nodes[u]['category'] == G.nodes[v]['category']
    )

    return intra_category_edges / total_edges

In [6]:
# Homophily ratio of each community's network, one call per community;
# every call reads that community's node table and edge list.
homophily_c1 = calculate_homophily(
    nodes_file='G:/共病/数据/community1_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community1_edges.csv',
)
homophily_c2 = calculate_homophily(
    nodes_file='G:/共病/数据/community2_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community2_edges.csv',
)
homophily_c3 = calculate_homophily(
    nodes_file='G:/共病/数据/community3_nodes_with_centrality.csv',
    edges_file='G:/共病/数据/community3_edges.csv',
)

In [7]:
# Report the three ratios, one line per community.
# `!s` applies str() to each value, which is what print() does to its arguments.
print(
    f"Homophily Ratio for Community 1: {homophily_c1!s}",
    f"Homophily Ratio for Community 2: {homophily_c2!s}",
    f"Homophily Ratio for Community 3: {homophily_c3!s}",
    sep="\n",
)

Homophily Ratio for Community 1: 0.24643584521384929
Homophily Ratio for Community 2: 0.16346153846153846
Homophily Ratio for Community 3: 0.20618556701030927
