In [1]:
import networkx as nx
from networkx.algorithms.community.quality import modularity
from collections import defaultdict

In [2]:
def read_edge_list(path):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Each usable line contributes one edge between its first two tokens;
    any further tokens on the line are ignored. Blank lines, lines whose
    first non-blank character is '#', and lines with fewer than two tokens
    are skipped. Node identifiers are kept as strings.

    Args:
        path: Location of the edge-list text file (read as UTF-8).

    Returns:
        An ``nx.Graph`` containing every edge found in the file.
    """
    graph = nx.Graph()
    with open(path, 'r', encoding='utf-8') as handle:
        # split() with no argument already discards surrounding whitespace,
        # so an empty token list corresponds to a blank line.
        token_rows = (raw.split() for raw in handle)
        graph.add_edges_from(
            (row[0], row[1])
            for row in token_rows
            if len(row) >= 2 and not row[0].startswith('#')
        )
    return graph

In [3]:
def read_communities(path):
    """Parse a node-to-community assignment file into a dictionary.

    Each usable line supplies a node identifier (first token) and a
    community label (second token); extra tokens are ignored. Blank lines,
    lines whose first non-blank character is '#', and lines with fewer than
    two tokens are skipped. If a node appears more than once, the last
    occurrence in the file wins.

    Args:
        path: Location of the assignment text file (read as UTF-8).

    Returns:
        A dict mapping node identifier (str) to community label (str).
    """
    assignment = {}
    with open(path, 'r', encoding='utf-8') as handle:
        for raw in handle:
            fields = raw.split()
            # Too short (covers blank lines) or a comment line: nothing to record.
            if len(fields) < 2 or fields[0].startswith('#'):
                continue
            assignment[fields[0]] = fields[1]
    return assignment

In [4]:
def node2communities_list(node2comm):
    """Invert a node-to-community mapping into per-community member groups.

    Args:
        node2comm: Mapping of node identifier to community label.

    Returns:
        A tuple ``(communities, comm2nodes)`` where ``communities`` is a
        list of sets of node identifiers (one set per label, in order of
        first appearance) and ``comm2nodes`` is a ``defaultdict(list)``
        mapping each label to its member list. Node identifiers are
        converted to ``str`` in both structures.
    """
    members_by_label = defaultdict(list)
    for vertex, label in node2comm.items():
        members_by_label[label].append(str(vertex))

    groups = []
    for members in members_by_label.values():
        if members:
            groups.append(set(members))
    return groups, members_by_label

def compute_modularity(edge_path, comm_path):
    """Compute and report the modularity of a community assignment.

    Loads the graph from ``edge_path`` and the node-to-community mapping
    from ``comm_path``, reconciles the two node sets so the communities
    cover every graph node, then evaluates networkx's ``modularity``.
    A short summary (and any reconciliation warnings) is printed.

    Args:
        edge_path: Path to the edge-list file.
        comm_path: Path to the node-to-community assignment file.

    Returns:
        The modularity value Q returned by ``modularity``.
    """
    G = read_edge_list(edge_path)
    node2comm = read_communities(comm_path)
    communities, _comm2nodes = node2communities_list(node2comm)

    labelled = set(node2comm)

    # Nodes listed in the community file but absent from the graph are
    # added as isolated nodes so that every community member exists in G.
    missing_nodes = labelled - set(G.nodes())
    if len(missing_nodes) > 0:
        print(f"Warning: {len(missing_nodes)} nodes appear in community file but not in edge file. Added as isolated nodes.")
        G.add_nodes_from(missing_nodes)

    # Graph nodes with no community label each become a singleton
    # community, so the communities cover every node of G.
    unassigned = set(G.nodes()) - labelled
    if len(unassigned) > 0:
        print(f"Warning: {len(unassigned)} graph nodes have no community assignment. They are treated as singleton communities.")
        communities.extend({str(n)} for n in unassigned)

    Q = modularity(G, communities)

    print("\nNumber of nodes in graph:", G.number_of_nodes())
    print("Number of edges in graph:", G.number_of_edges())
    print("Number of communities (after processing):", len(communities))
    print("Modularity Q = {:.6f}".format(Q))

    return Q

In [8]:
if __name__ == '__main__':
    # Other dataset pairs this cell has been pointed at; uncomment one pair
    # (and comment out the active pair below) to switch datasets.
    #   edge_path = 'AskUbuntu/ubuntu_graph_filtered.txt'
    #   comm_path = 'AskUbuntu/communities_ubuntu_louvain_filtered.txt'
    #
    #   edge_path = 'Twitter-19/twitter_covid19_graph_filtered.txt'
    #   comm_path = 'Twitter-19/communities_covid19_louvain_filtered.txt'

    # Active dataset: YouTube subgraph.
    edge_path, comm_path = (
        'YouTube/youtube_top5000_subgraph.txt',
        'YouTube/node2comm_youtube.txt',
    )
    compute_modularity(edge_path=edge_path, comm_path=comm_path)


Number of nodes in graph: 34861
Number of edges in graph: 216626
Number of communities (after processing): 2222
Modularity Q = 0.133806

Report saved to: modularity_report_ubuntu.txt
