In [60]:
import networkx as nx
from cdlib.algorithms import infomap

# Read the full graph from its GEXF export.
gexf_path = 'outputs/full_graph.gexf'
graph = nx.read_gexf(gexf_path)

# Report the graph size as a quick sanity check on the load.
graph_node_count = graph.number_of_nodes()
graph_edge_count = graph.number_of_edges()
print(f"{graph_node_count} nodes, {graph_edge_count} edges")

20804 nodes, 99974 edges


In [61]:
# Run Infomap community detection on the whole graph (every component included).
communities = infomap(graph)

# `communities.communities` holds one collection of nodes per detected community.
full_graph_community_count = len(communities.communities)
print(f"Found {full_graph_community_count} communities")

Found 9905 communities


In [62]:
# Enumerate the weakly connected components and keep the node set of the
# largest one (by node count).
components = nx.weakly_connected_components(graph)
largest_component_nodes = max(components, key=len)

# `subgraph` yields a view onto `graph`; `.copy()` materialises it as a
# separate graph object.
lcc_view = graph.subgraph(largest_component_nodes)
lcc = lcc_view.copy()

lcc_node_count = lcc.number_of_nodes()
lcc_edge_count = lcc.number_of_edges()
print(f"Largest component: {lcc_node_count} nodes, {lcc_edge_count} edges")

Largest component: 10894 nodes, 99923 edges


In [63]:
# Run Infomap again, this time restricted to the largest component.
lcc_communities = infomap(lcc)

# Community sizes, largest first.
community_sizes = sorted(map(len, lcc_communities.communities), reverse=True)

print(f"Found {len(lcc_communities.communities)} communities")
print("Top community sizes:", community_sizes[:10])

Found 29 communities
Top community sizes: [9843, 851, 124, 8, 8, 6, 6, 5, 4, 3]


In [65]:
# Map every node to the index of the Infomap community that contains it.
# The index is the community's position in `lcc_communities.communities`.
# If a node were listed in more than one community, the last one wins
# (same result as the explicit nested loop this comprehension replaces).
node_to_comm = {
    node: idx
    for idx, community in enumerate(lcc_communities.communities)
    for node in community
}

# Store the community index on each node so it is exported with the graph.
for node in lcc.nodes():
    lcc.nodes[node]['infomap_community'] = node_to_comm.get(node, -1)  # -1 for nodes not assigned

# Write the annotated largest component to GEXF.
nx.write_gexf(lcc, "outputs/graph_with_infomap_communities.gexf")
print("Graph with community attributes saved as 'graph_with_infomap_communities.gexf'")


Graph with community attributes saved as 'graph_with_infomap_communities.gexf'
