In [1]:
import networkx as nx
import random
from collections import Counter, defaultdict

def slpa(G, max_iter=30, threshold=0.1):
    # Initialize memory of each node to its own label
    memories = {node: [node] for node in G.nodes()}

    # SLPA iterations
    for _ in range(max_iter):
        for node in G.nodes():
            if G.neighbors(node):
                # Listener node randomly picks a label from its neighbors (speakers)
                speaker = random.choice(list(G.neighbors(node)))
                chosen_label = random.choice(memories[speaker])

                # Listener updates its memory
                memories[node].append(chosen_label)

    # Post-processing to form communities
    communities = defaultdict(list)
    for node, memory in memories.items():
        # Count the frequency of each label in the memory
        label_count = Counter(memory)
        # Filter labels based on a threshold
        labels = [label for label, count in label_count.items() if count / max_iter >= threshold]
        for label in labels:
            communities[label].append(node)

    # Filter out communities with fewer than 3 nodes and remove duplicates
    unique_communities = set()
    for community in communities.values():
        if len(community) >= 3:
            unique_communities.add(tuple(sorted(community)))

    return list(unique_communities)

# Load your graph
G = nx.read_edgelist('Filtered_Edge_Pairs.txt', nodetype=int)

# Apply SLPA
communities = slpa(G)

for idx, community in enumerate(communities):
    print(f"Community {idx + 1}: {community}")

# Write the communities to a file in the specified format
with open('slpa_communities.txt', 'w') as file:
    for community in communities:
        file.write('\t'.join(map(str, community)) + '\n')


Community 1: (2972840, 2972884, 2972886, 2972925, 2972930)
Community 2: (1868775, 1868779, 1868787, 3112717, 3112731, 3381966)
Community 3: (2972944, 2972948, 2972951, 2972965, 2973084, 2973094, 2973098, 2973099, 2973101, 2973103, 2973110, 2973113, 2973114, 2973121)
Community 4: (3885615, 3885616, 3885619, 3885622, 3885627, 3885630, 3885638, 3885641, 3885656, 3885667, 3885668)
Community 5: (914598, 1799493, 1799497, 1799505, 1799562)
Community 6: (2275182, 2562699, 2562700, 2562704, 2562707, 2562743, 2562748, 2562751, 2562754, 2562766, 2562782)
Community 7: (2635308, 2635393, 2635397)
Community 8: (3593274, 3593275, 3593285)
Community 9: (2218632, 2312460, 2312463, 2312497, 2635292, 2635295, 2635303, 2635304, 2635383, 2635386, 2635388, 2635390, 2635391, 2635399, 2635400, 2635413)
Community 10: (3112716, 3112717, 3112745, 3381966)
Community 11: (2562719, 2562740, 2562743, 2562754, 2562763, 2562764, 2562782)
Community 12: (1334183, 1705900, 2054331, 2054338, 2054343, 2054344, 3339421, 33

In [2]:
len(communities)

260

In [3]:
max_community_size = max(len(community) for community in communities)
print(f"The largest community has {max_community_size} nodes.")

The largest community has 52 nodes.
