# Community Detection in a Facebook network

## Dataset

In [1]:
import networkx as nx

In [49]:
edges_filename = 'example.txt'

In [50]:
# Load the Facebook dataset
graph = nx.read_edgelist(f'data/{edges_filename}')

In [51]:
# Explore the properties of the graph

In [52]:
# print number of nodes / edges
print('Number of nodes:', graph.number_of_nodes())
print('Number of edges:', graph.number_of_edges())

Number of nodes: 16
Number of edges: 28


## Algorithms implementation

In [53]:
# Implement the Louvain method for community detection.
'''
For each passage:
Step 1: Initialization: node = community
Step 2: Remove node vi from its community
Step 3: Insert vi
in a neighboring community that
maximizes ∆Q
Step 4: Repeat Step 1 until the partition does not evolve
Step 5: Transform the communities into (hyper)nodes and
Repeat Step 1
We do passages until convergence

while communities change:
    for each community i:
        for each community j that is a neighbor of i:
            dij = 2 * #links_between_communities
            di = #neighbors_of_i
            dj = #neighbors_of_j
        merge community i with the community j which gave the biggest modularity gain
'''
        
def louvain_community_detection(graph):
    """
    Louvain's algorithm to detect communities in a graph.
    :param graph: networkx graph
    :returns: dictionary with graph's nodes as keys and community's id as values
    """
    m = graph.number_of_nodes()
    prev_communities = {}
    communities = {i: i for i in graph.nodes} # keys: nodes' id, values: nodes' community id
    while list(prev_communities.values()) != list(communities.values()): # while communities are changing
        prev_communities = communities.copy()
        
        communities = {k: v for k, v in sorted(communities.items(), key=lambda item: int(item[1]))} # sort by node id (keys)
        for nodei in communities.keys():
            communityi = communities[nodei]
            di = len(list(graph.neighbors(nodei)))
            neighbor_communities = set(communities[neighbor] for neighbor in graph.neighbors(nodei))
            
            # compute modularity gain for each neighbor community of nodei
            modularity_gains = {} # keys: community' id, values: modularity gain
            for communityj in neighbor_communities:
                nodes_in_communityj = [node for node, community in communities.items() if community == communityj]
                dj = sum([len(list(graph.neighbors(node))) for node in nodes_in_communityj if node != nodei])
                # count number of neighbors of nodei that are in communityj
                links_ij = [1 if node in graph.neighbors(nodei) else 0 for node in nodes_in_communityj]
                dij = 2 * sum(links_ij)
                # compute modularity
                modularity_gains[communityj] = 1 / (2 * m) * (dij - (di * dj / m))
                
            # update node's community
            modularity_gains = {k: v for k, v in sorted(modularity_gains.items(), key=lambda item: item[1])} # sort by modularity gain (values)
            communities[nodei] = list(modularity_gains.keys())[-1]
        break
    return communities

In [54]:
# Implement the average clustering coefficient for a graph.
#C(v) = #connected pairs of v's neighbors / #pairs of v's neighbors

## Analysis

In [55]:
# Identify users’ communities in the Facebook network using Louvain
louvain_community_detection(graph)

{'0': '3',
 '1': '4',
 '2': '4',
 '3': '3',
 '4': '4',
 '5': '7',
 '6': '11',
 '7': '7',
 '8': '15',
 '9': '12',
 '10': '13',
 '11': '13',
 '12': '12',
 '13': '13',
 '14': '12',
 '15': '15'}

In [9]:
# Identify the top k users with the highest clustering coefficient in the graph.
# Experiment with different values of k and choose the most appropriate one.

In [10]:
# Evaluate different random walk strategies to spread a message across the network.
# The message should reach as many different communities as possible.

## Visualization

In [11]:
# Visualize the output of Louvain by coloring the nodes according to their assigned Communities.

In [12]:
# Visualize the output of applying Random Walk algorithm, by highlighting the sequence of nodes selected in a path.