In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx


In [49]:
# Load the graph from its GEXF file; the path is relative to the current
# working directory.
gexf_path = '../graph/iou_graph.gexf'
graph = nx.read_gexf(gexf_path)

In [43]:
# Edges whose weight is strictly below this threshold are dropped.
# (An earlier comment said 0.1, but the code has always compared against 0.08.)
MIN_EDGE_WEIGHT = 0.08

# Work on a copy so that `graph` itself keeps every edge.
graph_pruned = graph.copy()

# Collect the light edges from the untouched original first, then remove them
# from the copy in a single call.
light_edges = [
    (u, v)
    for u, v, d in graph.edges(data=True)
    if d['weight'] < MIN_EDGE_WEIGHT
]
graph_pruned.remove_edges_from(light_edges)

In [47]:
# Girvan-Newman on the pruned graph. The result is consumed with `next()`,
# one partition at a time.
communities = nx.community.girvan_newman(
    graph_pruned,
)

In [48]:
# Each item taken from `communities` is one partition of the graph: a tuple
# holding one node set per community. Show the first few partitions.
N_LEVELS = 2
for level in range(N_LEVELS):
    # Use a default so an exhausted iterator ends the loop instead of raising
    # StopIteration.
    partition = next(communities, None)
    if partition is None:
        print(f'No partition available at level {level}')
        break
    print(f'Partition {level}: {partition}')
    # len(partition) counts the communities, not the nodes in any one of them.
    print(f'Partition {level} number of communities: {len(partition)}')
    print(f'Partition {level} community sizes: {[len(x) for x in partition]}')
    print()


Community 0: ({'572721', '31156', '4145', '608398', '40121', '571532', '37541', '51137', '604347', '677255', '637684', '37016', '515848', '667504', '516664', '81415', '631569', '580853', '640002', '568460', '176874', '519683', '689910', '666523', '582485', '569699', '509522', '545965', '16074', '17990', '634887', '669726', '632337', '145142', '687471', '501871', '534185', '170871', '679247', '244446', '580719', '162594', '212235', '527615', '211169', '20373', '333436', '501362', '548930', '586220', '651722', '563419', '563659', '316715', '674205', '518923', '506292', '634649', '655214', '212153', '3481', '73473', '688651', '548147', '515528', '541767', '686483', '684221', '501145', '106197', '36397', '219782', '190825', '501428', '204531', '586524', '680518', '531256', '679326', '543372', '635965', '2183', '71295', '513236', '574550', '685281', '178915', '667310', '501396', '181296', '507451', '629842', '186971', '185189', '502216', '691646', '207112', '679401', '578245', '667720', '81

In [25]:
# Store the reciprocal of every edge's `weight` as a `distance` attribute.
# The keys come straight from the attribute lookup, so each distance is
# attached to the edge it was computed from (this also keeps multigraph edge
# keys intact and cannot misalign if some edge has no `weight`).
edge_weights = nx.get_edge_attributes(graph, 'weight')
edge_distances = {
    # A non-positive weight cannot be inverted into a usable length; treat
    # that edge as infinitely long instead of dividing by zero.
    edge: (1 / weight if weight > 0 else float('inf'))
    for edge, weight in edge_weights.items()
}
nx.set_edge_attributes(graph, edge_distances, 'distance')

In [26]:
def most_valuable_edge(G):
    """Return the edge of `G` with the largest edge betweenness centrality.

    Centrality is computed with the `distance` edge attribute as the weight.
    When several edges share the top score, the first one encountered wins.

    """
    scores = nx.edge_betweenness_centrality(G, weight='distance')
    best_edge, _ = max(scores.items(), key=lambda item: item[1])
    return best_edge

# Girvan-Newman on `graph` (not the pruned copy), with the custom
# edge-selection function supplied as `most_valuable_edge`.
communities = nx.community.girvan_newman(
    graph,
    most_valuable_edge=most_valuable_edge,
)

In [27]:
# Each item taken from `communities` is one partition of the graph: a
# collection with one node set per community. Show the first few partitions.
N_LEVELS = 2
for level in range(N_LEVELS):
    # Use a default so an exhausted iterator ends the loop instead of raising
    # StopIteration.
    partition = next(communities, None)
    if partition is None:
        print(f'No partition available at level {level}')
        break
    print(f'Partition {level}: {partition}')
    # len(partition) counts the communities, not the nodes in any one of them.
    print(f'Partition {level} number of communities: {len(partition)}')
    print(f'Partition {level} community sizes: {[len(x) for x in partition]}')
    print()


KeyboardInterrupt: 

In [28]:
# Greedy modularity communities of `graph`, using the `weight` edge attribute.
communities = nx.community.greedy_modularity_communities(graph, weight='weight')

# For every community: its members, then its size, then a blank separator line.
for community in communities:
    print(community, len(community), '', sep='\n')

frozenset({'572721', '521197', '175906', '40121', '31156', '608398', '507039', '632891', '37541', '565989', '604347', '539719', '81415', '51137', '37016', '637684', '655364', '515848', '580853', '558008', '579244', '655112', '541477', '78369', '515725', '630917', '519683', '98094', '690190', '163831', '669110', '17990', '569699', '676972', '545965', '669726', '499499', '145142', '50231', '687471', '501871', '534185', '608649', '638455', '579736', '686291', '244446', '580719', '557165', '74606', '211169', '652165', '190900', '553628', '603718', '527615', '19663', '552049', '501362', '586220', '185213', '651722', '557506', '563419', '563659', '676476', '562608', '518923', '41370', '506292', '634649', '584878', '60845', '212153', '3481', '73473', '688651', '499576', '682456', '248535', '510266', '31476', '691693', '84316', '244729', '135214', '548147', '515528', '686483', '120500', '541767', '501145', '36397', '501428', '204531', '586524', '575281', '531256', '679326', '196693', '71295', 

In [45]:
# Asynchronous label propagation is randomised, so pass a fixed seed to make
# the resulting partition (and the counts printed below) reproducible.
LPA_SEED = 42

# Materialise the result as a list so it can be counted and iterated more
# than once.
communities = list(
    nx.community.asyn_lpa_communities(graph_pruned, weight='weight', seed=LPA_SEED)
)
print(len(communities))

for community in communities:
    print(community)
    print(len(community))
    print()

# Median community size across the whole partition.
community_sizes = [len(community) for community in communities]
print(np.median(community_sizes))

59
{'572721', '31156', '4145', '608398', '40121', '571532', '37541', '81415', '604347', '677255', '637684', '51137', '515848', '667504', '516664', '631569', '37016', '580853', '640002', '568460', '176874', '519683', '689910', '582485', '634887', '545965', '569699', '16074', '509522', '17990', '669726', '632337', '687471', '534185', '170871', '244446', '580719', '162594', '212235', '527615', '20373', '333436', '501362', '548930', '586220', '563419', '563659', '316715', '674205', '518923', '506292', '634649', '655214', '212153', '3481', '688651', '548147', '515528', '541767', '684221', '501145', '106197', '36397', '219782', '190825', '501428', '204531', '586524', '680518', '635965', '543372', '679326', '2183', '513236', '574550', '685281', '178915', '667310', '181296', '501396', '507451', '629842', '185189', '502216', '691646', '207112', '679401', '578245', '8199', '667720', '122374', '75691', '545815', '103293', '49497', '203598', '188652', '683458', '691053', '224830', '524535', '60120