In [1]:
import os
from tqdm import tqdm
import random
import numpy as np
import netwulf as nw
import networkx as nx
import matplotlib.pyplot as plt
import networkx.algorithms.community as nx_comm
# if os.getcwd()[-1] in '0123456798':
#     path_parent = os.path.dirname(os.getcwd())
#     os.chdir(path_parent)

In [2]:
stylized_network, config, G = nw.load("Graph.json")

In [3]:
partition = nx_comm.louvain_communities(G)

In [4]:
sizes = []
for i, c in enumerate(partition):
    # print(f'Community {i} has size: {len(c)}')
    sizes.append(len(c))
print(f'The average community size is {round(np.mean(sizes),2)}, with a standard deviation of {round(np.std(sizes),2)}. The largest is of size {np.max(sizes)} and the smallest {np.min(sizes)}')

The average community size is 50.15, with a standard deviation of 197.19. The largest is of size 3150 and the smallest 1


In [11]:
colours_avail = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
sizes = np.array(sizes)
indices = np.argsort(sizes)[-len(colours_avail):]

for i, c in enumerate(partition):
    if i in indices:
        col = colours_avail.pop()
    else:
        col = 'w'
    for node in c:
        G.node[node]['color'] = col

In [12]:
network, config = nw.visualize(G)
fig, ax = nw.draw_netwulf(network)
plt.savefig('Graph_coloured')

(45188, 45188)

In [5]:
N = G.number_of_edges()*2
number_of_iterations = 1000
coms = [list(part) for part in partition]

def modularity(G, communities):
    m = G.number_of_edges()
    q = 0.0
    for c in communities:
        lc = G.subgraph(c).number_of_edges()
        kc = sum(G.degree(n) for n in c)
        q += lc/m - (kc/(2*m))**2
    return q


def double_edge_swap_algo(G_original, N):
    G = G_original.copy()
    for i in tqdm(range(N)):
        u, v = random.choice(list(G.edges()))
        x, y = random.choice(list(G.edges()))

        if (u != x) and (v != y) and (not G.has_edge(u, y)) and (not G.has_edge(x, v)):
            # perform the double edge swap
            G.remove_edges_from([(u, v), (x, y)])
            G.add_edges_from([(u, y), (x, v)])

    return G

def plot_density(modularities, value):
    plt.hist(modularities, bins=50, alpha=0.5, density=True)
    plt.axvline(x=value, color='r')

    plt.xlabel('Modularity')
    plt.ylabel('Density')
    plt.title('Distribution of Random Modularities with Actual Modularity')
    plt.savefig('Modularities')
    plt.show()


def is_significantly_dif_from_zero(G, value):

    print("current modularity = ", value)

    modularities = []
    for i in tqdm(range(number_of_iterations)):
        current_double_edge_swap = double_edge_swap_algo(G, N)
        current_modularity = modularity(current_double_edge_swap, coms)
        modularities.append(current_modularity)

    mean = np.mean(modularities)
    std = np.std(modularities)

    print("mean: ", mean)
    print("std: ", std)

    plot_density(modularities, value)

    if value < mean-2*std or value > mean+2*std:
        print("Value is significantly different")
    else:
        print("Value is not significantly different")

In [None]:
modularity_from_current_communities = modularity(G, partition)
is_significantly_dif_from_zero(G, modularity_from_current_communities)

current modularity =  0.9230918387463779


  0%|          | 0/1000 [00:00<?, ?it/s]
  0%|          | 0/1951526 [00:00<?, ?it/s][A
  0%|          | 1/1951526 [00:00<301:18:41,  1.80it/s][A
  0%|          | 2/1951526 [00:01<297:35:55,  1.82it/s][A
  0%|          | 3/1951526 [00:01<308:35:47,  1.76it/s][A
  0%|          | 4/1951526 [00:02<305:30:17,  1.77it/s][A
  0%|          | 5/1951526 [00:02<301:42:18,  1.80it/s][A
  0%|          | 6/1951526 [00:03<299:01:22,  1.81it/s][A
  0%|          | 7/1951526 [00:03<297:50:38,  1.82it/s][A
  0%|          | 8/1951526 [00:04<299:16:28,  1.81it/s][A
  0%|          | 9/1951526 [00:04<297:15:45,  1.82it/s][A
  0%|          | 10/1951526 [00:05<297:35:58,  1.82it/s][A
  0%|          | 11/1951526 [00:06<295:58:28,  1.83it/s][A
  0%|          | 12/1951526 [00:06<294:41:22,  1.84it/s][A
  0%|          | 13/1951526 [00:07<293:23:32,  1.85it/s][A
  0%|          | 14/1951526 [00:07<294:20:23,  1.84it/s][A
  0%|          | 15/1951526 [00:08<299:10:57,  1.81it/s][A
  0%|          | 16/1