# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
g = nx.read_gml("./RealNetworks/pgp.gml")

In [3]:
print(nx.info(g))

Graph with 10680 nodes and 24316 edges



  print(nx.info(g))


# Check connectedness

In [4]:
nx.is_connected(g)

True

In [5]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- But put the graph relabled in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
g_relabled = mappingAndRelabeling(g)

In [8]:
print(nx.info(g))
print(nx.info(g_relabled))

Graph with 10680 nodes and 24316 edges
Graph with 10680 nodes and 24316 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
g = g_relabled

# Infer community structure with Louvain

In [10]:
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
im.__version__

'2.3.0'

In [13]:
G = g

In [14]:
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
communities = { community_ID : set(community) for (community_ID, community) in enumerate(partition, start = 1)}

In [16]:
community_assignments = dict()
for (community_ID, community) in communities.items():
    for node in community:
        community_assignments[node] = community_ID

In [17]:
results_file = "./results/pgp-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/pgp-temp.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)
    
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            if not line.startswith("#"):
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]
                infomap_tree[int(name)] = (flow, node_id)
    
    # write the Louvain partition to an Infomap tree file
    with open("./results/pgp-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")
    
    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/pgp-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()
    
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    
    index = []
    for node in G.nodes:
        index.append(node)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
len(mec_ranks)

10680

In [19]:
# node 107 has rank 0 (i.e. 1)
# node 1684 has rank 1 (i.e. 2)
# ...
# node 0 has rank 4 (i.e. 5)
mec_ranks

[9,
 79,
 659,
 8,
 300,
 143,
 112,
 1508,
 380,
 19,
 21,
 126,
 16,
 382,
 161,
 386,
 1546,
 246,
 18,
 2803,
 646,
 235,
 120,
 20,
 11,
 67,
 393,
 515,
 248,
 1447,
 124,
 364,
 127,
 3960,
 22,
 39,
 209,
 34,
 1467,
 26,
 116,
 291,
 10,
 3932,
 1536,
 24,
 111,
 1535,
 587,
 123,
 1465,
 2809,
 44,
 4,
 215,
 3637,
 2570,
 38,
 156,
 410,
 1472,
 29,
 13,
 237,
 130,
 886,
 654,
 14,
 32,
 219,
 223,
 3809,
 45,
 25,
 28,
 239,
 218,
 202,
 221,
 224,
 228,
 229,
 199,
 220,
 3589,
 142,
 40,
 30,
 47,
 230,
 3963,
 589,
 36,
 31,
 173,
 2806,
 216,
 217,
 222,
 225,
 226,
 3958,
 15,
 390,
 559,
 815,
 163,
 42,
 152,
 477,
 200,
 35,
 1530,
 37,
 33,
 1890,
 113,
 1547,
 238,
 4123,
 147,
 49,
 153,
 1469,
 3937,
 3941,
 1534,
 2893,
 80,
 242,
 4198,
 800,
 154,
 43,
 146,
 618,
 3953,
 3959,
 3944,
 3946,
 3939,
 3938,
 3935,
 3934,
 3954,
 2875,
 41,
 169,
 27,
 416,
 396,
 252,
 128,
 255,
 3948,
 3961,
 3940,
 3936,
 3955,
 3951,
 2300,
 2888,
 1278,
 2381,
 148,
 144,

In [20]:
counter = len(mec_ranks)
dict_map_eq_centrality = {}

In [21]:
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [22]:
dict_map_eq_centrality

{9: 10680,
 79: 10679,
 659: 10678,
 8: 10677,
 300: 10676,
 143: 10675,
 112: 10674,
 1508: 10673,
 380: 10672,
 19: 10671,
 21: 10670,
 126: 10669,
 16: 10668,
 382: 10667,
 161: 10666,
 386: 10665,
 1546: 10664,
 246: 10663,
 18: 10662,
 2803: 10661,
 646: 10660,
 235: 10659,
 120: 10658,
 20: 10657,
 11: 10656,
 67: 10655,
 393: 10654,
 515: 10653,
 248: 10652,
 1447: 10651,
 124: 10650,
 364: 10649,
 127: 10648,
 3960: 10647,
 22: 10646,
 39: 10645,
 209: 10644,
 34: 10643,
 1467: 10642,
 26: 10641,
 116: 10640,
 291: 10639,
 10: 10638,
 3932: 10637,
 1536: 10636,
 24: 10635,
 111: 10634,
 1535: 10633,
 587: 10632,
 123: 10631,
 1465: 10630,
 2809: 10629,
 44: 10628,
 4: 10627,
 215: 10626,
 3637: 10625,
 2570: 10624,
 38: 10623,
 156: 10622,
 410: 10621,
 1472: 10620,
 29: 10619,
 13: 10618,
 237: 10617,
 130: 10616,
 886: 10615,
 654: 10614,
 14: 10613,
 32: 10612,
 219: 10611,
 223: 10610,
 3809: 10609,
 45: 10608,
 25: 10607,
 28: 10606,
 239: 10605,
 218: 10604,
 202: 10603,


In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/OSN-PGP/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/OSN-PGP/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
dict_map_eq_centrality_read == dict_map_eq_centrality

True