# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
g = nx.read_gml("./RealNetworks/911allwords.gml")

In [3]:
print(nx.info(g))

Graph with 13308 nodes and 148035 edges



  print(nx.info(g))


# Check connectedness

In [4]:
nx.is_connected(g)

True

In [5]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- But put the graph relabled in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
g_relabled = mappingAndRelabeling(g)

In [11]:
print(nx.info(g))
print(nx.info(g_relabled))

Graph with 13308 nodes and 148035 edges
Graph with 13308 nodes and 148035 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [13]:
g = g_relabled

# Infer community structure with Louvain

In [14]:
nx.__version__

'2.8.4'

In [15]:
import pandas as pd
from os.path  import exists
import infomap as im

In [16]:
im.__version__

'2.3.0'

In [17]:
G = g

In [18]:
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [19]:
communities = { community_ID : set(community) for (community_ID, community) in enumerate(partition, start = 1)}

In [20]:
community_assignments = dict()
for (community_ID, community) in communities.items():
    for node in community:
        community_assignments[node] = community_ID

In [21]:
results_file = "./results/911allwords-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/911allwords-temp.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)
    
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            if not line.startswith("#"):
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]
                infomap_tree[int(name)] = (flow, node_id)
    
    # write the Louvain partition to an Infomap tree file
    with open("./results/911allwords-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")
    
    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/911allwords-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()
    
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    
    index = []
    for node in G.nodes:
        index.append(node)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [22]:
len(mec_ranks)

13308

In [23]:
# node 107 has rank 0 (i.e. 1)
# node 1684 has rank 1 (i.e. 2)
# ...
# node 0 has rank 4 (i.e. 5)
mec_ranks

['12482',
 '1199',
 '11756',
 '8995',
 '548',
 '1664',
 '8244',
 '9374',
 '12909',
 '8530',
 '918',
 '7739',
 '9157',
 '13147',
 '10635',
 '820',
 '3008',
 '12874',
 '12260',
 '8991',
 '4759',
 '2500',
 '5182',
 '6939',
 '11900',
 '13143',
 '3259',
 '12962',
 '603',
 '13135',
 '5276',
 '8528',
 '12021',
 '7771',
 '5683',
 '11477',
 '543',
 '11903',
 '4698',
 '11863',
 '8793',
 '631',
 '12121',
 '1791',
 '8239',
 '2000',
 '8145',
 '12961',
 '9209',
 '11611',
 '6337',
 '4769',
 '11345',
 '8260',
 '4884',
 '4482',
 '11247',
 '8074',
 '9384',
 '703',
 '6587',
 '6243',
 '12000',
 '8149',
 '12239',
 '10025',
 '8234',
 '7387',
 '3347',
 '5689',
 '11794',
 '13027',
 '618',
 '11588',
 '7621',
 '458',
 '4497',
 '8869',
 '1057',
 '561',
 '7011',
 '10492',
 '2135',
 '7898',
 '819',
 '8998',
 '2319',
 '10702',
 '2242',
 '4612',
 '11967',
 '7334',
 '8614',
 '625',
 '10724',
 '4596',
 '10322',
 '3959',
 '8896',
 '6116',
 '10026',
 '9269',
 '6918',
 '1250',
 '8629',
 '11965',
 '10676',
 '13215',
 '773

In [24]:
counter = len(mec_ranks)
dict_map_eq_centrality = {}

In [25]:
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [26]:
dict_map_eq_centrality

{12482: 13308,
 1199: 13307,
 11756: 13306,
 8995: 13305,
 548: 13304,
 1664: 13303,
 8244: 13302,
 9374: 13301,
 12909: 13300,
 8530: 13299,
 918: 13298,
 7739: 13297,
 9157: 13296,
 13147: 13295,
 10635: 13294,
 820: 13293,
 3008: 13292,
 12874: 13291,
 12260: 13290,
 8991: 13289,
 4759: 13288,
 2500: 13287,
 5182: 13286,
 6939: 13285,
 11900: 13284,
 13143: 13283,
 3259: 13282,
 12962: 13281,
 603: 13280,
 13135: 13279,
 5276: 13278,
 8528: 13277,
 12021: 13276,
 7771: 13275,
 5683: 13274,
 11477: 13273,
 543: 13272,
 11903: 13271,
 4698: 13270,
 11863: 13269,
 8793: 13268,
 631: 13267,
 12121: 13266,
 1791: 13265,
 8239: 13264,
 2000: 13263,
 8145: 13262,
 12961: 13261,
 9209: 13260,
 11611: 13259,
 6337: 13258,
 4769: 13257,
 11345: 13256,
 8260: 13255,
 4884: 13254,
 4482: 13253,
 11247: 13252,
 8074: 13251,
 9384: 13250,
 703: 13249,
 6587: 13248,
 6243: 13247,
 12000: 13246,
 8149: 13245,
 12239: 13244,
 10025: 13243,
 8234: 13242,
 7387: 13241,
 3347: 13240,
 5689: 13239,
 117

In [27]:
# Write Centrality
with open('./MapEquationCentralityResults/Misc-911AllWords/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [28]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/Misc-911AllWords/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [29]:
dict_map_eq_centrality_read == dict_map_eq_centrality

True