# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the network from the GML file shipped under ./RealNetworks.
g = nx.read_gml("./RealNetworks/adolescent_health.gml")

In [3]:
# Print the one-line graph summary (type, node count, edge count).
# `nx.info` is deprecated in NetworkX 2.8 and removed in 3.0; printing the
# graph itself yields the same summary without the deprecation warning.
print(g)

Graph with 2539 nodes and 10455 edges



  print(nx.info(g))


# Check connectedness

In [4]:
# Display whether the graph is connected.
nx.is_connected(g)

True

In [5]:
# Display how many connected components the graph has.
nx.number_connected_components(g)

1

# Get the largest connected component (LCC) if the graph is disconnected

# Relabel it all if nodes are strings
- This makes it possible to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
# Build the relabeled graph with the project helper from myTruss.
# NOTE(review): presumably this maps string node labels to integers and
# returns a new graph — confirm against mappingAndRelabeling's definition.
g_relabled = mappingAndRelabeling(g)

In [8]:
# Compare the summaries of the loaded and the relabeled graph.
# `nx.info` is deprecated in NetworkX 2.8 and removed in 3.0; printing the
# graphs directly yields the same summaries without the deprecation warning.
print(g)
print(g_relabled)

Graph with 2539 nodes and 10455 edges
Graph with 2539 nodes and 10455 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
# From here on `g` refers to the relabeled graph; the graph as originally
# loaded is no longer reachable through the name `g`.
g = g_relabled

# Infer community structure with Louvain

In [10]:
# Record the NetworkX version used for this run.
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
# Record the Infomap version used for this run.
im.__version__

'2.3.0'

In [13]:
# Alias: the cells below refer to the (relabeled) graph as `G`.
G = g

In [14]:
# Run Louvain with seeds 0..4 and keep the partition with the highest modularity.
# Seed 0 provides the baseline that the remaining seeds have to beat.
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)

    # Only a strictly better modularity replaces the current best.
    if partition_Q_ <= partition_Q:
        continue

    partition, partition_Q = partition_, partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
# Number the communities of the chosen partition starting at 1: {community_ID: set of nodes}.
communities = dict(enumerate(map(set, partition), start = 1))

In [16]:
# Invert `communities` into a node -> community_ID lookup.
community_assignments = {
    node: community_ID
    for (community_ID, community) in communities.items()
    for node in community
}

In [17]:
# Compute (or load from cache) the map equation centrality ranking of the nodes
# with respect to the Louvain partition found above.
results_file = "./results/adolescenthealth-modularity.csv"

if exists(results_file):
    # Cached results are available: load them instead of re-running Infomap.
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

else:
    # Map Equation Centrality
    tree_file = "./results/adolescenthealth-temp.tree"

    # Let Infomap write a tree file; its per-node flow and node id are reused
    # below when the Louvain partition is written in the same format.
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # node name (as int) -> (flow, node_id), both kept as the strings read from the file
    infomap_tree = dict()

    with open(tree_file, "r") as fh:
        for line in fh:
            # skip the header/comment lines of the tree file
            if line.startswith("#"):
                continue
            _, flow, name, node_id = line.strip().split(" ")
            name = name[1:-1]  # drop the first and last character (the surrounding quotes)
            infomap_tree[int(name)] = (flow, node_id)

    # Write the Louvain partition as an Infomap tree file: the path is
    # "<community>:<position in community>", both counted from 1.
    with open("./results/adolescenthealth-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Hand the Louvain partition to a second Infomap instance via --cluster-data.
    # NOTE(review): --no-infomap presumably skips the optimisation so the given
    # partition is kept as is — confirm against the Infomap CLI documentation.
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/adolescenthealth-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # Pair every leaf node's graph label with its modular centrality, order the
    # pairs by the key function `second` in descending order, keep only the labels.
    scored_nodes = [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
    scored_nodes.sort(key = second, reverse = True)
    mec_ranks = [node for node, _score in scored_nodes]

    # One row per graph node, in the graph's node order.
    index = list(G.nodes)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

In [18]:
# Number of ranked nodes.
len(mec_ranks)

2539

In [19]:
# mec_ranks lists node IDs in rank order: position 0 is rank 1, position 1 is
# rank 2, and so on. In the saved output below:
# node 835 is at position 0 (i.e. rank 1)
# node 790 is at position 1 (i.e. rank 2)
# ...
# node 2093 is at position 4 (i.e. rank 5)
mec_ranks

[835,
 790,
 2055,
 604,
 2093,
 2151,
 2040,
 2038,
 1196,
 123,
 1936,
 131,
 67,
 2091,
 347,
 707,
 124,
 66,
 494,
 687,
 644,
 836,
 607,
 410,
 572,
 577,
 2282,
 1092,
 258,
 217,
 147,
 166,
 1229,
 1903,
 2077,
 2205,
 2201,
 303,
 77,
 925,
 196,
 621,
 620,
 877,
 2129,
 1955,
 2127,
 1449,
 357,
 205,
 2173,
 395,
 694,
 271,
 257,
 299,
 618,
 606,
 766,
 187,
 70,
 2231,
 2087,
 2088,
 2089,
 2039,
 224,
 321,
 417,
 16,
 256,
 208,
 431,
 580,
 2208,
 2188,
 1043,
 43,
 1095,
 339,
 600,
 101,
 379,
 495,
 189,
 931,
 2084,
 2083,
 2125,
 2235,
 2010,
 2095,
 2086,
 2113,
 2259,
 1898,
 165,
 82,
 207,
 778,
 243,
 2183,
 2082,
 2032,
 1091,
 2256,
 2315,
 2300,
 2229,
 2293,
 690,
 673,
 817,
 268,
 700,
 963,
 178,
 139,
 930,
 792,
 609,
 127,
 999,
 63,
 492,
 596,
 68,
 741,
 646,
 678,
 382,
 739,
 2013,
 1837,
 795,
 2128,
 2094,
 2012,
 1943,
 42,
 174,
 237,
 279,
 1452,
 1039,
 1459,
 162,
 203,
 575,
 1277,
 2381,
 2065,
 1744,
 2185,
 691,
 714,
 304,
 251,


In [20]:
# Empty node -> score mapping, filled in by the next cell.
dict_map_eq_centrality = dict()
# Scores are handed out downwards, starting at the number of ranked nodes.
counter = len(mec_ranks)

In [21]:
# Turn the ranking into scores: the first node in mec_ranks receives
# len(mec_ranks), every following node one less, and the last node receives 1.
# The counter is reset here so that re-running this cell on its own reproduces
# the same scores instead of continuing from the exhausted counter (0).
counter = len(mec_ranks)
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter -= 1

In [22]:
# Display the node -> score mapping (a higher score means a better rank).
dict_map_eq_centrality

{835: 2539,
 790: 2538,
 2055: 2537,
 604: 2536,
 2093: 2535,
 2151: 2534,
 2040: 2533,
 2038: 2532,
 1196: 2531,
 123: 2530,
 1936: 2529,
 131: 2528,
 67: 2527,
 2091: 2526,
 347: 2525,
 707: 2524,
 124: 2523,
 66: 2522,
 494: 2521,
 687: 2520,
 644: 2519,
 836: 2518,
 607: 2517,
 410: 2516,
 572: 2515,
 577: 2514,
 2282: 2513,
 1092: 2512,
 258: 2511,
 217: 2510,
 147: 2509,
 166: 2508,
 1229: 2507,
 1903: 2506,
 2077: 2505,
 2205: 2504,
 2201: 2503,
 303: 2502,
 77: 2501,
 925: 2500,
 196: 2499,
 621: 2498,
 620: 2497,
 877: 2496,
 2129: 2495,
 1955: 2494,
 2127: 2493,
 1449: 2492,
 357: 2491,
 205: 2490,
 2173: 2489,
 395: 2488,
 694: 2487,
 271: 2486,
 257: 2485,
 299: 2484,
 618: 2483,
 606: 2482,
 766: 2481,
 187: 2480,
 70: 2479,
 2231: 2478,
 2087: 2477,
 2088: 2476,
 2089: 2475,
 2039: 2474,
 224: 2473,
 321: 2472,
 417: 2471,
 16: 2470,
 256: 2469,
 208: 2468,
 431: 2467,
 580: 2466,
 2208: 2465,
 2188: 2464,
 1043: 2463,
 43: 2462,
 1095: 2461,
 339: 2460,
 600: 2459,
 101:

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/HumanSocial-AdolescentHealth/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/HumanSocial-AdolescentHealth/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
# Round-trip check: the mapping read back from disk should equal the one written.
# The float values read back compare equal to the int scores that were written.
dict_map_eq_centrality_read == dict_map_eq_centrality

True