# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
g = nx.read_gml("./RealNetworks/kegg_metabolic.gml")

In [3]:
print(nx.info(g))

Graph with 1865 nodes and 5769 edges



  print(nx.info(g))


# Check connectedness

In [4]:
nx.is_connected(g)

True

In [5]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- But put the graph relabled in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
g_relabled = mappingAndRelabeling(g)

In [8]:
print(nx.info(g))
print(nx.info(g_relabled))

Graph with 1865 nodes and 5769 edges
Graph with 1865 nodes and 5769 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
g = g_relabled

# Infer community structure with Louvain

In [10]:
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
im.__version__

'2.3.0'

In [13]:
G = g

In [14]:
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
communities = { community_ID : set(community) for (community_ID, community) in enumerate(partition, start = 1)}

In [16]:
community_assignments = dict()
for (community_ID, community) in communities.items():
    for node in community:
        community_assignments[node] = community_ID

In [17]:
results_file = "./results/keggmetabolic-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/keggmetabolic-temp.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)
    
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            if not line.startswith("#"):
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]
                infomap_tree[int(name)] = (flow, node_id)
    
    # write the Louvain partition to an Infomap tree file
    with open("./results/keggmetabolic-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")
    
    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/keggmetabolic-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()
    
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    
    index = []
    for node in G.nodes:
        index.append(node)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
len(mec_ranks)

1865

In [19]:
# node 107 has rank 0 (i.e. 1)
# node 1684 has rank 1 (i.e. 2)
# ...
# node 0 has rank 4 (i.e. 5)
mec_ranks

[0,
 5,
 50,
 4,
 392,
 2,
 3,
 1,
 7,
 6,
 11,
 9,
 16,
 12,
 8,
 21,
 19,
 13,
 886,
 22,
 17,
 20,
 748,
 394,
 36,
 18,
 27,
 34,
 227,
 393,
 25,
 58,
 29,
 42,
 60,
 37,
 86,
 106,
 30,
 24,
 33,
 397,
 56,
 868,
 400,
 41,
 969,
 88,
 43,
 52,
 990,
 109,
 48,
 28,
 398,
 537,
 69,
 26,
 98,
 1152,
 35,
 94,
 76,
 70,
 749,
 39,
 64,
 402,
 399,
 405,
 395,
 785,
 153,
 411,
 127,
 46,
 538,
 1057,
 989,
 783,
 154,
 67,
 1150,
 296,
 407,
 120,
 178,
 632,
 66,
 23,
 540,
 110,
 770,
 79,
 421,
 1408,
 189,
 131,
 1138,
 49,
 146,
 148,
 1302,
 51,
 680,
 403,
 55,
 396,
 662,
 73,
 31,
 82,
 57,
 175,
 121,
 286,
 1038,
 38,
 32,
 627,
 563,
 1189,
 550,
 302,
 546,
 608,
 136,
 138,
 107,
 68,
 1277,
 862,
 406,
 72,
 44,
 1199,
 409,
 224,
 553,
 544,
 422,
 408,
 161,
 216,
 572,
 231,
 536,
 696,
 679,
 811,
 641,
 1829,
 634,
 638,
 712,
 420,
 541,
 923,
 418,
 142,
 75,
 287,
 100,
 132,
 1143,
 89,
 858,
 622,
 14,
 85,
 765,
 1523,
 401,
 65,
 192,
 404,
 1312,
 246,


In [20]:
counter = len(mec_ranks)
dict_map_eq_centrality = {}

In [21]:
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [22]:
dict_map_eq_centrality

{0: 1865,
 5: 1864,
 50: 1863,
 4: 1862,
 392: 1861,
 2: 1860,
 3: 1859,
 1: 1858,
 7: 1857,
 6: 1856,
 11: 1855,
 9: 1854,
 16: 1853,
 12: 1852,
 8: 1851,
 21: 1850,
 19: 1849,
 13: 1848,
 886: 1847,
 22: 1846,
 17: 1845,
 20: 1844,
 748: 1843,
 394: 1842,
 36: 1841,
 18: 1840,
 27: 1839,
 34: 1838,
 227: 1837,
 393: 1836,
 25: 1835,
 58: 1834,
 29: 1833,
 42: 1832,
 60: 1831,
 37: 1830,
 86: 1829,
 106: 1828,
 30: 1827,
 24: 1826,
 33: 1825,
 397: 1824,
 56: 1823,
 868: 1822,
 400: 1821,
 41: 1820,
 969: 1819,
 88: 1818,
 43: 1817,
 52: 1816,
 990: 1815,
 109: 1814,
 48: 1813,
 28: 1812,
 398: 1811,
 537: 1810,
 69: 1809,
 26: 1808,
 98: 1807,
 1152: 1806,
 35: 1805,
 94: 1804,
 76: 1803,
 70: 1802,
 749: 1801,
 39: 1800,
 64: 1799,
 402: 1798,
 399: 1797,
 405: 1796,
 395: 1795,
 785: 1794,
 153: 1793,
 411: 1792,
 127: 1791,
 46: 1790,
 538: 1789,
 1057: 1788,
 989: 1787,
 783: 1786,
 154: 1785,
 67: 1784,
 1150: 1783,
 296: 1782,
 407: 1781,
 120: 1780,
 178: 1779,
 632: 1778,
 66

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/Bio-KeggMetabolic/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/Bio-KeggMetabolic/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
dict_map_eq_centrality_read == dict_map_eq_centrality

True