# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
g = nx.read_gml("./RealNetworks/human_protein.gml")

In [3]:
print(nx.info(g))

Graph with 2217 nodes and 6418 edges



  print(nx.info(g))


# Check connectedness

In [4]:
nx.is_connected(g)

True

In [5]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- But put the graph relabled in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
g_relabled = mappingAndRelabeling(g)

In [8]:
print(nx.info(g))
print(nx.info(g_relabled))

Graph with 2217 nodes and 6418 edges
Graph with 2217 nodes and 6418 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
g = g_relabled

# Infer community structure with Louvain

In [10]:
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
im.__version__

'2.3.0'

In [13]:
G = g

In [14]:
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
communities = { community_ID : set(community) for (community_ID, community) in enumerate(partition, start = 1)}

In [16]:
community_assignments = dict()
for (community_ID, community) in communities.items():
    for node in community:
        community_assignments[node] = community_ID

In [17]:
results_file = "./results/human_protein-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/human_protein-temp.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)
    
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            if not line.startswith("#"):
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]
                infomap_tree[int(name)] = (flow, node_id)
    
    # write the Louvain partition to an Infomap tree file
    with open("./results/human_protein-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")
    
    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/human_protein-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()
    
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    
    index = []
    for node in G.nodes:
        index.append(node)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
len(mec_ranks)

2217

In [19]:
# node 107 has rank 0 (i.e. 1)
# node 1684 has rank 1 (i.e. 2)
# ...
# node 0 has rank 4 (i.e. 5)
mec_ranks

[32,
 128,
 352,
 72,
 123,
 50,
 158,
 9,
 89,
 94,
 309,
 7,
 226,
 37,
 1643,
 283,
 437,
 49,
 500,
 167,
 1526,
 197,
 28,
 238,
 200,
 150,
 77,
 386,
 83,
 79,
 204,
 47,
 202,
 564,
 490,
 171,
 495,
 56,
 334,
 116,
 291,
 862,
 136,
 253,
 1278,
 212,
 180,
 4,
 266,
 70,
 102,
 66,
 756,
 531,
 109,
 133,
 356,
 30,
 293,
 131,
 332,
 23,
 492,
 74,
 112,
 462,
 468,
 0,
 668,
 126,
 144,
 13,
 764,
 15,
 121,
 91,
 174,
 118,
 245,
 268,
 55,
 824,
 26,
 481,
 8,
 154,
 708,
 587,
 752,
 507,
 1152,
 1174,
 427,
 222,
 642,
 357,
 494,
 444,
 106,
 124,
 161,
 398,
 320,
 1079,
 801,
 793,
 267,
 895,
 644,
 95,
 397,
 313,
 1159,
 261,
 582,
 1130,
 1009,
 1015,
 211,
 1109,
 51,
 695,
 1096,
 713,
 353,
 1131,
 746,
 663,
 366,
 825,
 193,
 617,
 336,
 350,
 339,
 42,
 962,
 1012,
 1192,
 431,
 875,
 508,
 1275,
 1234,
 1616,
 885,
 2,
 331,
 52,
 520,
 527,
 682,
 533,
 1,
 75,
 48,
 301,
 217,
 686,
 231,
 61,
 851,
 555,
 750,
 690,
 163,
 244,
 624,
 1071,
 954,
 384,

In [20]:
counter = len(mec_ranks)
dict_map_eq_centrality = {}

In [21]:
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [22]:
dict_map_eq_centrality

{32: 2217,
 128: 2216,
 352: 2215,
 72: 2214,
 123: 2213,
 50: 2212,
 158: 2211,
 9: 2210,
 89: 2209,
 94: 2208,
 309: 2207,
 7: 2206,
 226: 2205,
 37: 2204,
 1643: 2203,
 283: 2202,
 437: 2201,
 49: 2200,
 500: 2199,
 167: 2198,
 1526: 2197,
 197: 2196,
 28: 2195,
 238: 2194,
 200: 2193,
 150: 2192,
 77: 2191,
 386: 2190,
 83: 2189,
 79: 2188,
 204: 2187,
 47: 2186,
 202: 2185,
 564: 2184,
 490: 2183,
 171: 2182,
 495: 2181,
 56: 2180,
 334: 2179,
 116: 2178,
 291: 2177,
 862: 2176,
 136: 2175,
 253: 2174,
 1278: 2173,
 212: 2172,
 180: 2171,
 4: 2170,
 266: 2169,
 70: 2168,
 102: 2167,
 66: 2166,
 756: 2165,
 531: 2164,
 109: 2163,
 133: 2162,
 356: 2161,
 30: 2160,
 293: 2159,
 131: 2158,
 332: 2157,
 23: 2156,
 492: 2155,
 74: 2154,
 112: 2153,
 462: 2152,
 468: 2151,
 0: 2150,
 668: 2149,
 126: 2148,
 144: 2147,
 13: 2146,
 764: 2145,
 15: 2144,
 121: 2143,
 91: 2142,
 174: 2141,
 118: 2140,
 245: 2139,
 268: 2138,
 55: 2137,
 824: 2136,
 26: 2135,
 481: 2134,
 8: 2133,
 154: 2132

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/Bio-HumanProtein/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/Bio-HumanProtein/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
dict_map_eq_centrality_read == dict_map_eq_centrality

True