# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the yeast protein network from its GML file.
g = nx.read_gml("./RealNetworks/yeast_protein.gml")

In [3]:
# nx.info() is deprecated and was removed in NetworkX 3.0; printing the graph
# yields the same "Graph with N nodes and M edges" summary without the warning.
print(g)

Graph with 1458 nodes and 1948 edges



  print(nx.info(g))


# Check connectedness

In [4]:
# Check whether the whole graph forms a single connected component.
nx.is_connected(g)

True

In [5]:
# Count the connected components (1 in the saved output, i.e. the graph is connected).
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel all nodes if they are strings
- This makes it possible to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
# Relabel the graph with the project helper and keep the result in a separate
# variable so it can be compared against the original `g`.
# NOTE(review): presumably maps string node labels to integers — confirm in myTruss.
g_relabled = mappingAndRelabeling(g)

In [8]:
# Compare the graph summaries before and after relabeling. print(G) gives the
# same text as the deprecated nx.info(G), which was removed in NetworkX 3.0.
print(g)
print(g_relabled)

Graph with 1458 nodes and 1948 edges
Graph with 1458 nodes and 1948 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
# From here on `g` refers to the relabeled graph.
# NOTE: because `g` is rebound, re-running the relabeling cell after this one
# passes the already relabeled graph to the helper.
g = g_relabled

# Infer community structure with Louvain

In [10]:
# Record the NetworkX version used for this run.
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
# Record the Infomap version used for this run.
im.__version__

'2.3.0'

In [13]:
# Alias for the (relabeled) graph, used by the community-detection cells below.
G = g

In [14]:
# Run Louvain with seeds 0..4 and keep the partition with the highest modularity.
# Seed 0 is the baseline; a later seed only replaces it when strictly better.
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

progress = Bar("Progress", check_tty = False)
for seed in progress.iter(range(1,5)):
    candidate   = nx.algorithms.community.louvain_communities(G, seed = seed)
    candidate_Q = nx.algorithms.community.modularity(G, candidate)

    # keep the current best unless this seed's modularity is strictly higher
    if not (candidate_Q > partition_Q):
        continue

    partition, partition_Q = candidate, candidate_Q

Progress |################################| 4/4
[?25h

In [15]:
# Number the communities of the chosen partition from 1 and store each as a set of nodes.
communities = dict(enumerate(map(set, partition), start = 1))

In [16]:
# Invert the community table: node -> ID of the community that contains it.
community_assignments = {
    member: community_ID
    for (community_ID, members) in communities.items()
    for member in members
}

In [17]:
results_file = "./results/yeast_protein-modularity.csv"

# Compute the map equation centrality ranking once and cache it as CSV;
# later runs load the cached ranking instead of re-running Infomap.
if not exists(results_file):
    # Map Equation Centrality
    tree_file            = "./results/yeast_protein-temp.tree"
    # single definition of the Louvain tree path, used for both writing and loading
    modularity_tree_file = "./results/yeast_protein-modularity.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    # (only the flow and node id columns of this tree are reused below)
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # node name (as int) -> (flow, node_id), both kept as the strings read from the file
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            line = line.strip()
            # skip comment/header lines and blank lines
            if not line or line.startswith("#"):
                continue
            # each data line has four space-separated fields: path flow "name" node_id
            _, flow, name, node_id = line.split(" ")
            name = name[1:-1]  # drop the first and last character (the quotes around the name)
            infomap_tree[int(name)] = (flow, node_id)

    # write the Louvain partition to an Infomap tree file
    with open(modularity_tree_file, "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    # NOTE(review): no teleportation flag is passed here — confirm that Infomap's
    # default behaviour is the intended "unrecorded link teleportation".
    infomap2 = im.Infomap(f"--silent --no-infomap --cluster-data {modularity_tree_file}")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # (node, modular centrality) pairs, then nodes sorted in descending order via
    # the `second` key helper (presumably selects the score — defined elsewhere)
    scored_nodes = [ (node_mapping[leaf.node_id], leaf.modular_centrality) for leaf in infomap2.iterLeafNodes() ]
    mec_ranks = [node for node, _score in sorted( scored_nodes
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    # one result row per graph node, in the graph's node order
    index = list(G.nodes)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
# Sanity check: number of ranked nodes (1458 in the saved output, matching the graph's node count).
len(mec_ranks)

1458

In [19]:
# mec_ranks lists node ids in rank order (list position = zero-based rank).
# In the saved output below:
# node 157 has rank 0 (i.e. 1)
# node 25 has rank 1 (i.e. 2)
# ...
# node 55 has rank 4 (i.e. 5)
mec_ranks

[157,
 25,
 68,
 57,
 55,
 105,
 15,
 238,
 69,
 315,
 2,
 18,
 224,
 464,
 34,
 1041,
 56,
 155,
 66,
 546,
 371,
 393,
 343,
 204,
 143,
 277,
 287,
 406,
 214,
 44,
 35,
 635,
 251,
 346,
 544,
 599,
 423,
 383,
 99,
 461,
 289,
 43,
 227,
 1149,
 62,
 79,
 364,
 606,
 288,
 1285,
 621,
 29,
 72,
 33,
 551,
 693,
 760,
 70,
 876,
 462,
 186,
 845,
 916,
 714,
 284,
 837,
 668,
 573,
 975,
 738,
 47,
 76,
 489,
 175,
 812,
 473,
 570,
 579,
 790,
 109,
 100,
 367,
 320,
 172,
 276,
 59,
 16,
 121,
 932,
 945,
 87,
 150,
 151,
 247,
 321,
 917,
 556,
 145,
 634,
 992,
 821,
 243,
 442,
 853,
 416,
 598,
 920,
 211,
 106,
 575,
 1291,
 319,
 823,
 322,
 708,
 709,
 923,
 230,
 222,
 1157,
 323,
 618,
 680,
 352,
 982,
 1090,
 353,
 45,
 85,
 48,
 147,
 148,
 152,
 369,
 644,
 646,
 558,
 286,
 800,
 187,
 677,
 1155,
 1,
 1318,
 1343,
 603,
 93,
 405,
 404,
 734,
 733,
 706,
 555,
 1062,
 407,
 399,
 470,
 697,
 241,
 854,
 685,
 651,
 403,
 436,
 37,
 201,
 804,
 791,
 12,
 750,
 933,

In [20]:
# Number of ranked nodes; the top-ranked node is assigned this value as its score.
counter = len(mec_ranks)
# Will map node id -> score (a higher score means a better rank).
dict_map_eq_centrality = {}

In [21]:
# Give each node a score that decreases with its rank: the first node in
# mec_ranks gets len(mec_ranks), the last one gets 1. The score is derived from
# the list position instead of a counter mutated across cells, so re-running
# this cell on its own always produces the same mapping.
n_ranked = len(mec_ranks)
dict_map_eq_centrality = {int(node): n_ranked - position for position, node in enumerate(mec_ranks)}

In [22]:
dict_map_eq_centrality

{157: 1458,
 25: 1457,
 68: 1456,
 57: 1455,
 55: 1454,
 105: 1453,
 15: 1452,
 238: 1451,
 69: 1450,
 315: 1449,
 2: 1448,
 18: 1447,
 224: 1446,
 464: 1445,
 34: 1444,
 1041: 1443,
 56: 1442,
 155: 1441,
 66: 1440,
 546: 1439,
 371: 1438,
 393: 1437,
 343: 1436,
 204: 1435,
 143: 1434,
 277: 1433,
 287: 1432,
 406: 1431,
 214: 1430,
 44: 1429,
 35: 1428,
 635: 1427,
 251: 1426,
 346: 1425,
 544: 1424,
 599: 1423,
 423: 1422,
 383: 1421,
 99: 1420,
 461: 1419,
 289: 1418,
 43: 1417,
 227: 1416,
 1149: 1415,
 62: 1414,
 79: 1413,
 364: 1412,
 606: 1411,
 288: 1410,
 1285: 1409,
 621: 1408,
 29: 1407,
 72: 1406,
 33: 1405,
 551: 1404,
 693: 1403,
 760: 1402,
 70: 1401,
 876: 1400,
 462: 1399,
 186: 1398,
 845: 1397,
 916: 1396,
 714: 1395,
 284: 1394,
 837: 1393,
 668: 1392,
 573: 1391,
 975: 1390,
 738: 1389,
 47: 1388,
 76: 1387,
 489: 1386,
 175: 1385,
 812: 1384,
 473: 1383,
 570: 1382,
 579: 1381,
 790: 1380,
 109: 1379,
 100: 1378,
 367: 1377,
 320: 1376,
 172: 1375,
 276: 1374,
 

In [23]:
# Write Centrality
# One "node:score" line per entry of the mapping.
with open('./MapEquationCentralityResults/Bio-YeastProtein/dict_map_eq_centrality.txt', 'w') as f:
    f.writelines(f"{key}:{value}\n" for key, value in dict_map_eq_centrality.items())

In [24]:
# Read Centrality
# Parse the "node:score" lines back into a dict (int node id -> float score).
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/Bio-YeastProtein/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        key, separator, value = item.partition(':')
        if not separator:
            continue # lines without a ':' are ignored; deal with bad lines of text here
        dict_map_eq_centrality_read[int(key)] = float(value.replace('\n', ''))

In [25]:
# Round-trip check: the values read back are floats while the originals are ints,
# but Python compares them by numeric value, so identical scores still compare equal.
dict_map_eq_centrality_read == dict_map_eq_centrality

True