# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the network stored in the Caltech GML file into a NetworkX graph.
g = nx.read_gml("./RealNetworks/caltech.gml")

In [3]:
# nx.info() is deprecated (removed in NetworkX 3.0) and emits a DeprecationWarning;
# printing the graph directly gives the same "Graph with N nodes and M edges" summary.
print(g)

Graph with 1015 nodes and 70654 edges



  print(nx.info(g))


# Check connectedness

In [4]:
# True when the graph consists of a single connected component.
nx.is_connected(g)

True

In [5]:
# Number of connected components; a value of 1 agrees with the connectivity check.
nx.number_connected_components(g)

1

# Get the largest connected component (LCC) if the graph is disconnected

# Relabel all nodes if they are strings
- This makes it possible to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
# Relabel the nodes with the project helper from myTruss. The result is kept in a
# separate variable so it can be compared with the original graph before replacing it.
# NOTE(review): presumably maps string labels to integers — confirm in myTruss.
g_relabled = mappingAndRelabeling(g)

In [8]:
# Sanity check: relabeling must not change the number of nodes or edges.
# nx.info() is deprecated (removed in NetworkX 3.0); print(graph) yields the same summary.
print(g)
print(g_relabled)

Graph with 1015 nodes and 70654 edges
Graph with 1015 nodes and 70654 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
# From here on `g` refers to the relabeled graph. The originally loaded graph is no
# longer reachable under this name; re-run the loading cell to get it back.
g = g_relabled

# Infer community structure with Louvain

In [10]:
# Record the NetworkX version used for this run.
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
# Record the Infomap version used for this run.
im.__version__

'2.3.0'

In [13]:
# Alias only: G is the same graph object as g (no copy is made).
G = g

In [14]:
# Baseline: Louvain partition for seed 0 together with its modularity.
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

# Try seeds 1..4 as well and keep a candidate only when its modularity is
# strictly higher than the best seen so far (ties keep the earlier partition).
for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    candidate   = nx.algorithms.community.louvain_communities(G, seed = seed)
    candidate_Q = nx.algorithms.community.modularity(G, candidate)

    if candidate_Q > partition_Q:
        partition, partition_Q = candidate, candidate_Q

Progress |################################| 4/4
[?25h

In [15]:
# Number the communities of the chosen partition from 1 and store each as a set of nodes.
communities = dict(enumerate((set(members) for members in partition), start = 1))

In [16]:
# Invert the community table: node -> ID of the community that contains it.
community_assignments = {
    member: label
    for (label, members) in communities.items()
    for member in members
}

In [17]:
# Compute the map equation centrality ranking for the Louvain partition, or load
# it from the CSV cache when that file already exists.
# NOTE: the cache is keyed only by file name; delete the CSV to force a recomputation
# after the graph or the partition has changed.
results_file = "./results/caltech-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/caltech-temp.tree"
    # Single definition of the Louvain tree path: it is written below and then
    # passed to Infomap via --cluster-data, so both uses must stay in sync.
    modularity_tree_file = "./results/caltech-modularity.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    # (only the per-node flow and node id columns of this file are reused).
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # node name (as int) -> (flow, node_id), both kept as the strings read from the file
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            stripped = line.strip()
            # Skip header/comment lines and blank lines; only data rows have 4 fields.
            if not stripped or stripped.startswith("#"):
                continue
            _, flow, name, node_id = stripped.split(" ")
            name = name[1:-1]  # drop the surrounding double quotes
            infomap_tree[int(name)] = (flow, node_id)

    # write the Louvain partition to an Infomap tree file
    # (nodes must be integers here, because infomap_tree is keyed by int(name))
    with open(modularity_tree_file, "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Load the Louvain partition with Infomap; --no-infomap keeps the given partition
    # instead of optimising it.
    # NOTE(review): an earlier comment mentioned "unrecorded link teleportation", but no
    # teleportation flag is passed here — confirm whether the default is what is intended.
    infomap2 = im.Infomap(f"--silent --no-infomap --cluster-data {modularity_tree_file}")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # Node labels ordered by descending modular centrality
    # (`second` comes from the project helpers; presumably it selects the score in each pair).
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    # One row per graph node, in the graph's own node order.
    index = list(G.nodes)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    # Cached run: rebuild the ranking list from the stored rank column.
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
# Number of ranked nodes; expected to equal the number of nodes in the graph.
len(mec_ranks)

1015

In [19]:
# mec_ranks is a list of node labels whose list position is the rank:
#   mec_ranks[0] has rank 0 (i.e. 1st), mec_ranks[1] has rank 1 (i.e. 2nd), ...
# When freshly computed, the list is ordered by descending modular centrality.
# NOTE(review): the example node IDs previously listed here (107, 1684, 0) did not
# match the output for this dataset, so they were replaced by this description.
mec_ranks

[187,
 53,
 52,
 256,
 96,
 153,
 403,
 95,
 621,
 175,
 145,
 613,
 501,
 141,
 402,
 196,
 623,
 510,
 91,
 493,
 144,
 148,
 654,
 732,
 64,
 386,
 131,
 660,
 511,
 219,
 736,
 699,
 500,
 77,
 142,
 300,
 726,
 622,
 92,
 499,
 130,
 218,
 707,
 503,
 133,
 129,
 656,
 502,
 147,
 234,
 276,
 280,
 513,
 495,
 734,
 492,
 704,
 138,
 563,
 478,
 217,
 491,
 149,
 223,
 46,
 146,
 464,
 231,
 708,
 248,
 75,
 738,
 761,
 220,
 270,
 213,
 311,
 483,
 516,
 497,
 332,
 657,
 655,
 124,
 123,
 127,
 652,
 209,
 661,
 413,
 197,
 154,
 277,
 664,
 230,
 735,
 120,
 271,
 288,
 79,
 614,
 182,
 696,
 143,
 475,
 247,
 468,
 697,
 481,
 63,
 282,
 523,
 58,
 625,
 467,
 301,
 727,
 188,
 494,
 221,
 659,
 47,
 128,
 24,
 200,
 756,
 461,
 55,
 620,
 180,
 799,
 814,
 210,
 158,
 856,
 632,
 466,
 648,
 211,
 118,
 76,
 203,
 89,
 151,
 779,
 195,
 126,
 244,
 476,
 235,
 460,
 173,
 82,
 485,
 782,
 299,
 471,
 482,
 388,
 29,
 228,
 519,
 156,
 651,
 633,
 207,
 722,
 806,
 518,
 269,


In [20]:
# Highest score to hand out: the number of ranked nodes (given to the top-ranked node).
counter = len(mec_ranks)
# Will map node label (int) -> score.
dict_map_eq_centrality = {}

In [21]:
# Turn the ranking into a score per node: the top-ranked node gets len(mec_ranks),
# the next one gets one less, and the last-ranked node gets 1.
# The counter and the dict are (re)initialised in this cell so that re-running it on
# its own is idempotent instead of continuing from an already decremented counter.
counter = len(mec_ranks)
dict_map_eq_centrality = {}
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter -= 1

In [22]:
# Display the node -> score mapping (the top-ranked node carries the highest score).
dict_map_eq_centrality

{187: 1015,
 53: 1014,
 52: 1013,
 256: 1012,
 96: 1011,
 153: 1010,
 403: 1009,
 95: 1008,
 621: 1007,
 175: 1006,
 145: 1005,
 613: 1004,
 501: 1003,
 141: 1002,
 402: 1001,
 196: 1000,
 623: 999,
 510: 998,
 91: 997,
 493: 996,
 144: 995,
 148: 994,
 654: 993,
 732: 992,
 64: 991,
 386: 990,
 131: 989,
 660: 988,
 511: 987,
 219: 986,
 736: 985,
 699: 984,
 500: 983,
 77: 982,
 142: 981,
 300: 980,
 726: 979,
 622: 978,
 92: 977,
 499: 976,
 130: 975,
 218: 974,
 707: 973,
 503: 972,
 133: 971,
 129: 970,
 656: 969,
 502: 968,
 147: 967,
 234: 966,
 276: 965,
 280: 964,
 513: 963,
 495: 962,
 734: 961,
 492: 960,
 704: 959,
 138: 958,
 563: 957,
 478: 956,
 217: 955,
 491: 954,
 149: 953,
 223: 952,
 46: 951,
 146: 950,
 464: 949,
 231: 948,
 708: 947,
 248: 946,
 75: 945,
 738: 944,
 761: 943,
 220: 942,
 270: 941,
 213: 940,
 311: 939,
 483: 938,
 516: 937,
 497: 936,
 332: 935,
 657: 934,
 655: 933,
 124: 932,
 123: 931,
 127: 930,
 652: 929,
 209: 928,
 661: 927,
 413: 926,
 197

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/OSN-Caltech/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/OSN-Caltech/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
# Round-trip check. The scores are integers in memory but are read back with float();
# the comparison still succeeds because Python treats e.g. 1015 == 1015.0 as equal.
dict_map_eq_centrality_read == dict_map_eq_centrality

True