# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the network from its GML file (path is relative to the notebook's working directory).
g = nx.read_gml("./RealNetworks/princeton.gml")

In [3]:
# print(g) yields the same one-line summary as nx.info(g); nx.info is
# deprecated in NetworkX 2.8 (it emits a warning) and removed in 3.0.
print(g)

Graph with 6575 nodes and 293307 edges



  print(nx.info(g))


# Check connectedness

In [6]:
# If this is True the graph is a single component and no largest-connected-component extraction is needed.
nx.is_connected(g)

True

In [7]:
# Number of connected components; 1 means the whole graph is one component.
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel all nodes if they are strings
- This makes it possible to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [8]:
from myTruss import mappingAndRelabeling

In [9]:
# NOTE(review): mappingAndRelabeling comes from the project-local myTruss module;
# presumably it returns a relabeled version of g with integer node labels — confirm.
g_relabled = mappingAndRelabeling(g)

In [10]:
# Compare the size of the graph before and after relabeling.
# print(G) gives the same summary as nx.info(G), which is deprecated in
# NetworkX 2.8 (it emits a warning) and removed in 3.0.
print(g)
print(g_relabled)

Graph with 6575 nodes and 293307 edges
Graph with 6575 nodes and 293307 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [11]:
# From here on `g` refers to the relabeled graph; the originally loaded graph is no longer reachable through `g`.
g = g_relabled

# Infer community structure with Louvain

In [12]:
# Record the NetworkX version used for this run.
nx.__version__

'2.8.4'

In [13]:
import pandas as pd
from os.path  import exists
import infomap as im

In [14]:
# Record the Infomap version used for this run.
im.__version__

'2.3.0'

In [15]:
# `G` is a second name for the same graph object as `g` (an alias, not a copy).
G = g

In [16]:
# Run Louvain with seeds 0..4 and keep the partition with the highest modularity.
# Seed 0 provides the initial best; a later seed replaces it only when its
# modularity is strictly larger.
partition = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    candidate = nx.algorithms.community.louvain_communities(G, seed = seed)
    candidate_Q = nx.algorithms.community.modularity(G, candidate)

    if candidate_Q > partition_Q:
        partition, partition_Q = candidate, candidate_Q

Progress |################################| 4/4
[?25h

In [17]:
# Number the communities starting at 1 and store each one as a set:
# {community_ID: set of nodes}.
communities = dict(enumerate(map(set, partition), start = 1))

In [18]:
# Invert `communities` into a node -> community_ID lookup.
community_assignments = {
    node: community_ID
    for community_ID, community in communities.items()
    for node in community
}

In [19]:
results_file = "./results/princeton-modularity.csv"

if exists(results_file):
    # Cached run: reuse the ranking stored in the CSV instead of recomputing it.
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks = mkRanks(results["map equation centrality rank"])

else:
    # Map Equation Centrality
    tree_file = "./results/princeton-temp.tree"

    # Let Infomap write a tree file for G. Only the per-node flow and node id
    # are reused below; the module assignment is replaced by the Louvain one.
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # Parse the tree file into {node name (as int): (flow, node_id)}.
    # Lines starting with "#" are skipped.
    infomap_tree = dict()
    with open(tree_file, "r") as fh:
        for line in fh:
            if line.startswith("#"):
                continue
            _, flow, quoted_name, node_id = line.strip().split(" ")
            # drop the first and last character of the name field (the surrounding quotes)
            infomap_tree[int(quoted_name[1:-1])] = (flow, node_id)

    # Write the Louvain partition in the same four-column layout:
    # "<community>:<position> <flow> "<node>" <node_id>"
    with open("./results/princeton-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                flow, node_id = infomap_tree[node]
                path = f"{community_ID}:{node_pos}"
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Feed the Louvain partition to a second Infomap instance as cluster data,
    # with the --no-infomap flag set.
    # NOTE(review): the earlier comment said this uses unrecorded link
    # teleportation, but no teleportation flag is passed — confirm whether
    # that relies on an Infomap default.
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/princeton-modularity.tree")
    # node_mapping is indexed with Infomap node ids below to recover graph node labels
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # Pair every leaf node with its modular centrality, then order by score, highest first.
    scored_nodes = [ (node_mapping[leaf.node_id], leaf.modular_centrality) for leaf in infomap2.iterLeafNodes() ]
    scored_nodes.sort(key = second, reverse = True)
    mec_ranks = [ranked_node for ranked_node, _score in scored_nodes]

    # One row per graph node, in G.nodes order.
    results = pd.DataFrame(index = list(G.nodes))

    results["map equation centrality rank"] = toRanking(mec_ranks, results.index)
    results.to_csv(results_file)

In [20]:
# Sanity check: the ranking should contain one entry per graph node.
len(mec_ranks)

6575

In [21]:
# mec_ranks[i] is the node at 0-based rank i (i.e. 1-based rank i + 1).
# In the stored output below:
# node 108 has rank 0 (i.e. 1)
# node 531 has rank 1 (i.e. 2)
# node 0 has rank 2 (i.e. 3)
# ...
mec_ranks

[108,
 531,
 0,
 611,
 352,
 19,
 182,
 147,
 287,
 414,
 1487,
 765,
 347,
 251,
 1886,
 847,
 1992,
 1334,
 805,
 1051,
 194,
 620,
 264,
 255,
 372,
 1192,
 711,
 517,
 1376,
 329,
 320,
 1047,
 302,
 2370,
 636,
 909,
 2287,
 297,
 854,
 3313,
 862,
 432,
 400,
 614,
 826,
 896,
 65,
 318,
 232,
 1822,
 639,
 1918,
 1610,
 870,
 900,
 828,
 2444,
 1023,
 34,
 522,
 1712,
 2160,
 3246,
 211,
 732,
 2328,
 557,
 351,
 1215,
 1884,
 2243,
 2322,
 625,
 2191,
 3301,
 268,
 481,
 644,
 844,
 1932,
 1323,
 1962,
 634,
 1171,
 1986,
 2367,
 1431,
 555,
 820,
 219,
 925,
 860,
 2076,
 810,
 1024,
 377,
 408,
 296,
 415,
 2864,
 572,
 2046,
 894,
 586,
 695,
 2376,
 543,
 3183,
 1100,
 416,
 886,
 1040,
 2508,
 1732,
 2105,
 789,
 47,
 2466,
 3132,
 3239,
 1439,
 577,
 1673,
 2180,
 1852,
 346,
 938,
 2997,
 2419,
 431,
 2353,
 269,
 1871,
 356,
 2283,
 417,
 1551,
 1460,
 539,
 1922,
 1036,
 227,
 2257,
 1053,
 638,
 1289,
 3806,
 266,
 3367,
 831,
 1805,
 62,
 399,
 455,
 1472,
 842,
 696

In [22]:
# Score of the top-ranked node: with N ranked nodes, scores run from N (best) down to 1 (last).
counter = len(mec_ranks)
# node id -> score; populated in a later cell
dict_map_eq_centrality = {}

In [23]:
# Map each node to a score: the top-ranked node gets len(mec_ranks), the last one gets 1.
# The dict is rebuilt from mec_ranks alone rather than by decrementing the
# `counter` variable set in another cell, so re-running this cell is idempotent
# (previously a second run overwrote the scores with 0, -1, -2, ...).
n_ranked = len(mec_ranks)
dict_map_eq_centrality = {
    int(node): n_ranked - position
    for position, node in enumerate(mec_ranks)
}

In [28]:
# Display the node id -> score mapping.
dict_map_eq_centrality

{108: 6575,
 531: 6574,
 0: 6573,
 611: 6572,
 352: 6571,
 19: 6570,
 182: 6569,
 147: 6568,
 287: 6567,
 414: 6566,
 1487: 6565,
 765: 6564,
 347: 6563,
 251: 6562,
 1886: 6561,
 847: 6560,
 1992: 6559,
 1334: 6558,
 805: 6557,
 1051: 6556,
 194: 6555,
 620: 6554,
 264: 6553,
 255: 6552,
 372: 6551,
 1192: 6550,
 711: 6549,
 517: 6548,
 1376: 6547,
 329: 6546,
 320: 6545,
 1047: 6544,
 302: 6543,
 2370: 6542,
 636: 6541,
 909: 6540,
 2287: 6539,
 297: 6538,
 854: 6537,
 3313: 6536,
 862: 6535,
 432: 6534,
 400: 6533,
 614: 6532,
 826: 6531,
 896: 6530,
 65: 6529,
 318: 6528,
 232: 6527,
 1822: 6526,
 639: 6525,
 1918: 6524,
 1610: 6523,
 870: 6522,
 900: 6521,
 828: 6520,
 2444: 6519,
 1023: 6518,
 34: 6517,
 522: 6516,
 1712: 6515,
 2160: 6514,
 3246: 6513,
 211: 6512,
 732: 6511,
 2328: 6510,
 557: 6509,
 351: 6508,
 1215: 6507,
 1884: 6506,
 2243: 6505,
 2322: 6504,
 625: 6503,
 2191: 6502,
 3301: 6501,
 268: 6500,
 481: 6499,
 644: 6498,
 844: 6497,
 1932: 6496,
 1323: 6495,
 1962

In [29]:
# Write Centrality
# Persist the scores as plain text, one "key:value" line per node.
with open('./MapEquationCentralityResults/OSN-Princeton/dict_map_eq_centrality.txt', 'w') as f:
    f.writelines('%s:%s\n' % entry for entry in dict_map_eq_centrality.items())

In [30]:
# Read Centrality
# Parse the "key:value" lines back into a dict: keys become int, values become float.
dict_map_eq_centrality_read = {}
with open('./MapEquationCentralityResults/OSN-Princeton/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        key, separator, value = item.partition(':')
        if not separator:
            continue  # lines without ':' are ignored
        dict_map_eq_centrality_read[int(key)] = float(value.replace('\n', ''))

In [31]:
# Round-trip check. The values were written as ints but read back as floats;
# Python compares them by value (e.g. 6575 == 6575.0), so the dicts still compare equal.
dict_map_eq_centrality_read == dict_map_eq_centrality

True