# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the network from the DBLP GML file into a NetworkX graph.
g = nx.read_gml("./RealNetworks/dblp.gml")

In [3]:
# Print the one-line graph summary (graph type, node count, edge count).
# `nx.info` is deprecated in NetworkX 2.8 and removed in 3.0; printing the
# graph itself produces the same summary without the deprecation warning.
print(g)

Graph with 12494 nodes and 49579 edges



  print(nx.info(g))


# Check connectedness

In [4]:
# Check whether the graph consists of a single connected component.
nx.is_connected(g)

True

In [5]:
# Number of connected components in g.
nx.number_connected_components(g)

1

# Get the largest connected component (LCC) if the graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
# Relabel the graph with the project helper and keep the result under its own name.
# NOTE(review): presumably maps string node labels to integers (see myTruss) — confirm.
g_relabled = mappingAndRelabeling(g)

In [8]:
# Compare the summaries of the original and the relabelled graph.
# `nx.info` is deprecated in NetworkX 2.8 and removed in 3.0; printing the
# graph itself produces the same summary without the deprecation warning.
print(g)
print(g_relabled)

Graph with 12494 nodes and 49579 edges
Graph with 12494 nodes and 49579 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
# From this cell on, g refers to the relabelled graph.
g = g_relabled

# Infer community structure with Louvain

In [10]:
# Record the NetworkX version used for this run.
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
# Record the Infomap version used for this run.
im.__version__

'2.3.0'

In [13]:
# G is a second name for the same graph object; the community-detection cells use it.
G = g

In [14]:
# Baseline: Louvain partition for seed 0 and its modularity.
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

# Repeat Louvain with seeds 1..4 and keep the partition with the highest modularity.
# NOTE(review): Bar is not imported explicitly in this notebook; presumably it
# arrives through `from analysis import *` — confirm.
for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    # Strict '>' keeps the earlier partition when two seeds tie on modularity.
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
# Number the communities of the chosen partition from 1:
# community ID -> set of member nodes.
communities = dict(enumerate(map(set, partition), start = 1))

In [16]:
# Invert the community table: node -> ID of the community that contains it.
community_assignments = {}
for community_ID, community in communities.items():
    # every member of this community is assigned the same ID
    community_assignments.update(dict.fromkeys(community, community_ID))

In [17]:
results_file = "./results/dblp-modularity.csv"

# The ranking is computed once and cached as CSV; when the CSV already exists
# it is reloaded instead of running Infomap again.
if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/dblp-temp.tree"
    # Path of the Louvain tree file. Defined once because it is both written
    # below and passed to Infomap via --cluster-data; the two must agree.
    louvain_tree_file = "./results/dblp-modularity.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # node label -> (flow, node_id) as found in Infomap's tree output
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            # lines starting with '#' are skipped
            if not line.startswith("#"):
                # remaining lines are split into four space-separated fields:
                # path, flow, quoted name, node id (the path is discarded)
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]  # drop the first and last character (the quotes)
                # int(name) requires integer node labels
                infomap_tree[int(name)] = (flow, node_id)

    # write the Louvain partition to an Infomap tree file
    with open(louvain_tree_file, "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                # path is "<community>:<position within community>", both 1-based
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Load the Louvain partition with Infomap (--no-infomap together with --cluster-data)
    # NOTE(review): the original comment here mentions "unrecorded link
    # teleportation", but no teleportation flag is passed below — confirm intent.
    infomap2 = im.Infomap(f"--silent --no-infomap --cluster-data {louvain_tree_file}")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # Node labels ordered by modular centrality, highest score first.
    # NOTE(review): `second` is not defined in this notebook; presumably it comes
    # from `from analysis import *` and selects the score of each pair — confirm.
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    # one row per graph node, in the graph's own node order
    index = list(G.nodes)

    results = pd.DataFrame(index=index)

    # NOTE(review): toRanking is a project helper (not defined here); presumably
    # it turns the ordered node list into a per-node rank column — confirm.
    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    # NOTE(review): mkRanks is a project helper (not defined here); presumably
    # it rebuilds the ordered node list from the stored rank column — confirm.
    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
# Number of entries in the ranking.
len(mec_ranks)

12494

In [19]:
# mec_ranks[i] is the label of the node at rank position i (0-based).
# In the output recorded below:
# node 2 has rank 0 (i.e. 1)
# node 1610 has rank 1 (i.e. 2)
# ...
# node 517 has rank 4 (i.e. 5)
mec_ranks

[2,
 1610,
 141,
 919,
 517,
 8202,
 26,
 501,
 250,
 1,
 524,
 521,
 540,
 1243,
 502,
 507,
 1056,
 1513,
 500,
 39,
 906,
 516,
 1218,
 17,
 135,
 2112,
 1210,
 5084,
 514,
 510,
 6428,
 504,
 1596,
 30,
 523,
 2172,
 1212,
 988,
 254,
 8353,
 239,
 38,
 2798,
 661,
 3285,
 7562,
 532,
 599,
 3832,
 539,
 503,
 983,
 918,
 1006,
 499,
 1453,
 2386,
 7104,
 707,
 901,
 603,
 210,
 54,
 3649,
 1688,
 1002,
 29,
 505,
 14,
 739,
 1051,
 506,
 798,
 1230,
 986,
 89,
 704,
 1654,
 741,
 755,
 4688,
 1369,
 6022,
 4279,
 9725,
 10997,
 1705,
 52,
 61,
 83,
 563,
 1032,
 2042,
 2480,
 1693,
 994,
 23,
 1220,
 1108,
 3517,
 73,
 2901,
 708,
 11697,
 84,
 1956,
 636,
 2114,
 814,
 724,
 586,
 535,
 722,
 1073,
 518,
 1007,
 2541,
 630,
 4356,
 441,
 74,
 634,
 637,
 8006,
 2462,
 2746,
 273,
 658,
 483,
 119,
 1003,
 1076,
 11456,
 24,
 116,
 526,
 198,
 136,
 631,
 800,
 36,
 604,
 4743,
 58,
 1840,
 76,
 801,
 533,
 44,
 1034,
 810,
 3359,
 12200,
 109,
 2598,
 71,
 1267,
 1375,
 1387,
 10

In [20]:
# Scores are handed out starting from len(mec_ranks), so the first entry of
# mec_ranks will receive the largest value.
counter = len(mec_ranks)
# node label -> score, filled in by the following cell
dict_map_eq_centrality = {}

In [21]:
# Turn the rank order into a score per node: the first entry of mec_ranks gets
# len(mec_ranks), each following entry one less, down to 1 for the last entry.
# Both accumulators are (re)initialised here so that re-running this cell on
# its own rebuilds the same mapping instead of continuing from a counter that
# has already been counted down.
counter = len(mec_ranks)
dict_map_eq_centrality = {}
for node in mec_ranks:
    # keys are normalised to plain int
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [22]:
# Display the node -> score mapping.
dict_map_eq_centrality

{2: 12494,
 1610: 12493,
 141: 12492,
 919: 12491,
 517: 12490,
 8202: 12489,
 26: 12488,
 501: 12487,
 250: 12486,
 1: 12485,
 524: 12484,
 521: 12483,
 540: 12482,
 1243: 12481,
 502: 12480,
 507: 12479,
 1056: 12478,
 1513: 12477,
 500: 12476,
 39: 12475,
 906: 12474,
 516: 12473,
 1218: 12472,
 17: 12471,
 135: 12470,
 2112: 12469,
 1210: 12468,
 5084: 12467,
 514: 12466,
 510: 12465,
 6428: 12464,
 504: 12463,
 1596: 12462,
 30: 12461,
 523: 12460,
 2172: 12459,
 1212: 12458,
 988: 12457,
 254: 12456,
 8353: 12455,
 239: 12454,
 38: 12453,
 2798: 12452,
 661: 12451,
 3285: 12450,
 7562: 12449,
 532: 12448,
 599: 12447,
 3832: 12446,
 539: 12445,
 503: 12444,
 983: 12443,
 918: 12442,
 1006: 12441,
 499: 12440,
 1453: 12439,
 2386: 12438,
 7104: 12437,
 707: 12436,
 901: 12435,
 603: 12434,
 210: 12433,
 54: 12432,
 3649: 12431,
 1688: 12430,
 1002: 12429,
 29: 12428,
 505: 12427,
 14: 12426,
 739: 12425,
 1051: 12424,
 506: 12423,
 798: 12422,
 1230: 12421,
 986: 12420,
 89: 12419

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/Cite-DBLPCite/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/Cite-DBLPCite/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
# Round-trip check: the mapping read back from disk should equal the in-memory one.
# Values were reloaded as floats but still compare equal to the original ints.
dict_map_eq_centrality_read == dict_map_eq_centrality

True