# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
# Load the graph stored in the dnc_emails GML file (path is relative to the working directory).
g = nx.read_gml("./RealNetworks/dnc_emails.gml")

In [3]:
# Print the one-line summary of the graph (type, number of nodes, number of edges).
# print(g) produces the same text as nx.info(g); nx.info is deprecated in NetworkX 2.8
# (it emits a warning on every call) and is removed in NetworkX 3.0.
print(g)

Graph with 849 nodes and 10384 edges



  print(nx.info(g))


# Check connectedness

In [4]:
# True when the graph consists of a single connected component.
nx.is_connected(g)

True

In [5]:
# Number of connected components; 1 means the whole graph is one component.
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected (not needed here: the graph is connected)

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- The relabeled graph is stored in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
# Relabel the graph with the project helper and keep the result under a separate name.
# NOTE(review): presumably maps string node labels to integers — confirm in myTruss.mappingAndRelabeling.
g_relabled = mappingAndRelabeling(g)

In [8]:
# Compare the summaries of the original and the relabeled graph; relabeling should not
# change the number of nodes or edges.
# print(graph) produces the same text as nx.info(graph); nx.info is deprecated in
# NetworkX 2.8 (it emits a warning on every call) and is removed in NetworkX 3.0.
print(g)
print(g_relabled)

Graph with 849 nodes and 10384 edges
Graph with 849 nodes and 10384 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
# From here on `g` refers to the relabeled graph; the graph as loaded from disk is no
# longer reachable under this name.
g = g_relabled

# Infer community structure with Louvain

In [10]:
# Record the NetworkX version this notebook was run with.
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
# Record the Infomap version this notebook was run with.
im.__version__

'2.3.0'

In [13]:
# Alias only: `G` and `g` are the same graph object, no copy is made.
G = g

In [14]:
# Run Louvain with seeds 0..4 and keep the partition with the highest modularity.
# Seed 0 is the baseline; a later seed only replaces it on a strict improvement.
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)

    # Not better than the current best: keep what we have.
    if partition_Q_ <= partition_Q:
        continue

    partition, partition_Q = partition_, partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
# Number the communities of the chosen partition from 1 and store each one as its own set.
communities = dict(enumerate(map(set, partition), start = 1))

In [16]:
# Invert `communities`: map every node to the ID of the community that contains it.
community_assignments = {
    node: community_ID
    for (community_ID, community) in communities.items()
    for node in community
}

In [17]:
results_file = "./results/dnc_emails-modularity.csv"

if exists(results_file):
    # A previous run already stored the ranking: load it instead of running Infomap again.
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

else:
    # Map Equation Centrality
    tree_file = "./results/dnc_emails-temp.tree"

    # Step 1: run Infomap once, only to get a tree file that we then rewrite
    # so that it reflects the Louvain partition.
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)

    # Step 2: collect (flow, node_id) per node name from the non-comment lines of the tree file.
    infomap_tree = dict()
    with open(tree_file, "r") as fh:
        for line in fh:
            if line.startswith("#"):
                continue
            _, flow, name, node_id = line.strip().split(" ")
            name = name[1:-1]  # drop the first and last character (the quotes around the name)
            infomap_tree[int(name)] = (flow, node_id)

    # Step 3: write the Louvain partition in the same tree format, using
    # "<community>:<position in community>" as the path of each node.
    with open("./results/dnc_emails-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                flow, node_id = infomap_tree[node]
                path = f"{community_ID}:{node_pos}"
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")

    # Step 4: hand the Louvain partition to a second Infomap instance as cluster data.
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/dnc_emails-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()

    # Step 5: order the node labels by modular centrality, highest first.
    scored_nodes = [ (node_mapping[leaf.node_id], leaf.modular_centrality) for leaf in infomap2.iterLeafNodes() ]
    scored_nodes.sort(key = second, reverse = True)
    mec_ranks = [label for label, _score in scored_nodes]

    # Step 6: store the ranking, indexed by the graph's nodes, so later runs can take the cached branch.
    index = list(G.nodes)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

In [18]:
# Sanity check: number of ranked nodes (compare with the node count of the graph).
len(mec_ranks)

849

In [19]:
# mec_ranks lists node labels by rank: list position 0 holds the rank-1 node,
# position 1 the rank-2 node, and so on.
# NOTE(review): the earlier comment here named nodes 107, 1684 and 0, which does not
# match this network's output (it starts with '7', '19', '50'); it looked like a
# leftover from another dataset.
mec_ranks

['7',
 '19',
 '50',
 '13',
 '16',
 '20',
 '15',
 '10',
 '84',
 '14',
 '72',
 '56',
 '11',
 '54',
 '49',
 '17',
 '88',
 '68',
 '65',
 '31',
 '45',
 '51',
 '277',
 '37',
 '33',
 '75',
 '6',
 '132',
 '69',
 '83',
 '28',
 '70',
 '74',
 '71',
 '66',
 '63',
 '29',
 '30',
 '73',
 '76',
 '256',
 '43',
 '18',
 '138',
 '24',
 '93',
 '39',
 '34',
 '160',
 '137',
 '27',
 '22',
 '257',
 '64',
 '85',
 '107',
 '94',
 '35',
 '218',
 '42',
 '23',
 '21',
 '92',
 '61',
 '55',
 '641',
 '78',
 '113',
 '153',
 '32',
 '136',
 '131',
 '148',
 '57',
 '95',
 '130',
 '142',
 '150',
 '109',
 '123',
 '129',
 '86',
 '9',
 '67',
 '103',
 '118',
 '127',
 '110',
 '114',
 '26',
 '77',
 '134',
 '119',
 '99',
 '121',
 '143',
 '126',
 '135',
 '89',
 '251',
 '302',
 '106',
 '152',
 '151',
 '147',
 '149',
 '144',
 '101',
 '146',
 '145',
 '102',
 '141',
 '140',
 '97',
 '120',
 '122',
 '139',
 '124',
 '117',
 '128',
 '112',
 '116',
 '111',
 '133',
 '100',
 '98',
 '258',
 '58',
 '439',
 '259',
 '221',
 '25',
 '125',
 '388',
 '

In [20]:
# Highest score to hand out: one point per ranked node.
counter = len(mec_ranks)
# Will map integer node label -> centrality score.
dict_map_eq_centrality = {}

In [21]:
# Give every ranked node a score that decreases with its position in mec_ranks:
# the first node gets len(mec_ranks), the last one gets 1. Labels are cast to int.
# The score is derived from the list position instead of the running `counter`
# set in the previous cell, so re-running this cell on its own rebuilds the same
# dictionary (the old loop kept decrementing `counter`, so a second run assigned
# 0 and negative scores).
dict_map_eq_centrality = {
    int(node): len(mec_ranks) - position
    for position, node in enumerate(mec_ranks)
}

In [22]:
# Inspect the node -> score mapping.
dict_map_eq_centrality

{7: 849,
 19: 848,
 50: 847,
 13: 846,
 16: 845,
 20: 844,
 15: 843,
 10: 842,
 84: 841,
 14: 840,
 72: 839,
 56: 838,
 11: 837,
 54: 836,
 49: 835,
 17: 834,
 88: 833,
 68: 832,
 65: 831,
 31: 830,
 45: 829,
 51: 828,
 277: 827,
 37: 826,
 33: 825,
 75: 824,
 6: 823,
 132: 822,
 69: 821,
 83: 820,
 28: 819,
 70: 818,
 74: 817,
 71: 816,
 66: 815,
 63: 814,
 29: 813,
 30: 812,
 73: 811,
 76: 810,
 256: 809,
 43: 808,
 18: 807,
 138: 806,
 24: 805,
 93: 804,
 39: 803,
 34: 802,
 160: 801,
 137: 800,
 27: 799,
 22: 798,
 257: 797,
 64: 796,
 85: 795,
 107: 794,
 94: 793,
 35: 792,
 218: 791,
 42: 790,
 23: 789,
 21: 788,
 92: 787,
 61: 786,
 55: 785,
 641: 784,
 78: 783,
 113: 782,
 153: 781,
 32: 780,
 136: 779,
 131: 778,
 148: 777,
 57: 776,
 95: 775,
 130: 774,
 142: 773,
 150: 772,
 109: 771,
 123: 770,
 129: 769,
 86: 768,
 9: 767,
 67: 766,
 103: 765,
 118: 764,
 127: 763,
 110: 762,
 114: 761,
 26: 760,
 77: 759,
 134: 758,
 119: 757,
 99: 756,
 121: 755,
 143: 754,
 126: 753,
 1

In [23]:
# Write Centrality
# Persist the scores as plain text, one "node:score" line per dictionary entry.
with open('./MapEquationCentralityResults/OSN-DNCEmails/dict_map_eq_centrality.txt', 'w') as f:
    f.writelines('%s:%s\n' % entry for entry in dict_map_eq_centrality.items())

In [24]:
# Read Centrality
# Parse the "node:score" lines back into a dictionary with int keys and float values.
dict_map_eq_centrality_read = {}
with open('./MapEquationCentralityResults/OSN-DNCEmails/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' not in item:
            continue  # no "key:value" separator on this line; it is ignored
        key, value = item.split(':', 1)
        value = value.replace('\n', '')
        dict_map_eq_centrality_read[int(key)] = float(value)

In [26]:
# Round-trip check: the dictionary read back from disk must equal the one written.
# The values were read back as floats; Python treats e.g. 849.0 == 849 as equal.
dict_map_eq_centrality_read == dict_map_eq_centrality

True