# Real-world network

In [1]:
import networkx as nx
from networkx.algorithms.centrality import *
import numpy as np
import operator
from analysis import *

In [2]:
g = nx.read_gml("./RealNetworks/deezer_eu.gml")

In [3]:
print(nx.info(g))

Graph with 28281 nodes and 92752 edges



  print(nx.info(g))


# Check connectedness

In [4]:
nx.is_connected(g)

True

In [5]:
nx.number_connected_components(g)

1

# Get LCC if graph is disconnected

# Relabel it all if nodes are strings
- To be able to run all types of correlation together
- But put the graph relabled in `g_relabled`

In [6]:
from myTruss import mappingAndRelabeling

In [7]:
g_relabled = mappingAndRelabeling(g)

In [8]:
print(nx.info(g))
print(nx.info(g_relabled))

Graph with 28281 nodes and 92752 edges
Graph with 28281 nodes and 92752 edges



  print(nx.info(g))

  print(nx.info(g_relabled))


In [9]:
g = g_relabled

# Infer community structure with Louvain

In [10]:
nx.__version__

'2.8.4'

In [11]:
import pandas as pd
from os.path  import exists
import infomap as im

In [12]:
im.__version__

'2.3.0'

In [13]:
G = g

In [14]:
partition   = nx.algorithms.community.louvain_communities(G, seed = 0)
partition_Q = nx.algorithms.community.modularity(G, partition)

for seed in Bar("Progress", check_tty = False).iter(range(1,5)):
    partition_   = nx.algorithms.community.louvain_communities(G, seed = seed)
    partition_Q_ = nx.algorithms.community.modularity(G, partition_)
    
    if partition_Q_ > partition_Q:
        partition   = partition_
        partition_Q = partition_Q_

Progress |################################| 4/4
[?25h

In [15]:
communities = { community_ID : set(community) for (community_ID, community) in enumerate(partition, start = 1)}

In [16]:
community_assignments = dict()
for (community_ID, community) in communities.items():
    for node in community:
        community_assignments[node] = community_ID

In [17]:
results_file = "./results/deezereu-modularity.csv"

if not exists(results_file):
    # Map Equation Centrality
    tree_file = "./results/deezereu-temp.tree"

    # use Infomap to write a tree file that we change to reflect the Louvain partition
    infomap = im.Infomap(silent = True, num_trials = 1, directed = False, two_level = True, seed = 42)
    infomap.add_networkx_graph(G)
    infomap.run()
    infomap.write_tree(tree_file)
    
    infomap_tree = dict()

    # read the tree file
    with open(tree_file, "r") as fh:
        for line in fh:
            if not line.startswith("#"):
                _, flow, name, node_id = line.strip().split(" ")
                name = name[1:-1]
                infomap_tree[int(name)] = (flow, node_id)
    
    # write the Louvain partition to an Infomap tree file
    with open("./results/deezereu-modularity.tree", "w") as fh:
        for community_ID, community in enumerate(partition, start = 1):
            for node_pos, node in enumerate(community, start = 1):
                path = f"{community_ID}:{node_pos}"
                flow, node_id = infomap_tree[node]
                fh.write(f"{path} {flow} \"{node}\" {node_id}\n")
    
    # Load the Louvain partition with Infomap and use unrecorded link teleportation
    infomap2 = im.Infomap("--silent --no-infomap --cluster-data ./results/deezereu-modularity.tree")
    node_mapping = infomap2.add_networkx_graph(G)
    infomap2.run()
    
    mec_ranks = [node for node, _score in sorted( [ (node_mapping[node.node_id], node.modular_centrality) for node in infomap2.iterLeafNodes() ]
                                                 , key     = second
                                                 , reverse = True
                                                 )]

    
    index = []
    for node in G.nodes:
        index.append(node)

    results = pd.DataFrame(index=index)

    results["map equation centrality rank"]       = toRanking(mec_ranks,        results.index)
    results.to_csv(results_file)

else:
    results = pd.read_csv(results_file, index_col = 0)

    mec_ranks         = mkRanks(results["map equation centrality rank"])

In [18]:
len(mec_ranks)

28281

In [19]:
# node 107 has rank 0 (i.e. 1)
# node 1684 has rank 1 (i.e. 2)
# ...
# node 0 has rank 4 (i.e. 5)
mec_ranks

[508,
 2284,
 509,
 995,
 1263,
 844,
 737,
 4322,
 772,
 7026,
 1013,
 2682,
 881,
 4812,
 371,
 343,
 5565,
 3976,
 2122,
 1358,
 3349,
 974,
 118,
 1520,
 112,
 5256,
 805,
 3810,
 685,
 80,
 2244,
 3375,
 10553,
 574,
 8,
 1212,
 342,
 1867,
 2317,
 338,
 3394,
 6775,
 344,
 5515,
 4899,
 1381,
 1668,
 2502,
 3850,
 7112,
 4332,
 1710,
 6562,
 1968,
 2919,
 376,
 2456,
 2689,
 4383,
 4327,
 265,
 2731,
 341,
 3966,
 1180,
 2936,
 1955,
 1757,
 937,
 5699,
 345,
 2710,
 1221,
 1533,
 879,
 10858,
 3496,
 1518,
 2715,
 2516,
 4759,
 1352,
 3185,
 2964,
 4580,
 5038,
 562,
 524,
 3492,
 865,
 7175,
 2706,
 2116,
 3897,
 741,
 5504,
 541,
 6953,
 357,
 1354,
 5496,
 5435,
 4319,
 4932,
 385,
 6530,
 7220,
 2830,
 1833,
 745,
 1215,
 1578,
 1278,
 2183,
 1707,
 747,
 2902,
 191,
 827,
 9654,
 571,
 3493,
 7089,
 3434,
 2987,
 928,
 2624,
 6699,
 349,
 2277,
 8478,
 3002,
 7826,
 3413,
 1834,
 3419,
 4628,
 4202,
 3371,
 1983,
 3313,
 7,
 2557,
 4603,
 744,
 390,
 8658,
 1207,
 534,
 222

In [20]:
counter = len(mec_ranks)
dict_map_eq_centrality = {}

In [21]:
for node in mec_ranks:
    dict_map_eq_centrality[int(node)] = counter
    counter = counter - 1

In [22]:
dict_map_eq_centrality

{508: 28281,
 2284: 28280,
 509: 28279,
 995: 28278,
 1263: 28277,
 844: 28276,
 737: 28275,
 4322: 28274,
 772: 28273,
 7026: 28272,
 1013: 28271,
 2682: 28270,
 881: 28269,
 4812: 28268,
 371: 28267,
 343: 28266,
 5565: 28265,
 3976: 28264,
 2122: 28263,
 1358: 28262,
 3349: 28261,
 974: 28260,
 118: 28259,
 1520: 28258,
 112: 28257,
 5256: 28256,
 805: 28255,
 3810: 28254,
 685: 28253,
 80: 28252,
 2244: 28251,
 3375: 28250,
 10553: 28249,
 574: 28248,
 8: 28247,
 1212: 28246,
 342: 28245,
 1867: 28244,
 2317: 28243,
 338: 28242,
 3394: 28241,
 6775: 28240,
 344: 28239,
 5515: 28238,
 4899: 28237,
 1381: 28236,
 1668: 28235,
 2502: 28234,
 3850: 28233,
 7112: 28232,
 4332: 28231,
 1710: 28230,
 6562: 28229,
 1968: 28228,
 2919: 28227,
 376: 28226,
 2456: 28225,
 2689: 28224,
 4383: 28223,
 4327: 28222,
 265: 28221,
 2731: 28220,
 341: 28219,
 3966: 28218,
 1180: 28217,
 2936: 28216,
 1955: 28215,
 1757: 28214,
 937: 28213,
 5699: 28212,
 345: 28211,
 2710: 28210,
 1221: 28209,
 1533

In [23]:
# Write Centrality
with open('./MapEquationCentralityResults/OSN-DeezerEU/dict_map_eq_centrality.txt', 'w') as f:
    for key, value in dict_map_eq_centrality.items():
        f.write('%s:%s\n' % (key, value))

In [24]:
# Read Centrality
dict_map_eq_centrality_read = dict()
with open('./MapEquationCentralityResults/OSN-DeezerEU/dict_map_eq_centrality.txt') as raw_data:
    for item in raw_data:
        if ':' in item:
            key,value = item.split(':', 1)
            value = value.replace('\n', '')
            dict_map_eq_centrality_read[int(key)]=float(value)
        else:
            pass # deal with bad lines of text here

In [25]:
dict_map_eq_centrality_read == dict_map_eq_centrality

True