**Imports**

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%reload_ext autoreload
%autoreload 2

In [3]:
import json
import networkx as nx
import numpy as np
import pandas as pd
import scipy
from sklearn.neighbors import kneighbors_graph
from tqdm import tqdm_notebook, tqdm
from collections import Counter, defaultdict
import pickle
import community
import multiprocessing as mp
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode, plot
init_notebook_mode(connected=True)
from utils import *
import numpy as np
import scipy.sparse
import math

**Loading graphs and vectors datasets**

### Cora dataset

In [4]:
# Cora citation graph. Node ids are kept as read from the edge list, so the
# feature/label dictionaries below are keyed by the stringified node id.
G = nx.read_edgelist('../Datasets/Cora/cora_edges.txt')

vect = {}    # node id (str) -> feature vector (all columns between id and label)
labels = {}  # node id (str) -> class label (last column of the line)
# `with` closes the file even when a malformed line raises part-way through.
with open("../Datasets/Cora/cora_features", "r") as sim_file:
    for line in tqdm_notebook(sim_file, total=len(G.nodes()), leave=False):
        fields = line.split()  # split once instead of three times per line
        if not fields:
            continue  # tolerate blank lines (e.g. a trailing newline)
        node_key = str(int(fields[0]))
        labels[node_key] = fields[-1]
        vect[node_key] = np.asarray(list(map(float, fields[1:-1])))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2708.0), HTML(value='')))

In [27]:
# Ground-truth label of every node currently in G.
# A comprehension is used so that no loop variable leaks into the notebook
# namespace: the previous `for n in ...` loop rebound the global `n`, which
# the LFR / random-vector cells use as the node count.
gt_labels = {node: labels[node] for node in G.nodes()}

### Sinanet dataset

In [54]:
G = nx.read_edgelist('../Datasets/Sinanet/Sinanet-master/edge.txt')

# Line k (1-based) of content.txt holds the feature vector of node "k".
vect = {}
# `with` closes the file even if a line fails to parse as floats.
with open("../Datasets/Sinanet/Sinanet-master/content.txt", "r") as sim_file:
    rows = tqdm_notebook(sim_file, total=len(G.nodes()), leave=False)
    for line_number, line in enumerate(rows, start=1):
        vect[str(line_number)] = np.asarray(list(map(float, line.split())))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3467.0), HTML(value='')))

In [80]:
# Build node id (str) -> cluster index from clusters.txt.
# Every non-zero value is a node id and is assigned the current cluster index;
# the first zero after a non-zero value (or at the very start) advances the
# index. NOTE(review): zeros appear to act as separators/padding between
# clusters - confirm against the dataset description.
label_dict = {}
clust_iter = 0
in_zero_run = False  # True while consecutive zeros are being skipped
# `with` closes the file even if a line fails to parse.
with open("../Datasets/Sinanet/Sinanet-master/clusters.txt", "r") as clust_file:
    for line in tqdm_notebook(clust_file, total=len(G.nodes()), leave=False):
        # Deliberately NOT called `labels`: that name holds the Cora label
        # dictionary and must not be rebound to an array here.
        member_ids = np.asarray(list(map(float, line.split())))
        for member in member_ids:
            if member == 0:
                if not in_zero_run:
                    clust_iter += 1
                    in_zero_run = True
            else:
                in_zero_run = False
                label_dict[str(int(member))] = clust_iter

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3467.0), HTML(value='')))

### Political Blogs dataset

In [5]:
G = nx.read_gml('../Datasets/Political Blogs/polblogs.gml').to_undirected()

# Two-component vector per blog: [value, 1 if value == 0 else 0].
polit_vect = {}
for blog, attrs in G.nodes(data=True):
    side = int(attrs['value'])
    complement = 1 if side == 0 else 0
    polit_vect[blog] = np.asarray([side, complement])

vect = polit_vect

### WebKB dataset (Washington subset)

In [5]:
G = nx.read_edgelist('../Datasets/WebKB/Washington.cites.txt').to_undirected()

label_dict = {}  # node id -> class label (last column)
vect = {}        # node id (str) -> integer feature vector
# `with` closes the file even when a malformed line raises part-way through.
with open("../Datasets/WebKB/Washington.content.txt", "r") as sim_file:
    for line in tqdm_notebook(sim_file, total=len(G.nodes()), leave=False):
        fields = line.split()  # split once instead of three times per line
        if not fields:
            continue  # tolerate blank lines (e.g. a trailing newline)
        node = fields[0]
        label_dict[node] = fields[-1]
        # NOTE(review): [1:-2] drops the last TWO columns, whereas the Cora
        # loader uses [1:-1] (label only). Kept as-is; confirm that the
        # second-to-last column of this file is really not a feature.
        vect[str(node)] = np.asarray(list(map(int, fields[1:-2])))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=230.0), HTML(value='')))

## LFR

In [4]:
import random

In [180]:
# Two LFR benchmark graphs on the same n nodes that differ in mixing
# parameter, average degree and seed.
# Per the networkx signature LFR_benchmark_graph(n, tau1, tau2, mu, ...):
# tau1 / tau2 are the power-law exponents of the degree / community-size
# distributions and mu is the fraction of inter-community edges per node.
n = 1000
tau1 = 3
tau2 = 2
mu = 0.1
G_first = nx.generators.community.LFR_benchmark_graph(
    n, tau1, tau2, mu, average_degree=100, max_degree=450, seed=11
)
# `mu` is rebound here, so after this cell it holds 0.4 (the value used for G_second).
mu = 0.4
G_second = nx.generators.community.LFR_benchmark_graph(
    n, tau1, tau2, mu, average_degree=150, max_degree=450, seed=15
)


In [153]:
communities = {frozenset(G_first.nodes[v]['community']) for v in G_first}

In [173]:
# Remove from G_first every intra-community edge that G_second does not have.
# Only existing edges of G_first can be removed, so iterate over those
# (O(E)) instead of testing every ordered node pair (O(n^2)). The edge list
# is snapshotted because the graph is mutated inside the loop.
for node_1, node_2 in list(G_first.edges()):
    # The original scan only considered pairs of G_second's nodes.
    if node_1 not in G_second or node_2 not in G_second:
        continue
    same_community = (
        G_first.nodes[node_1]['community'] == G_first.nodes[node_2]['community']
    )
    if same_community and not G_second.has_edge(node_1, node_2):
        G_first.remove_edge(node_1, node_2)

In [6]:
# Distribute the edges of the current G_first alternately between two fresh
# 3000-node graphs.
# The edge list must be captured BEFORE G_first is rebound: previously the
# loop iterated over the edges of the brand-new empty graph, so it never ran
# and both graphs stayed empty.
# NOTE(review): assumes the graph to split is the G_first defined by an
# earlier cell and that its nodes are 0..2999 - confirm.
source_edges = list(G_first.edges())

G_first = nx.empty_graph(3000)
G_second = nx.empty_graph(3000)

for position, (u, v) in enumerate(source_edges):
    # even positions -> G_first, odd positions -> G_second
    target = G_first if position % 2 == 0 else G_second
    target.add_edge(u, v)

In [45]:
# One random 30-dimensional binary feature vector per node 0..n-1.
# A dedicated, seeded generator makes the vectors reproducible across kernel
# restarts, in line with the explicitly seeded graph generators in this notebook.
_feature_rng = np.random.RandomState(0)
random_vect = {i: _feature_rng.randint(2, size=30) for i in range(n)}

In [242]:
mu = 0.7
tau1 = 3
tau2 = 2

In [251]:
G_third = nx.generators.community.LFR_benchmark_graph(
    n, tau1, tau2, mu, average_degree=10, max_degree=150, seed=15
)

### Graph info

In [9]:
print('Number of nodes - {0}, Number of edges - {1}'.format(G.number_of_nodes(), G.number_of_edges()))


Number of nodes - 1000, Number of edges - 12907


### Synthetic networks

### ER-BA networks

In [20]:
G_first = nx.barabasi_albert_graph(500, 13, seed=3)
G_second = nx.gnp_random_graph(500, 0.05, seed=5)

In [13]:
print('Number of nodes - {0}, Number of edges - {1}'.format(G_first.number_of_nodes(), G_first.number_of_edges()))

Number of nodes - 500, Number of edges - 6331


In [14]:
print('Number of nodes - {0}, Number of edges - {1}'.format(G_second.number_of_nodes(), G_second.number_of_edges()))

Number of nodes - 500, Number of edges - 24886


In [47]:
G_first = nx.gnp_random_graph(3000, 0.1, seed=10)
G_second = nx.gnp_random_graph(3000, 0.4, seed=7)

### ER-Star graph

In [4]:
G_first = nx.gnp_random_graph(3000, 0.1, seed=3)
G_second = nx.star_graph(2999)

In [67]:
# Degree sequences for the Havel-Hakimi graphs.
first_seq = list(range(1, 101))
# Positions 0-49 keep their value, positions 50-99 are lowered by one,
# giving 1..50 followed by 50..99.
fin_first_seq = [
    degree if position < 50 else degree - 1
    for position, degree in enumerate(first_seq)
]
# The second sequence is the first one in reverse order.
fin_second_seq = fin_first_seq[::-1]


In [69]:
G_first = nx.generators.degree_seq.havel_hakimi_graph(fin_first_seq)
G_second = nx.generators.degree_seq.havel_hakimi_graph(fin_second_seq)

In [111]:
G_first = nx.path_graph(100)
G_second = nx.gnp_random_graph(100, 0.3, seed=3)

In [121]:
# Three disjoint 50-node cliques on nodes 0-49, 50-99 and 100-149.
G_1 = nx.complete_graph(50)
G_2, G_3 = (
    nx.relabel_nodes(nx.complete_graph(50), lambda x, shift=shift: x + shift)
    for shift in (50, 100)
)

G_first = nx.union_all([G_1, G_2, G_3])

In [122]:
# Three disjoint 50-node cycles on nodes 0-49, 50-99 and 100-149.
# All three now go through the public `nx.cycle_graph` name; the first one
# previously used the `nx.classic` submodule path, unlike the other two.
G_1 = nx.cycle_graph(50)
G_2 = nx.relabel_nodes(nx.cycle_graph(50), lambda x: x + 50)
G_3 = nx.relabel_nodes(nx.cycle_graph(50), lambda x: x + 100)

G_second = nx.union_all([G_1, G_2, G_3])

### Edge swap

In [17]:
# Baseline G(n=500, p=0.05) random graph with a fixed seed.
G_first = nx.gnp_random_graph(500, 0.05, seed=3)
# Per the networkx docs double_edge_swap rewires its argument in place and
# returns it, hence the .copy() so that G_first keeps its original wiring.
# No seed is passed here, so the rewired graph differs between runs.
G_second = nx.double_edge_swap(G_first.copy(), nswap = 800, max_tries = 24000)

In [49]:
print('Number of nodes - {0}, Number of edges - {1}'.format(G_first.number_of_nodes(), G_first.number_of_edges()))

Number of nodes - 500, Number of edges - 6235


**Removing nodes without features**

In [82]:
# Drop every node for which no feature vector is available in `vect`.
nodes = list(G.nodes)  # snapshot: G is mutated below
G.remove_nodes_from(node for node in nodes if node not in vect)

print('Number of nodes - {0}, Number of edges - {1}'.format(G.number_of_nodes(), G.number_of_edges()))

Number of nodes - 1000, Number of edges - 5521


**Removing nodes without edges**

In [83]:
# Drop every node of degree zero. Removing an isolated node cannot change the
# degree of any other node, so the candidates can be collected up front.
nodes = list(G.nodes)
isolated = [node for node in nodes if G.degree(node) == 0]
G.remove_nodes_from(isolated)

print('Number of nodes - {0}, Number of edges - {1}'.format(G.number_of_nodes(), G.number_of_edges()))

Number of nodes - 1000, Number of edges - 5521


**Calculating significance normalization & common interests statistics**

In [52]:
# Normalise the feature vectors; `omega` is passed on later to
# not_fixed_topology_graph when the similarity matrix is built.
significance_normalized_vect, omega = significance_normalization(vect)

# NOTE(review): this rebinds `mu`, which the LFR cells use as the mixing
# parameter passed to LFR_benchmark_graph - re-run those cells (or rename one
# of the two) before generating further LFR graphs.
# NOTE(review): the statistics use G_first together with `vect`; confirm that
# `vect` is keyed by G_first's node ids in the scenario being run.
_, mu, sigma = calc_common_interests_stats(G_first, vect)
# Last expression of the cell: displays the (mu, sigma) pair.
mu, sigma

(2.481289072818526, 1.0753005943180265)

#### Weighted graphs for synthetic data

In [9]:
# Give every edge of both synthetic graphs weight 1 and report, per graph,
# whether it now counts as weighted.
for graph in (G_first, G_second):
    for u, v in graph.edges():
        graph[u][v]['weight'] = 1

    print('Graph is weighted - {0}'.format(nx.is_weighted(graph)))

Graph is weighted - True
Graph is weighted - True


## Normalize

In [58]:
# Scale G_first's edge weights so that they sum to 1.
f_weight = G_first.size(weight='weight')  # total edge weight

# Each weight is divided by the total instead of being overwritten with
# 1/total. For unit weights the result is identical, but existing non-uniform
# weights are respected and re-running the cell is a no-op (previously a second
# run reset every weight to 1, because the total was then 1).
if f_weight:  # nothing to scale when the total weight is zero
    for _u, _v, edge_data in G_first.edges(data=True):
        # a missing weight counts as 1, matching Graph.size(weight=...)
        edge_data['weight'] = edge_data.get('weight', 1) / f_weight
    

In [56]:
# Reset every edge of the three synthetic graphs to weight 1 and report, per
# graph, whether it now counts as weighted.
for graph in (G_first, G_second, G_third):
    for u, v in graph.edges():
        graph[u][v]['weight'] = 1

    print('Graph is weighted - {0}'.format(nx.is_weighted(graph)))

Graph is weighted - True
Graph is weighted - True
Graph is weighted - True


In [57]:
G_second_f = create_weighted_graph_from_two_graphs(G_second, G_third, alpha=0.45)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=50877.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=50047.0), HTML(value='')))

**Creating weighted graph**

In [47]:
# Work on a copy so that G itself stays unweighted.
modified_G = G.copy()

for u, v in modified_G.edges():
    modified_G[u][v]['weight'] = 1

print('Graph is weighted - {0}'.format(nx.is_weighted(modified_G)))

Graph is weighted - True


**If the topology is not fixed and the similarity-matrix file does not exist, compute the similarity matrix and save it to a file**

In [54]:
# Manual switches: the similarity matrix is (re)computed only when both are False.
fixed_topology = False
similarity_file_exists = False

similarity_matrix_file_name = 'similarity_matrix_finals_corr_leid_1.txt'
if not (fixed_topology or similarity_file_exists):
    not_fixed_topology_graph(G_first, vect, omega, cosine_sim, similarity_matrix_file_name)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3000.0), HTML(value='')))

#### Constructing graph from the vector data

In [55]:
attr_G = nx.Graph()

In [50]:
from random import shuffle

# Build the attribute graph from the similarity-matrix file: one line per
# node, shaped (node, [entry, entry, ...]) where entry[0] is a neighbour id
# and entry[1] the similarity used as edge weight.
# `with` closes the file even if a line fails to parse.
with open(similarity_matrix_file_name, "r") as sim_file:
    for line in tqdm_notebook(sim_file, total=len(G.nodes()), leave=False):
        # SECURITY: eval() executes arbitrary expressions - only read files
        # that this notebook itself produced.
        sim_tuple = eval(line)
        node = sim_tuple[0]
        # The first entry is skipped (presumably the node's similarity to
        # itself - confirm against not_fixed_topology_graph).
        res_tuple = sim_tuple[1][1:]
        # Iterate over the entries actually present on the line. The loop used
        # to run over range(len(G.nodes()) - 1), but the file is written from
        # G_first, so a differently sized G raised IndexError or silently
        # dropped neighbours.
        for entry in res_tuple:
            neighbour = entry[0]
            # skip self-pairs and keep the first weight seen for each pair
            if node != neighbour and not attr_G.has_edge(node, neighbour):
                attr_G.add_edge(node, neighbour, weight=entry[1])

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12056.0), HTML(value='')))

In [57]:
print('Number of nodes - {0}, Number of edges - {1}'.format(attr_G.number_of_nodes(), attr_G.number_of_edges()))

Number of nodes - 3000, Number of edges - 4498500


In [58]:
attr_G.size(weight='weight')

77402.66685016255

In [53]:
attr_G.size()

72667540

**Detecting communities and calculating metrics for the given input parameters**

In [25]:
# 21 mixing weights 0.00, 0.05, ..., 1.00. linspace fixes the number of points
# and makes the end point exactly 1.0, whereas arange with a float step relies
# on rounding for both.
alphas = np.linspace(0.0, 1.0, 21)

In [176]:
# Sweep over similarity metric x alpha x gaussian weighting x algorithm and
# collect one metrics report (plus the partition) per alpha.
similarity_metrics = [cosine_sim]
metric_names = ['cosine_sim']
gaussian_weighting = [False]
algorithm = ['leiden']
metrics_report = []
clusters = 0
partition = []
clusts = {}  # alpha -> partition of the last run for that alpha
# The loop indices are deliberately not called `n` / `k`: `n` is the node
# count used by the LFR and random-vector cells and was silently overwritten
# with 0 by this loop.
for metric_idx, similarity in enumerate(tqdm_notebook(similarity_metrics)):
    for alpha in tqdm_notebook(alphas, leave=False):
        for gauss_idx, gaussian in enumerate(tqdm_notebook(gaussian_weighting, leave=False)):
            for algo in algorithm:
                # Descriptive label of the current configuration (not used by the call below).
                fname = 'algorithm-{}||metric_name-{}||alpha-{}||gaussian_weighting-{}'.format(algo, metric_names[metric_idx], alpha, gaussian_weighting[gauss_idx])

                # NOTE(review): this rebinds the notebook-global `G` to the
                # graph returned by calculate_metrics_graphs; later cells that
                # read `G` see that graph, not the loaded dataset.
                report, clusters, G, partition = calculate_metrics_graphs(G_first, G_second, alpha, algo=algo, viz=False, average=5, ground_truth=True, gt_labels=gt_labels)

                report['alpha'] = alpha
                report['algorithm'] = algo
                metrics_report.append(report)
                clusts[alpha] = partition


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=21.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6373229026794434
IG Read Done in 0.5145978927612305
Done
Leiden Finished in 0.14263248443603516
1000
Leiden Finished in 0.14760589599609375
1000
Leiden Finished in 0.17554759979248047
1000
Leiden Finished in 0.09578323364257812
1000
Leiden Finished in 0.10671401023864746
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.632225751876831
IG Read Done in 0.5196089744567871
Done
Leiden Finished in 0.14667391777038574
1000
Leiden Finished in 0.11668705940246582
1000
Leiden Finished in 0.10372281074523926
1000
Leiden Finished in 0.12267160415649414
1000
Leiden Finished in 0.10671377182006836
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6482656002044678
IG Read Done in 0.49567437171936035
Done
Leiden Finished in 0.11768531799316406
1000
Leiden Finished in 0.09973335266113281
1000
Leiden Finished in 0.09886789321899414
1000
Leiden Finished in 0.10073041915893555
1000
Leiden Finished in 0.0857706069946289
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.5914201736450195
IG Read Done in 0.49271678924560547
Done
Leiden Finished in 0.14557576179504395
1000
Leiden Finished in 0.15203642845153809
1000
Leiden Finished in 0.11565995216369629
1000
Leiden Finished in 0.1476154327392578
1000
Leiden Finished in 0.1715373992919922
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7141213417053223
IG Read Done in 0.5673060417175293
Done
Leiden Finished in 0.16207170486450195
1000
Leiden Finished in 0.11768722534179688
1000
Leiden Finished in 0.11967968940734863
1000
Leiden Finished in 0.1685483455657959
1000
Leiden Finished in 0.13464021682739258
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7180905342102051
IG Read Done in 0.5502593517303467
Done
Leiden Finished in 0.1490616798400879
1000
Leiden Finished in 0.19650936126708984
1000
Leiden Finished in 0.18505501747131348
1000
Leiden Finished in 0.14162945747375488
1000
Leiden Finished in 0.21007370948791504
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7639625072479248
IG Read Done in 0.5608596801757812
Done
Leiden Finished in 0.29569482803344727
1000
Leiden Finished in 0.21445202827453613
1000
Leiden Finished in 0.2104511260986328
1000
Leiden Finished in 0.1854710578918457
1000
Leiden Finished in 0.21444272994995117
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.577486515045166
IG Read Done in 0.519650936126709
Done
Leiden Finished in 0.14760637283325195
1000
Leiden Finished in 0.16554951667785645
1000
Leiden Finished in 0.14860105514526367
1000
Leiden Finished in 0.26433253288269043
1000
Leiden Finished in 0.3339223861694336
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6413164138793945
IG Read Done in 0.5518887042999268
Done
Leiden Finished in 0.24141597747802734
1000
Leiden Finished in 0.2124319076538086
1000
Leiden Finished in 0.14959979057312012
1000
Leiden Finished in 0.22344374656677246
1000
Leiden Finished in 0.17054390907287598
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7480056285858154
IG Read Done in 0.590465784072876
Done
Leiden Finished in 0.17715740203857422
1000
Leiden Finished in 0.21438837051391602
1000
Leiden Finished in 0.2872326374053955
1000
Leiden Finished in 0.26030397415161133
1000
Leiden Finished in 0.2532837390899658
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6582369804382324
IG Read Done in 0.5754821300506592
Done
Leiden Finished in 0.2865476608276367
1000
Leiden Finished in 0.2735579013824463
1000
Leiden Finished in 0.33519816398620605
1000
Leiden Finished in 0.2064516544342041
1000
Leiden Finished in 0.17055845260620117
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.662226676940918
IG Read Done in 0.5455412864685059
Done
Leiden Finished in 0.17740631103515625
1000
Leiden Finished in 0.1685466766357422
1000
Leiden Finished in 0.24933314323425293
1000
Leiden Finished in 0.29374003410339355
1000
Leiden Finished in 0.32013916969299316
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.8487319946289062
IG Read Done in 0.6423094272613525
Done
Leiden Finished in 0.19448447227478027
1000
Leiden Finished in 0.24634075164794922
1000
Leiden Finished in 0.24035906791687012
1000
Leiden Finished in 0.22037959098815918
1000
Leiden Finished in 0.21244120597839355
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6991410255432129
IG Read Done in 0.5534787178039551
Done
Leiden Finished in 0.34993791580200195
1000
Leiden Finished in 0.18350839614868164
1000
Leiden Finished in 0.23043322563171387
1000
Leiden Finished in 0.14760923385620117
1000
Leiden Finished in 0.17154240608215332
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6632261276245117
IG Read Done in 0.5453674793243408
Done
Leiden Finished in 0.2647695541381836
1000
Leiden Finished in 0.40296220779418945
1000
Leiden Finished in 0.3407931327819824
1000
Leiden Finished in 0.2293870449066162
1000
Leiden Finished in 0.2732431888580322
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6552493572235107
IG Read Done in 0.5415806770324707
Done
Leiden Finished in 0.2653191089630127
1000
Leiden Finished in 0.27921223640441895
1000
Leiden Finished in 0.28220319747924805
1000
Leiden Finished in 0.16755175590515137
1000
Leiden Finished in 0.3961608409881592
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7689757347106934
IG Read Done in 0.5851178169250488
Done
Leiden Finished in 0.21746158599853516
1000
Leiden Finished in 0.3500993251800537
1000
Leiden Finished in 0.2962350845336914
1000
Leiden Finished in 0.2253890037536621
1000
Leiden Finished in 0.1914689540863037
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7859363555908203
IG Read Done in 0.5584688186645508
Done
Leiden Finished in 0.36983251571655273
1000
Leiden Finished in 0.27825498580932617
1000
Leiden Finished in 0.47541069984436035
1000
Leiden Finished in 0.28702855110168457
1000
Leiden Finished in 0.32782506942749023
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.7100658416748047
IG Read Done in 0.5396008491516113
Done
Leiden Finished in 0.2939457893371582
1000
Leiden Finished in 0.22136569023132324
1000
Leiden Finished in 0.35506510734558105
1000
Leiden Finished in 0.2892305850982666
1000
Leiden Finished in 0.3919854164123535
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6881561279296875
IG Read Done in 0.5553035736083984
Done
Leiden Finished in 0.27524471282958984
1000
Leiden Finished in 0.2622957229614258
1000
Leiden Finished in 0.2612621784210205
1000
Leiden Finished in 0.2912154197692871
1000
Leiden Finished in 0.22240281105041504
1000


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98266.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19388.0), HTML(value='')))

kek
NX Write Done in 0.6782245635986328
IG Read Done in 0.5425100326538086
Done
Leiden Finished in 0.5213234424591064
1000
Leiden Finished in 0.23636746406555176
1000
Leiden Finished in 0.3276989459991455
1000
Leiden Finished in 0.3829813003540039
1000
Leiden Finished in 0.31345438957214355
1000



### Save Results

In [177]:
topology = 'f' #f - fixed, nf - not fixed
sim = 'cos' #cos - cosine similarity, hamm - hamming similarity
top_k = '' #kpercent - top k percent (0, 0.25, 0.5, 0.75)
dataset = 'cora_dataset' #dataset name
result_file_name = dataset+'_'+sim+'_'+topology+top_k+'.txt'
# `with` flushes and closes the file even if the write fails.
# NOTE(review): the "Load Results" cell evaluates this file line by line, so
# str(metrics_report) is assumed to fit on a single line - confirm that the
# reports contain no values whose repr spans several lines.
with open(result_file_name, 'w+') as hs:
    hs.write(str(metrics_report))

### Load Results

In [489]:
# Reload the saved reports; if the file has several lines, the last one wins.
# SECURITY: eval() executes whatever expression the file contains - only load
# result files written by this notebook.
# `with` closes the handle, which was previously left open.
with open(result_file_name, 'r') as hs:
    for line in hs:
        metrics_report = eval(line)

In [492]:
# One list per metric, in the order of the reports (one report per alpha).
mods_mean = [report['modularity_mean'] for report in metrics_report]
mods_std = [report['modularity_std'] for report in metrics_report]
attr_mods_mean = [report['attr_modularity_mean'] for report in metrics_report]
attr_mods_std = [report['attr_modularity_std'] for report in metrics_report]
modified_mods_mean = [report['mod_modularity_mean'] for report in metrics_report]
modified_mods_std = [report['mod_modularity_std'] for report in metrics_report]
# "Anti-entropy": 1 - entropy.
entropy_mean = [1 - report['graph_entropy_mean'] for report in metrics_report]
# A standard deviation is unchanged by x -> 1 - x, so the spread of the
# anti-entropy is the entropy's std itself (it was previously 1 - std).
entropy_std = [report['graph_entropy_std'] for report in metrics_report]

### Compute Modularities

In [493]:
# Alpha-weighted modularity components and the "differential" between the
# composite modularity and the weighted sum of its two components.
# linspace fixes the number of points and makes the end point exactly 1.0.
alphas = np.linspace(0, 1, 21)
mods_sum = []
mods_alpha = []
mods_alpha_std = []
attr_mods_alpha = []
attr_mods_alpha_std = []
mods_mix = []
differential_mean = []
differential_std = []
for i in range(len(mods_mean)):
    w_struct = alphas[i]      # weight of the structural part
    w_attr = 1 - alphas[i]    # weight of the attributive part
    mods_alpha.append(mods_mean[i] * w_struct)
    # std(a * X) = a * std(X): the spread is scaled with the same weight as
    # the mean (it used to be left unscaled, overstating the error bars).
    mods_alpha_std.append(mods_std[i] * w_struct)
    attr_mods_alpha.append(attr_mods_mean[i] * w_attr)
    attr_mods_alpha_std.append(attr_mods_std[i] * w_attr)
    mods_sum.append(mods_alpha[i] + attr_mods_alpha[i])
    # At the end points one component has zero weight and 2/(a*(1-a)) is
    # undefined; isclose avoids exact float comparison.
    if np.isclose(w_struct, 0) or np.isclose(w_struct, 1):
        differential_mean.append(0)
        mods_mix.append(0)
        differential_std.append(0)
    else:
        al = 2 / (w_struct * w_attr)
        diff_mean = modified_mods_mean[i] - mods_sum[i]
        differential_mean.append(diff_mean)
        # Errors added in quadrature (treats the three terms as independent),
        # using the weighted component spreads computed above.
        diff_std = math.sqrt(modified_mods_std[i]**2 + (mods_alpha_std[i]**2 + attr_mods_alpha_std[i]**2))
        differential_std.append(diff_std)
        mix = al * diff_mean
        mods_mix.append(mix)

### Results plot

In [503]:
import plotly

import plotly.graph_objs as go


def _error_bar_trace(values, errors, label, line_style):
    """Line+marker trace over `alphas` with data error bars on the primary y axis."""
    return go.Scatter(
        x=alphas,
        y=values,
        mode='lines+markers',
        error_y=dict(type='data', array=errors),
        name=label,
        yaxis='y',
        line=line_style,
    )


# Composite modularity, its two weighted components and their differential.
trace0 = _error_bar_trace(
    modified_mods_mean, modified_mods_std, 'Composite',
    dict(color='dodgerblue', width=2, dash='dash'),
)
trace1 = _error_bar_trace(
    mods_alpha, mods_alpha_std, 'Structural',
    dict(color='crimson', width=2),
)
trace2 = _error_bar_trace(
    attr_mods_alpha, attr_mods_alpha_std, 'Attributive',
    dict(color='salmon', width=2),
)
trace3 = _error_bar_trace(
    differential_mean, differential_std, 'Differential',
    dict(color='darkblue', width=2, dash='dash'),
)

data = [trace0, trace1, trace2, trace3]

# A secondary axis is declared in the layout; every trace above is bound to 'y'.
secondary_axis_font = dict(color='rgb(148, 103, 189)')
layout = go.Layout(
    yaxis=dict(title='Modularity'),
    yaxis2=dict(
        title='NMI',
        titlefont=secondary_axis_font,
        tickfont=dict(color='rgb(148, 103, 189)'),
        overlaying='y',
        side='right',
    ),
    xaxis=dict(title=r'$\alpha$'),
)

fig = go.Figure(data=data, layout=layout)
# Horizontal legend below the plot area; the legend itself is hidden.
fig.update_layout(
    legend=dict(x=0, y=-0.2, orientation="h"),
    showlegend=False,
)
fig.show()


### Attributes-aware Modularity and Entropy

In [None]:
def Norm(x):
    """Min-max normalise a sequence of numbers to the range [0, 1].

    Parameters
    ----------
    x : non-empty sequence of numbers.

    Returns
    -------
    numpy.ndarray
        ``(x - min(x)) / (max(x) - min(x))``. When all values are equal the
        range is zero and an array of zeros is returned (the division used to
        yield NaNs in that case).

    Raises
    ------
    ValueError
        If ``x`` is empty (raised by ``min``).
    """
    mi = min(x)
    shifted = np.array(x) - mi  # computed once, reused for range and result
    ma = max(shifted)
    if ma == 0:
        # constant input: avoid 0/0
        return np.zeros(shifted.shape, dtype=float)
    return shifted / ma

# Rescale both series with Norm so that they can be drawn on one shared
# y axis in the plot cell that follows.
entropy_mean_norm = Norm(entropy_mean)
attr_mods_mean_fixed_norm = Norm(attr_mods_mean)

In [None]:
import plotly

import plotly.graph_objs as go

# Settings shared by both curves; no error bars are drawn for them.
_shared_trace_args = dict(x=alphas, mode='lines+markers', yaxis='y')

trace0 = go.Scatter(
    y=entropy_mean_norm,
    name='Min-max normalized Anti-Entropy',
    line=dict(color='dodgerblue', width=2, dash='dash'),
    **_shared_trace_args
)
trace1 = go.Scatter(
    y=attr_mods_mean_fixed_norm,
    name='Min-max normalized Attributes-aware Modularity',
    line=dict(color='crimson', width=2),
    **_shared_trace_args
)

data = [trace0, trace1]

layout = go.Layout(
    yaxis=dict(title='Quality measure value'),
    xaxis=dict(title=r'$\alpha$'),
)

fig = go.Figure(data=data, layout=layout)
# Horizontal legend below the plot area, shown.
fig.update_layout(
    legend=dict(x=0, y=-0.2, orientation="h"),
    showlegend=True,
)

fig.show()
fig.write_image("leiden_paper/cora_ent_attr.pdf")
