In [40]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Red ratio by different policies - Random source nodes.

In [41]:
# Different policies.
policies = ['Node2vec', 'ResourceAllocation', 'PreferencialAttachment', 'JaccardCoefficient', 'Gain', 'ExpGain', 
           'AdamicAdar', 'Random']

In [42]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sRandomSources.txt' %policy)

In [43]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Random Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455d27d220>

### Red ratio by different policies - Best red source nodes.

In [44]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sRedSources.txt' %policy)

In [45]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Best Red Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455e95f1f0>

### Red ratio by different policies - Best blue source nodes.

In [46]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sBlueSources.txt' %policy)

In [47]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Best Blue Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455d34db20>

### Average acceptance probability (node2vec recommendation) by different policies - Random source nodes.

In [48]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sRandomSources.txt' %policy)

In [49]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Random Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455ea59340>

### Average acceptance probability (node2vec recommendation) by different policies - Best red source nodes.

In [50]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sRedSources.txt' %policy)

In [51]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Best Red Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455eae32e0>

### Average acceptance probability (node2vec recommendation) by different policies - Best blue source nodes.

In [52]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sBlueSources.txt' %policy)

In [53]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Best Blue Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455ea370a0>

### Final PageRank vs initial PageRank - Random source nodes.

In [77]:
# Load PageRanks.
initialPagerank = np.loadtxt('out_pagerank.txt', skiprows= 1, usecols= 1)
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sRandomSources.txt' %policy, skiprows=1, usecols= 1)

In [78]:
# Load Communities.
tempCommunities = np.loadtxt('out_community.txt', skiprows= 1, dtype =int)
communities= np.zeros(tempCommunities[:,1].size, dtype= int)

for i in range(communities.size):
    node = tempCommunities[i][0]
    community = tempCommunities[i][1]
    communities[node] = community

In [133]:
# To calculate red ratio of network every 10 nodes.
def getRedRatio(pagerank, communities):
    index = np.argsort(-pagerank)
    redPagerank = 0.
    totalPagerank = 0.
    redRatio = 0.
    redRatios = []
    for i in range(pagerank.size):
        totalPagerank += pagerank[index[i] ]
        if communities[index[i] ]:
            redPagerank += pagerank[index[i] ]
        if (i + 1) % 10 == 0:
            redRatio = redPagerank / totalPagerank
            redRatios.append(redRatio)
    
    return redRatios

In [134]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [137]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455eac0b50>

### Final PageRank vs initial PageRank - Best red source nodes.

In [138]:
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sRedSources.txt' %policy, skiprows=1, usecols= 1)

In [139]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [140]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1455efebbe0>

### Final PageRank vs initial PageRank - Best blue source nodes.

In [141]:
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sBlueSources.txt' %policy, skiprows=1, usecols= 1)

In [142]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [143]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x14562cf6c70>

### Target nodes analysis by each policy - Random source nodes.

In [144]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sRandomSources.txt' %policy, sep= '\t')

In [152]:
edgesSelected['Node2vec'].head()

Unnamed: 0,Source,Target,node2vecScore,resAllocScore,jaccCoefScore,prefAttScore,adamicAdarScore,gain,expGain
0,1204,454,0.975626,0.273275,0.032,3645,2.22518,-3.2e-05,-3.1e-05
1,1190,454,0.847475,0.01495,0.008163,972,0.39972,-2.4e-05,-2e-05
2,701,454,0.946166,0.0,0.0,729,0.0,0.000288,0.000272
3,69,454,0.737878,0.643713,0.067669,9963,5.13824,-2e-06,-1e-06
4,1059,454,0.768113,0.0,0.0,243,0.0,0.000182,0.00014


In [150]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()

In [151]:
uniqueDestinations

{'Node2vec': 17,
 'ResourceAllocation': 380,
 'PreferencialAttachment': 18,
 'JaccardCoefficient': 628,
 'Gain': 27,
 'ExpGain': 50,
 'AdamicAdar': 347,
 'Random': 766}

In [187]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.709913,0.507952,0.640211,0.369803,0.28364,0.580713,0.507121,0.343841
resAllocScore,0.091495,0.265137,0.133244,0.168203,0.003749,0.030133,0.25493,0.015569
jaccCoefScore,0.020408,0.112975,0.025898,0.265611,0.014136,0.021932,0.118794,0.020127
prefAttScore,3937.168033,3659.00082,5838.439344,1884.616393,122.172951,1937.67623,3657.730328,585.577869
adamicAdarScore,1.095282,3.241303,1.692272,2.479008,0.064016,0.454746,3.355209,0.277686
gain,8.2e-05,1.8e-05,5.3e-05,4e-06,0.000267,0.000208,1.9e-05,3.9e-05
expGain,6.8e-05,7e-06,4.4e-05,5e-06,8.6e-05,0.000134,6e-06,1.6e-05


### Target nodes analysis by each policy - Best red source nodes.

In [188]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sRedSources.txt' %policy, sep= '\t')

In [189]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()
uniqueDestinations

{'Node2vec': 12,
 'ResourceAllocation': 278,
 'PreferencialAttachment': 11,
 'JaccardCoefficient': 408,
 'Gain': 21,
 'ExpGain': 40,
 'AdamicAdar': 250,
 'Random': 695}

In [190]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.6738,0.516824,0.626146,0.380776,0.301394,0.571095,0.511317,0.361328
resAllocScore,0.122551,0.239108,0.136179,0.138787,0.00724,0.060847,0.223446,0.013487
jaccCoefScore,0.0214,0.09076,0.019588,0.262745,0.027541,0.030433,0.095939,0.020356
prefAttScore,3177.395,2936.491,4582.956,1338.508,67.438,1035.603,3059.216,477.086
adamicAdarScore,1.222828,2.263575,1.428224,1.600237,0.095778,0.727784,2.359128,0.223359
gain,0.000174,0.000428,7.5e-05,0.000456,0.000687,0.000521,0.000435,8.1e-05
expGain,0.000147,0.000249,6.2e-05,0.000209,0.000238,0.000354,0.000248,4e-05


### Target nodes analysis by each policy - Best blue source nodes.

In [191]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sBlueSources.txt' %policy, sep= '\t')

In [192]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()
uniqueDestinations

{'Node2vec': 10,
 'ResourceAllocation': 335,
 'PreferencialAttachment': 10,
 'JaccardCoefficient': 425,
 'Gain': 11,
 'ExpGain': 24,
 'AdamicAdar': 318,
 'Random': 683}

In [193]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.415677,0.296557,0.379139,0.205959,0.189658,0.352293,0.300125,0.217795
resAllocScore,0.016324,0.081034,0.032722,0.038446,0.00036,0.003612,0.076052,0.005077
jaccCoefScore,0.0046,0.074188,0.007367,0.283544,0.005182,0.002552,0.075517,0.010607
prefAttScore,1022.54,854.715,1509.74,194.19,21.356,570.463,888.974,186.323
adamicAdarScore,0.232432,0.96456,0.47219,0.62574,0.007247,0.071522,0.998127,0.110305
gain,0.000326,-0.001005,5.4e-05,-0.001091,0.001665,0.001212,-0.001042,-9.7e-05
expGain,0.000267,-0.000743,2.1e-05,-0.000564,0.000545,0.00084,-0.000792,-7.4e-05


### Networks quality features.

In [194]:
df = pd.read_csv('networksQualityFeatures.txt', sep= '\t')
df

Unnamed: 0,NumberOfNodes,1222
0,NumberOfEdges,16717.0
1,RedRatio,0.520458
2,RedPagerank,0.34999
3,Homophily,0.188747


In [195]:
df = pd.read_csv('groupQualityFeatures.txt', sep= '\t')
df

Unnamed: 0,Group,Ratio,homophily
0,0,0.479542,0.0
1,1,0.520458,0.348809
