In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Red ratio by different policies - Random source nodes.

In [2]:
# Different policies.
policies = ['Node2vec', 'ResourceAllocation', 'PreferencialAttachment', 'JaccardCoefficient', 'Gain', 'ExpGain', 
           'AdamicAdar', 'Random']

In [3]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sRandomSources.txt' %policy)

In [4]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Random Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19df9fb7d30>

### Red ratio by different policies - Best red source nodes.

In [5]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sRedSources.txt' %policy)

In [6]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Best Red Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfaf57640>

### Red ratio by different policies - Best blue source nodes.

In [7]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('redRatioBy%sBlueSources.txt' %policy)

In [8]:
fig = plt.figure()
fig.suptitle('Red Ratio For Different Policies - Best Blue Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Red Ratio')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfab768b0>

### Average acceptance probability (node2vec recommendation) by different policies - Random source nodes.

In [9]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sRandomSources.txt' %policy)

In [10]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Random Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfac07d60>

### Average acceptance probability (node2vec recommendation) by different policies - Best red source nodes.

In [11]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sRedSources.txt' %policy)

In [12]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Best Red Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfac6b9d0>

### Average acceptance probability (node2vec recommendation) by different policies - Best blue source nodes.

In [13]:
# Load red ratio per epoch for each policy.
redRatio = dict()
for policy in policies:    
    redRatio[policy] = np.loadtxt('node2vecBy%sBlueSources.txt' %policy)

In [14]:
fig = plt.figure()
fig.suptitle('Average Acceptance Probability For Different Policies - Best Blue Source Nodes.')
plt.xlabel('Epoch')
plt.ylabel('Acceptance Probability')
xAxis = np.arange(len(redRatio['Node2vec']) )
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfa037df0>

### Final PageRank vs initial PageRank - Random source nodes.

In [15]:
# Load PageRanks.
initialPagerank = np.loadtxt('out_pagerank.txt', skiprows= 1, usecols= 1)
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sRandomSources.txt' %policy, skiprows=1, usecols= 1)

In [16]:
# Load Communities.
tempCommunities = np.loadtxt('out_community.txt', skiprows= 1, dtype =int)
communities= np.zeros(tempCommunities[:,1].size, dtype= int)

for i in range(communities.size):
    node = tempCommunities[i][0]
    community = tempCommunities[i][1]
    communities[node] = community

In [17]:
# To calculate red ratio of network every 10 nodes.
def getRedRatio(pagerank, communities):
    index = np.argsort(-pagerank)
    redPagerank = 0.
    totalPagerank = 0.
    redRatio = 0.
    redRatios = []
    for i in range(pagerank.size):
        totalPagerank += pagerank[index[i] ]
        if communities[index[i] ]:
            redPagerank += pagerank[index[i] ]
        if (i + 1) % 10 == 0:
            redRatio = redPagerank / totalPagerank
            redRatios.append(redRatio)
    
    return redRatios

In [18]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [19]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19dfa029dc0>

### Final PageRank vs initial PageRank - Best red source nodes.

In [20]:
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sRedSources.txt' %policy, skiprows=1, usecols= 1)

In [21]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [22]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19d812d2460>

### Final PageRank vs initial PageRank - Best blue source nodes.

In [23]:
finalPagerank = dict()
for policy in policies:
    finalPagerank[policy] = np.loadtxt('finalPagerankBy%sBlueSources.txt' %policy, skiprows=1, usecols= 1)

In [24]:
# Calculate red ratio per 10 nodes for all policies.
initialRedRatio = getRedRatio(initialPagerank, communities)
redRatios = dict()
for policy in policies:
    redRatio[policy] = getRedRatio(finalPagerank[policy], communities)

In [25]:
fig = plt.figure()
fig.suptitle("Network's PageRank by order - 10 Nodes points.")
plt.xlabel("Nodes")
plt.ylabel("Red Ratio")
xAxis= np.arange(1, len(initialRedRatio) + 1)
plt.plot(xAxis, initialRedRatio, label= 'Initial')
for policy in policies:
    plt.plot(xAxis, redRatio[policy], label= policy)
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x19d811463a0>

### Target nodes analysis by each policy - Random source nodes.

In [26]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sRandomSources.txt' %policy, sep= '\t')

In [27]:
edgesSelected['Node2vec'].head()

Unnamed: 0,Source,Target,node2vecScore,resAllocScore,jaccCoefScore,prefAttScore,adamicAdarScore,gain,expGain
0,16525,0,1.0,0.0,0.0,5,0.0,-7e-06,-7e-06
1,1190,0,1.0,0.0,0.0,14,0.0,2e-06,2e-06
2,3593,6964,0.970024,0.003175,0.004785,209,0.173835,3e-05,2.9e-05
3,7550,6964,0.923008,0.007937,0.004785,209,0.20677,3e-05,2.8e-05
4,17576,6964,0.937705,0.0,0.0,209,0.0,3e-05,2.8e-05


In [28]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()

In [29]:
uniqueDestinations

{'Node2vec': 51,
 'ResourceAllocation': 5184,
 'PreferencialAttachment': 12,
 'JaccardCoefficient': 7492,
 'Gain': 154,
 'ExpGain': 105,
 'AdamicAdar': 4833,
 'Random': 11692}

In [30]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.919964,0.503613,0.513771,0.3883605,0.578917,0.717939,0.515007,0.3261448
resAllocScore,0.004035,0.08666,0.012807,0.05052223,0.00029,0.000814,0.0806,0.0001795223
jaccCoefScore,0.001693,0.245165,0.001021,0.4600524,0.000905,0.001116,0.244389,0.002329881
prefAttScore,515.348565,627.630861,2112.380726,317.9956,12.332918,79.976936,777.231456,26.3895
adamicAdarScore,0.050848,0.499783,0.111464,0.3863572,0.001747,0.01129,0.532371,0.002890983
gain,9e-06,-2e-06,-7e-06,-8.384949e-07,8.3e-05,7.9e-05,-3e-06,1.131456e-06
expGain,9e-06,-3e-06,-4e-06,-1.941148e-06,5.3e-05,5.7e-05,-4e-06,-1.910666e-07


### Target nodes analysis by each policy - Best red source nodes.

In [31]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sRedSources.txt' %policy, sep= '\t')

In [32]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()
uniqueDestinations

{'Node2vec': 13,
 'ResourceAllocation': 648,
 'PreferencialAttachment': 10,
 'JaccardCoefficient': 705,
 'Gain': 19,
 'ExpGain': 23,
 'AdamicAdar': 636,
 'Random': 972}

In [190]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.6738,0.516824,0.626146,0.380776,0.301394,0.571095,0.511317,0.361328
resAllocScore,0.122551,0.239108,0.136179,0.138787,0.00724,0.060847,0.223446,0.013487
jaccCoefScore,0.0214,0.09076,0.019588,0.262745,0.027541,0.030433,0.095939,0.020356
prefAttScore,3177.395,2936.491,4582.956,1338.508,67.438,1035.603,3059.216,477.086
adamicAdarScore,1.222828,2.263575,1.428224,1.600237,0.095778,0.727784,2.359128,0.223359
gain,0.000174,0.000428,7.5e-05,0.000456,0.000687,0.000521,0.000435,8.1e-05
expGain,0.000147,0.000249,6.2e-05,0.000209,0.000238,0.000354,0.000248,4e-05


### Target nodes analysis by each policy - Best blue source nodes.

In [191]:
# Load edges selected by each policy.
edgesSelected = dict()
for policy in policies:
    edgesSelected[policy] = pd.read_csv('edgesSelectedBy%sBlueSources.txt' %policy, sep= '\t')

In [192]:
uniqueDestinations = dict()
for policy in policies:
    uniqueDestinations[policy] = edgesSelected[policy]['Target'].nunique()
uniqueDestinations

{'Node2vec': 10,
 'ResourceAllocation': 335,
 'PreferencialAttachment': 10,
 'JaccardCoefficient': 425,
 'Gain': 11,
 'ExpGain': 24,
 'AdamicAdar': 318,
 'Random': 683}

In [193]:
mean = dict()
for policy in policies:
    mean[policy] = edgesSelected[policy].mean().take([2,3,4,5,6,7,8])

mean = pd.DataFrame.from_dict(mean)
print("Mean values for selected edges per policy")
mean

Mean values for selected edges per policy


Unnamed: 0,Node2vec,ResourceAllocation,PreferencialAttachment,JaccardCoefficient,Gain,ExpGain,AdamicAdar,Random
node2vecScore,0.415677,0.296557,0.379139,0.205959,0.189658,0.352293,0.300125,0.217795
resAllocScore,0.016324,0.081034,0.032722,0.038446,0.00036,0.003612,0.076052,0.005077
jaccCoefScore,0.0046,0.074188,0.007367,0.283544,0.005182,0.002552,0.075517,0.010607
prefAttScore,1022.54,854.715,1509.74,194.19,21.356,570.463,888.974,186.323
adamicAdarScore,0.232432,0.96456,0.47219,0.62574,0.007247,0.071522,0.998127,0.110305
gain,0.000326,-0.001005,5.4e-05,-0.001091,0.001665,0.001212,-0.001042,-9.7e-05
expGain,0.000267,-0.000743,2.1e-05,-0.000564,0.000545,0.00084,-0.000792,-7.4e-05


### Networks quality features.

In [194]:
df = pd.read_csv('networksQualityFeatures.txt', sep= '\t')
df

Unnamed: 0,NumberOfNodes,1222
0,NumberOfEdges,16717.0
1,RedRatio,0.520458
2,RedPagerank,0.34999
3,Homophily,0.188747


In [195]:
df = pd.read_csv('groupQualityFeatures.txt', sep= '\t')
df

Unnamed: 0,Group,Ratio,homophily
0,0,0.479542,0.0
1,1,0.520458,0.348809
