In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import networkx as nx
import scipy

In [None]:
# Make Google Drive available under /content/drive so the data files and
# saved models referenced by later cells can be read and written.
from google.colab import drive

drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Hyper-parameters used by the embedding and regression cells below.
NUM_FEATURES = 1       # columns of the per-node feature matrix
EMBED_SIZE = 512       # width of each node embedding
USE_NORMALIZE = False  # not referenced by any cell visible in this notebook
LAYERS = 2             # number of neighbour-aggregation iterations

In [None]:
def _normalize_array_by_rank(true_value):
  rank = np.argsort(true_value, kind='mergesort', axis=None) #deg list get's normalised
  norm = np.empty([nr_nodes])

  for i in range(0, nr_nodes):

    norm[rank[i]] = float(i+1) / float(nr_nodes)
  
  max = np.amax(norm)
  min = np.amin(norm)
  if max > 0.0 and max > min:
    for i in range(0, nr_nodes):
      norm[i] = 2.0*(float(norm[i] - min) / float(max - min)) - 1.0
  else:
    print("Max value = 0")

  return norm, rank

In [None]:
#build feature matrix
def get_degree(i, degrees=None):
  """Return the normalised degree stored at position ``i``.

  Args:
    i: index into the degree array.
    degrees: optional indexable of normalised degrees; when omitted the
      global ``degree_norm`` is used.
  """
  if degrees is None:
    degrees = degree_norm
  return degrees[i]

def build_feature_matrix(degrees=None):
  """Return the per-node features as a list with one entry per node.

  The number of entries is taken from the degree array itself instead of the
  global ``nr_nodes``, so the result cannot go out of bounds or be truncated
  when ``nr_nodes`` has been rebound for another graph.

  Args:
    degrees: optional indexable of normalised degrees; when omitted the
      global ``degree_norm`` is used.

  Returns:
    A list holding ``degrees[0], degrees[1], ...`` in order.
  """
  if degrees is None:
    degrees = degree_norm
  return [get_degree(i, degrees) for i in range(len(degrees))]

In [None]:
# Read the edge list from Drive and build the directed NetworkX graph G.
path = '/content/drive/MyDrive/Colab_files/p2p-Gnutella08.txt'

# Lines starting with '#' are skipped; node labels are kept as read
# (nodetype=None) and fields are split on whitespace (delimiter=None).
G = nx.read_edgelist(path, comments='#', delimiter=None, create_using=nx.DiGraph,
                     nodetype=None, data=True, edgetype=None, encoding='utf-8')

# nx.info() was removed in NetworkX 3.0, so print a summary built from
# methods that exist in every NetworkX version.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")


In [None]:
# Degree of every node in G (in the iteration order of G.degree()), followed
# by rank-normalisation of those degrees.
deg_lst = [val for (node, val) in G.degree()]
nr_nodes = G.number_of_nodes()
# Print a short preview instead of the whole list: one integer per node would
# otherwise flood the cell output.
print(f"deg_lst: {len(deg_lst)} values, first 10: {deg_lst[:10]}")
degree_norm, degree_rank = _normalize_array_by_rank(deg_lst)


In [None]:
# Ground truth: exact betweenness centrality of every node in G, converted to
# rank-normalised targets.
b = list(nx.betweenness_centrality(G).values())
BC_norm_cent, BC_cent_rank = _normalize_array_by_rank(b)

In [None]:
# Structure2Vec node embedding of G.
# The weights w1..w4 are randomly initialised here and are not updated by any
# later cell, so the embedding is a fixed random projection of the graph.

# Seed TensorFlow so that Restart & Run All reproduces the same weights.
# Without it, a model saved in one session would be fed different embeddings
# after a kernel restart. Note that this also seeds later TensorFlow random ops.
EMBED_SEED = 42
tf.random.set_seed(EMBED_SEED)

# nx.to_numpy_matrix() was removed in NetworkX 3.0; to_numpy_array() returns a
# plain ndarray with the same entries. float32 matches the cast applied below.
A = nx.to_numpy_array(G, dtype=np.float32)

dim = [nr_nodes, NUM_FEATURES]

# Node features as an N x NUM_FEATURES float tensor.
node_features = tf.cast(build_feature_matrix(), tf.float32)
node_features = tf.reshape(node_features, dim)

initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0,
                                                              mode="fan_avg",
                                                              distribution="uniform")

# Sparse float32 adjacency matrix used for the neighbour sums.
A = tf.sparse.from_dense(A)
A = tf.cast(A, tf.float32)

w1 = tf.Variable(initializer((NUM_FEATURES, EMBED_SIZE)), trainable=True,
                 dtype=tf.float32, name="w1")
print("w1", w1)
w2 = tf.Variable(initializer((EMBED_SIZE, EMBED_SIZE)), trainable=True,
                 dtype=tf.float32, name="w2")
print("w2", w2)

w3 = tf.Variable(initializer((1, EMBED_SIZE)), trainable=True, dtype=tf.float32, name="w3")
print("w3", w3)

# Scalar weight applied to the row sums of the adjacency matrix.
w4 = tf.Variable(initializer([]), trainable=True, dtype=tf.float32, name="w4")
print("w4:", w4)

wx_all = tf.matmul(node_features, w1)  # NxE

# Row sums of the adjacency matrix, kept as a column vector (Nx1).
weight_sum_init = tf.sparse.reduce_sum(A, axis=1, keepdims=True)
print(weight_sum_init)
n_nodes = tf.shape(input=A)[1]
print("n_nodes: ", n_nodes)
print('_____________________________________________________')

weight_sum = tf.multiply(weight_sum_init, w4)
weight_sum = tf.nn.relu(weight_sum)  # Nx1
weight_sum = tf.matmul(weight_sum, w3)  # NxE

weight_wx = tf.add(wx_all, weight_sum)
current_mu = tf.nn.relu(weight_wx)  # NxE = H^0

# Each iteration sums the neighbours' embeddings, applies w2 and adds the
# initial term weight_wx back in before the ReLU.
for i in range(0, LAYERS):
  neighbor_sum = tf.sparse.sparse_dense_matmul(A, current_mu)
  neighbor_linear = tf.matmul(neighbor_sum, w2)  # NxE

  current_mu = tf.nn.relu(tf.add(neighbor_linear, weight_wx))  # NxE

mu_all = current_mu

print('_____________________________________________________')
print("tf.shape(mu_all): ", tf.shape(mu_all))




In [None]:
#Building NN model

UNITS = EMBED_SIZE // 2

def build_model(input_dim=EMBED_SIZE, units=UNITS):
  """Build and compile the regressor that maps one node embedding to a score.

  Args:
    input_dim: length of a single input vector (defaults to EMBED_SIZE).
    units: width of each of the two hidden ReLU layers (defaults to UNITS).

  Returns:
    A compiled tf.keras.Sequential model (SGD optimiser, MSE loss) with a
    single linear output unit. The model summary is printed as a side effect.
  """
  model = tf.keras.Sequential()
  # Keras Input shapes exclude the batch axis: every sample is one embedding
  # vector, so the shape is (input_dim,). The previous (6301, 512) declared a
  # whole node-by-embedding matrix as one sample and hard-coded the node count.
  model.add(tf.keras.Input(shape=(input_dim,)))
  model.add(tf.keras.layers.Dense(units, activation="relu"))
  model.add(tf.keras.layers.Dense(units, activation="relu"))

  model.add(tf.keras.layers.Dense(1))
  model.compile(optimizer='sgd', loss='mse')

  model.summary()

  return model

model = build_model()

In [None]:
# Inputs are the node embeddings; targets are the rank-normalised
# betweenness values.
x_train, y_train = mu_all, BC_norm_cent
for _shown in (x_train, y_train):
  print(tf.shape(_shown))


tf.Tensor([6301  512], shape=(2,), dtype=int32)
tf.Tensor([6301], shape=(1,), dtype=int32)


In [None]:
# K-fold cross-validation of the regressor.
#
# Each fold trains a fresh model on the remaining folds and is scored on the
# held-out fold. Training and evaluation happen inside the loop; previously
# the loop only sliced the data and rebuilt a model once per node, and the
# single fit/evaluate that followed covered just the final fold.
K_FOLDS = 5
num_epochs = 100

# Convert once to NumPy so both arrays can be indexed with index arrays/masks.
features = np.asarray(x_train)
targets = np.asarray(y_train)

k = K_FOLDS
# Contiguous folds in node order; array_split spreads any remainder over the
# first folds so every sample is validated exactly once.
fold_indices = np.array_split(np.arange(len(features)), k)
num_val_samples = len(features) // k

all_scores = []
for i, val_idx in enumerate(fold_indices):
  print('processing fold #', i)
  train_mask = np.ones(len(features), dtype=bool)
  train_mask[val_idx] = False

  val_data = features[val_idx]
  val_targets = targets[val_idx]
  partial_train_data = features[train_mask]
  partial_train_targets = targets[train_mask]
  print(tf.shape(partial_train_data))

  model = build_model()

  # Stop when the training loss has not improved for 3 consecutive epochs.
  # Keras expects a list of callbacks.
  callbacks = [tf.keras.callbacks.EarlyStopping(
      monitor='loss', min_delta=0, patience=3, verbose=1,
      mode='auto', baseline=None, restore_best_weights=False)]

  model.fit(partial_train_data, partial_train_targets,
            epochs=num_epochs, batch_size=1, callbacks=callbacks, verbose=1)

  val_loss = model.evaluate(val_data, val_targets, verbose=1)
  all_scores.append(val_loss)

# After the loop, `model` is the model trained in the last fold.
print("model.metrics_names: ", model.metrics_names)
print(all_scores)
print("mean validation loss:", np.mean(all_scores))


In [None]:
# Write the model twice: once to a relative path and once to the Drive folder.
for _save_path in ("GN08_model_v2.h5",
                   "/content/drive/MyDrive/Colab_files/GN08_model_v2.h5"):
  model.save(_save_path)

In [None]:
# Kendall's tau between the normalised ground truth and the model's
# predictions on the training inputs.
from scipy import stats

x_new = x_train
y_pred = model.predict(x_new)

kendall_tau, p_value = stats.kendalltau(BC_norm_cent, y_pred)

print(kendall_tau)

Workflow for predicting betweenness centrality on a second graph (Gnutella31)

In [None]:
# Gnutella 31: read the second edge list from Drive into the directed graph G2.
path2 = '/content/drive/MyDrive/Colab_files/p2p-Gnutella31.txt'

G2 = nx.read_edgelist(path2, comments='#', delimiter=None, create_using=nx.DiGraph,
                      nodetype=None, data=True, edgetype=None, encoding='utf-8')

# nx.info() was removed in NetworkX 3.0, so print a summary built from
# methods that exist in every NetworkX version.
print(f"{type(G2).__name__} with {G2.number_of_nodes()} nodes and {G2.number_of_edges()} edges")


Name: 
Type: DiGraph
Number of nodes: 62586
Number of edges: 147892
Average in degree:   2.3630
Average out degree:   2.3630


In [None]:
# Degree of every node in G2 (in the iteration order of G2.degree()), followed
# by rank-normalisation of those degrees.
deg_lst_G2 = [val for (node, val) in G2.degree()]
# NOTE: this rebinds the global nr_nodes, which an earlier cell set to G's
# node count; any cell that reads nr_nodes sees G2's count from here on.
nr_nodes = G2.number_of_nodes()
# Print a short preview instead of the whole list: one integer per node would
# otherwise flood the cell output.
print(f"deg_lst_G2: {len(deg_lst_G2)} values, first 10: {deg_lst_G2[:10]}")
degree_norm_G2, degree_rank_G2 = _normalize_array_by_rank(deg_lst_G2)
print(nr_nodes)

deg_lst_G2: 
 [23, 36, 6, 36, 13, 15, 31, 18, 18, 22, 20, 7, 3, 2, 17, 17, 8, 2, 5, 17, 1, 3, 8, 3, 3, 3, 1, 2, 1, 1, 4, 27, 6, 12, 2, 1, 1, 12, 4, 2, 2, 22, 2, 11, 1, 2, 12, 1, 3, 1, 4, 2, 16, 13, 11, 4, 1, 1, 7, 1, 21, 19, 19, 25, 19, 21, 30, 18, 25, 13, 15, 2, 2, 2, 42, 1, 1, 3, 13, 4, 19, 5, 1, 1, 1, 8, 6, 11, 8, 1, 19, 5, 1, 1, 1, 1, 3, 5, 1, 1, 28, 1, 4, 2, 2, 1, 1, 18, 15, 1, 17, 22, 8, 18, 2, 4, 1, 7, 1, 1, 1, 3, 1, 1, 6, 11, 1, 1, 2, 1, 3, 12, 1, 2, 1, 2, 1, 12, 1, 2, 2, 13, 13, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4, 4, 2, 13, 2, 1, 2, 13, 1, 10, 1, 1, 1, 4, 11, 1, 1, 4, 5, 2, 11, 7, 2, 2, 4, 1, 11, 1, 4, 1, 10, 31, 3, 1, 1, 1, 1, 4, 1, 2, 6, 14, 12, 6, 13, 12, 2, 12, 2, 22, 17, 1, 3, 3, 1, 24, 14, 1, 1, 18, 26, 1, 2, 1, 1, 18, 23, 24, 1, 1, 23, 2, 2, 4, 2, 16, 17, 1, 1, 1, 13, 2, 8, 2, 2, 4, 3, 2, 1, 2, 11, 1, 2, 4, 1, 1, 1, 1, 19, 2, 1, 28, 4, 4, 3, 14, 18, 5, 7, 3, 5, 16, 33, 1, 11, 1, 16, 2, 39, 19, 18, 2, 27, 20, 25, 25, 27, 30, 25, 24, 11, 26, 5, 3, 5, 32, 4, 26, 30, 6, 2, 2, 

In [None]:
# NOTE(review): mu_all and BC_norm_cent are the values computed from G in the
# cells above; nothing visible here recomputes them for G2 - confirm intended.
x_train, y_train = mu_all, BC_norm_cent
for _shown in (x_train, y_train):
  print(tf.shape(_shown))


In [None]:
# Reload a previously saved model from Drive.
# NOTE(review): this file name ("GN08_model.h5") differs from the one written
# by the save cell ("GN08_model_v2.h5") - confirm which model is wanted.
path_model = "/content/drive/MyDrive/Colab_files/GN08_model.h5"

model = tf.keras.models.load_model(path_model)

In [None]:
# Run the reloaded model on the node embeddings held in mu_all.
# NOTE(review): mu_all was built from G above; no embedding of G2 is computed
# in the cells visible here - confirm this is the intended input.
x_new = mu_all
y_pred = model.predict(x_new)
