In [2]:
# import the modules
import urllib.request
import networkx as nx
import numpy as np 
import pandas as pd
import random
import matplotlib.pyplot as plt 
from node2vec import Node2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, roc_curve 
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#Load the dataset
# Define the URL of the edge list file for the Facebook-Ego network
facebook_url = "https://raw.githubusercontent.com/wang422003/Complex-Networks_exercise/main/Datasets/Group3/Facebook-Ego/348.edges"
# Download the Facebook-Ego network file
urllib.request.urlretrieve(facebook_url, "facebook.edges")
# Load the Facebook-Ego network (undirect graph)
facebook_network = nx.read_edgelist("facebook.edges", nodetype=int)

# Define the URL of the edge list file for the Twitter-Ego network
twitter_url = "https://raw.githubusercontent.com/wang422003/Complex-Networks_exercise/main/Datasets/Group3/Twitter-Ego/789071.edges"
# Download the Twitter-Ego network file
urllib.request.urlretrieve(twitter_url, "twitter.edges")
# Load the Twitter-Ego network (direct graph)
twitter_network = nx.read_edgelist("twitter.edges", nodetype=int, create_using=nx.DiGraph())

In [4]:

# Generate Node2Vec embeddings
node2vec = Node2Vec(facebook_network)
model = node2vec.fit()

Computing transition probabilities: 100%|██████████| 148/148 [00:00<00:00, 657.18it/s]
Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 12.22it/s]


In [5]:
# Extract Node2Vec embeddings
embeddings = {str(node): model.wv[str(node)] for node in facebook_network.nodes()}


In [6]:
# Define the target edges for link prediction
percentage_of_target_edges = 10
total_edges = facebook_network.number_of_edges()
num_target_edges = int((percentage_of_target_edges / 100) * total_edges)
all_edges = list(facebook_network.edges())
target_edges = random.sample(all_edges, num_target_edges)

In [7]:
# Remove target edges from the graph
train_graph = facebook_network.copy()
train_graph.remove_edges_from(target_edges)


In [9]:
# Extract positive and negative edges for training
positive_edges = [(edge[0], edge[1], 1) for edge in train_graph.edges()]
negative_edges = [(edge[0], edge[1], 0) for edge in nx.non_edges(train_graph)]


In [10]:
# Combine positive and negative edges
edges = positive_edges + negative_edges

# Create feature vectors and labels
X_edges = [np.concatenate((embeddings[str(edge[0])], embeddings[str(edge[1])])) for edge in edges]
y_edges = [label for (_, _, label) in edges]


In [11]:
from sklearn.ensemble import RandomForestClassifier
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_edges, y_edges, test_size=0.2, random_state=42)

# Train a machine learning algorithm (e.g., RandomForestClassifier)
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [12]:
# Predict on the missing edges
missing_edges = target_edges
X_missing_edges = [np.concatenate((embeddings[str(edge[0])], embeddings[str(edge[1])])) for edge in missing_edges]
y_missing_edges_true = [1 if facebook_network.has_edge(edge[0], edge[1]) else 0 for edge in missing_edges]
y_missing_edges_true = np.array(y_missing_edges_true).reshape(-1, 1)
y_missing_edges_pred = model.predict(X_missing_edges)

In [13]:
# Evaluate the performance of the model on missing edges
accuracy = accuracy_score(y_missing_edges_true, y_missing_edges_pred)
roc_auc = roc_auc_score(y_missing_edges_true, y_missing_edges_pred)
precision = precision_score(y_missing_edges_true, y_missing_edges_pred)


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [14]:
# Print the results
print("Accuracy on missing edges:", accuracy)
print("AUROC on missing edges:", roc_auc)
print("Precision on missing edges:", precision)

Accuracy on missing edges: 0.4274193548387097


NameError: name 'roc_auc' is not defined