# Implementation of Node2Vec algorithm

For more information, see: https://snap.stanford.edu/node2vec/


The implementation used in this notebook is the `node2vec` Python package, available on GitHub: https://github.com/eliorc/node2vec

In [2]:
import sys
from pathlib import Path

import networkx as nx
from node2vec import Node2Vec

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the preprocessed Facebook network from its GML file into a NetworkX graph.
# The path is relative, so it is resolved against the current working directory.
facebook_gml_file = Path("preprocessed_data") / "facebook_network.gml"
facebook_graph = nx.read_gml(facebook_gml_file)

In [4]:
# Fixed seed shared by the random-walk generation and the Word2Vec training, so
# that re-running the notebook starts from the same random state.
SEED = 42

# ON WINDOWS ONLY WORKS WITH workers=1, so fall back to a single worker there
# and keep 4 parallel workers on every other platform.
N_WORKERS = 1 if sys.platform.startswith("win") else 4

# Precompute probabilities and generate walks
node2vec = Node2Vec(
    facebook_graph,
    dimensions=64,
    walk_length=30,
    num_walks=200,
    workers=N_WORKERS,
    seed=SEED,
)

# Embed nodes
# Any keywords acceptable by gensim.Word2Vec can be passed
# `dimensions` and `workers` are automatically passed (from the Node2Vec constructor)
# NOTE(review): with more than one worker the result may still vary slightly
# between runs even with a fixed seed; set N_WORKERS = 1 when exact
# reproducibility matters — confirm against the node2vec / gensim docs.
model = node2vec.fit(window=10, min_count=1, batch_words=4, seed=SEED)

Computing transition probabilities: 100%|██████████| 4039/4039 [01:07<00:00, 60.16it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [08:56<00:00, 10.73s/it]
Generating walks (CPU: 2): 100%|██████████| 50/50 [09:03<00:00, 10.87s/it]
Generating walks (CPU: 3): 100%|██████████| 50/50 [09:07<00:00, 10.95s/it]
Generating walks (CPU: 4): 100%|██████████| 50/50 [09:01<00:00, 10.84s/it]


In [5]:
# Query the trained embedding for the 10 nodes closest to node 1.
# Node names in the embedding are strings, hence the lookup key '1'.
nearest_to_node_1 = model.wv.most_similar('1', topn=10)

# Print one result per line.
for node in nearest_to_node_1:
    print(node)

('299', 0.9299114942550659)
('126', 0.9274719953536987)
('48', 0.9256669282913208)
('53', 0.907521665096283)
('194', 0.9074535965919495)
('92', 0.9061319231987)
('302', 0.9008632302284241)
('73', 0.895550549030304)
('180', 0.8922739028930664)
('24', 0.8905556797981262)


In [6]:
# Persist the learned node vectors to disk in word2vec format so they can be
# reloaded without re-running the walk generation and training above.
embeddings_file = 'node2vec_embeddings.txt'
model.wv.save_word2vec_format(embeddings_file)