In [1]:
pip install node2vec

Collecting node2vec
Note: you may need to restart the kernel to use updated packages.

  Downloading node2vec-0.4.4-py3-none-any.whl (6.8 kB)
Collecting joblib<2.0.0,>=1.1.0
  Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)
     ------------------------------------ 307.0/307.0 KB 240.4 kB/s eta 0:00:00
Collecting tqdm<5.0.0,>=4.55.1
  Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
     -------------------------------------- 78.4/78.4 KB 364.7 kB/s eta 0:00:00
Collecting numpy<2.0.0,>=1.19.5
  Downloading numpy-1.22.3-cp310-cp310-win_amd64.whl (14.7 MB)
     ---------------------------------------- 14.7/14.7 MB 1.8 MB/s eta 0:00:00
Collecting networkx<3.0,>=2.5
  Downloading networkx-2.7.1-py3-none-any.whl (2.0 MB)
     ---------------------------------------- 2.0/2.0 MB 1.5 MB/s eta 0:00:00
Collecting gensim<5.0.0,>=4.1.2
  Downloading gensim-4.1.2.tar.gz (23.2 MB)
     ---------------------------------------- 23.2/23.2 MB 1.8 MB/s eta 0:00:00
  Preparing metadata (setup.py



In [2]:
import networkx as nx
from node2vec import Node2Vec

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Create a random G(n, p) graph with 100 nodes and edge probability 0.5.
# A fixed seed makes the generated graph identical on every Restart & Run All.
graph = nx.fast_gnp_random_graph(n=100, p=0.5, seed=42)

In [5]:
# Precompute the transition probabilities and generate the random walks.
# Caveat carried over from the original notebook: **ON WINDOWS ONLY WORKS WITH workers=1**.
# For big graphs, also pass temp_folder.
node2vec = Node2Vec(
    graph,
    dimensions=64,
    walk_length=30,
    num_walks=200,
    workers=4,
)

Computing transition probabilities: 100%|██████████| 100/100 [00:01<00:00, 97.90it/s]


In [6]:
# Train the node embeddings.
# fit() takes any keyword accepted by gensim.Word2Vec; `dimensions` and `workers`
# are forwarded automatically from the Node2Vec constructor.
model = node2vec.fit(
    window=10,
    min_count=1,
    batch_words=4,
)

In [7]:
# Query the nodes closest to node "2".
# Node names in the output are always strings.
model.wv.most_similar("2")

[('46', 0.527676522731781),
 ('74', 0.5064496397972107),
 ('72', 0.4751788377761841),
 ('25', 0.47396326065063477),
 ('95', 0.4681757688522339),
 ('84', 0.4650200605392456),
 ('52', 0.4648228883743286),
 ('10', 0.44883275032043457),
 ('17', 0.4398011565208435),
 ('30', 0.4394145905971527)]

In [13]:
# Save the node embeddings for later use.
# Raw string: "\E" is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; r"..." keeps the exact same path without it.
model.wv.save_word2vec_format(r"D:\EMBEDDING.csv")

In [23]:
# Save the trained model for later use.
# Raw string: "\E" is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; r"..." keeps the exact same path without it.
model.save(r"D:\EMBEDDING_MODEL_FILENAME")

In [15]:
# Embed edges using Hadamard method
from node2vec.edges import HadamardEmbedder

In [17]:
# Build the Hadamard edge embedder on top of the trained node vectors.
edges_embs = HadamardEmbedder(
    keyed_vectors=model.wv,
)

In [18]:
# Compute a single edge embedding on the fly; the key is an ordinary tuple of node names.
edges_embs[("1", "2")]

array([ 9.5807249e-03,  3.7254172e-03,  3.3791587e-02,  9.5447665e-04,
       -9.6970331e-03,  5.6540142e-03,  5.4635282e-04,  1.6554417e-02,
        1.8860998e-02, -1.0470149e-02, -2.7012920e-03,  6.3212596e-02,
        1.8040871e-02, -2.8443108e-03,  2.2936007e-03,  1.5903729e-03,
        3.0545103e-03,  4.3581918e-02, -1.0016528e-02, -9.4600610e-04,
        6.0090929e-04,  1.4733123e-03,  4.4853043e-02, -2.7831388e-03,
       -3.6959606e-03,  3.1730190e-02,  1.2685126e-02,  2.9302988e-04,
       -8.8048750e-04, -2.4939064e-04, -1.0356703e-02, -6.9592549e-03,
        8.8103302e-03, -3.5269305e-03,  4.7687180e-02, -4.4514312e-04,
        3.8027801e-03, -5.3887283e-03, -7.9686684e-04,  1.9116161e-03,
       -1.3124545e-02, -1.6640109e-03,  4.9113831e-03,  1.3847258e-02,
        8.7748020e-05,  1.6984027e-02,  1.3410002e-03,  9.8642218e-04,
       -1.5710502e-04, -5.7513160e-03,  1.3005565e-03,  2.9148201e-02,
       -2.1914061e-02, -2.0090316e-03,  4.9060967e-04,  2.6583536e-02,
      

In [19]:
# Materialize every edge embedding into its own KeyedVectors instance.
# Use with caution: this could be huge for big networks.
edges_kv = edges_embs.as_keyed_vectors()

Generating edge features: 100%|██████████| 5050/5050.0 [00:00<00:00, 88591.42it/s]


In [20]:
# Query the edges closest to edge ('1', '2').
# Here the key must be the sorted tuple converted to a string.
edges_kv.most_similar(str(("1", "2")))

[("('1', '72')", 0.7549440860748291),
 ("('1', '17')", 0.7177075743675232),
 ("('1', '74')", 0.7153148055076599),
 ("('2', '72')", 0.6955525279045105),
 ("('2', '74')", 0.6657482981681824),
 ("('2', '2')", 0.6613296270370483),
 ("('2', '84')", 0.6541402339935303),
 ("('2', '88')", 0.6535706520080566),
 ("('1', '84')", 0.6446952819824219),
 ("('1', '95')", 0.635215163230896)]

In [22]:
# Save the edge embeddings for later use.
# Raw string: "\E" is not a valid escape sequence, so a plain literal triggers an
# invalid-escape warning; r"..." keeps the exact same path without it.
edges_kv.save_word2vec_format(r"D:\EDGES_EMBEDDING_FILENAME.csv")