In [None]:
!pip install node2vec

Collecting node2vec
  Downloading node2vec-0.4.3.tar.gz (4.6 kB)
Building wheels for collected packages: node2vec
  Building wheel for node2vec (setup.py) ... [?25l[?25hdone
  Created wheel for node2vec: filename=node2vec-0.4.3-py3-none-any.whl size=5980 sha256=1b2ce318da182b1860467ff2ccde66795c58f4f744af9c08d841587432a0f927
  Stored in directory: /root/.cache/pip/wheels/07/62/78/5202cb8c03cbf1593b48a8a442fca8ceec2a8c80e22318bae9
Successfully built node2vec
Installing collected packages: node2vec
Successfully installed node2vec-0.4.3


In [None]:
import networkx as nx
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder

# Output paths for the artifacts written by this cell
EMBEDDING_FILENAME = "em_filename"
EMBEDDING_MODEL_FILENAME = "em_model"
EDGES_EMBEDDING_FILENAME = "edg_em_file"

# Seed for the random graph so that Restart & Run All rebuilds the same graph.
# NOTE: only the graph is seeded here; the walks and the Word2Vec training are not,
# so the embedding values themselves still differ between runs.
SEED = 42

# Create a random graph
graph = nx.fast_gnp_random_graph(n=100, p=0.5, seed=SEED)

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=4)  # Use temp_folder for big graphs

# Embed nodes
# Any keywords acceptable by gensim.Word2Vec can be passed, `dimensions` and `workers`
# are automatically passed (from the Node2Vec constructor)
model = node2vec.fit(window=10, min_count=1, batch_words=4)

# Look for most similar nodes (output node names are always strings).
# Printed explicitly: a bare expression in the middle of a cell is not displayed.
print(model.wv.most_similar('2'))

# Save embeddings for later use
model.wv.save_word2vec_format(EMBEDDING_FILENAME)

# Save model for later use
model.save(EMBEDDING_MODEL_FILENAME)

# Embed edges using Hadamard method
edges_embs = HadamardEmbedder(keyed_vectors=model.wv)

# Look for embeddings on the fly - here we pass normal tuples.
# Example of the printed value:
#   array([ 5.75068220e-03, -1.10937878e-02,  3.76693785e-01,  2.69105062e-02,
#          ...], dtype=float32)
print(edges_embs[('1', '2')])

# Get all edges in a separate KeyedVectors instance - use with caution could be huge for big networks
edges_kv = edges_embs.as_keyed_vectors()

# Look for most similar edges - this time tuples must be sorted and as str
print(edges_kv.most_similar(str(('1', '2'))))

# Save embeddings for later use
edges_kv.save_word2vec_format(EDGES_EMBEDDING_FILENAME)

Computing transition probabilities:   0%|          | 0/100 [00:00<?, ?it/s]

Generating edge features: 100%|██████████| 5050/5050.0 [00:00<00:00, 136389.15it/s]


In [None]:
# Report the Python interpreter version of the running kernel, so the recorded
# outputs can be tied to the environment that produced them.
import sys

print(sys.version, file=sys.stdout)

3.7.13 (default, Mar 16 2022, 17:37:17) 
[GCC 7.5.0]


In [None]:
!cat em_filename


100 64
64 -0.0699823 0.101281054 0.15702948 -0.017423334 0.01374711 -0.09960119 0.16951418 -0.038394824 0.03661157 0.052287456 0.058180347 -0.09154187 -0.13704355 0.059709378 0.013082469 0.01832738 0.095753886 0.050827447 0.031632844 -0.010532049 0.16022323 -0.010833038 0.108111255 -0.092020206 0.056908906 -0.13160415 -0.04682909 0.04939171 0.043445103 -0.19267254 -0.13157308 -0.040441386 0.09407803 0.05181431 0.037349623 0.119752355 0.08464299 0.14943607 0.033208534 -0.06771775 -0.090260126 0.09053709 0.01657612 -0.24230012 0.25990114 -0.16003552 0.09269838 0.11438105 0.067096435 0.0042623994 -0.0063149817 0.08191992 0.24177879 0.06683253 0.13952717 -0.29270825 0.0135656195 0.095647566 -0.0043743257 -0.12174711 0.013165323 0.056321822 -0.18688625 -0.1633131
98 -0.0075067244 0.0013702926 -0.06770111 -0.059979282 0.13668716 0.07620673 -0.022008125 -0.060509533 0.026965644 0.12213553 -0.08278745 0.03466007 0.13113323 -0.04046788 -0.15038712 -0.19858074 0.041076027 -0.07851575 -0.00676302