# Implementation of Node2Vec algorithm

For more information, see: https://snap.stanford.edu/node2vec/


The Python implementation used in this notebook is available on GitHub: https://github.com/eliorc/node2vec

In [1]:
import json
import networkx as nx
import pandas as pd
from node2vec import Node2Vec
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


## Embedding

In [2]:
def node2vec_embedding(graph, dim, walk_length=20, num_walks=50, workers=1,
                       output_dir='embedded_partial_data'):
    """Embed the nodes of ``graph`` with Node2Vec and save the vectors as CSV.

    The embedding is written to ``<output_dir>/node2vec_<dim>.csv``
    (e.g. ``embedded_partial_data/node2vec_64.csv``). The output directory is
    created if it does not exist yet. The CSV is written with ``index=False``,
    so it holds one row per node in ``graph.nodes()`` order and no node-label
    column.

    Parameters
    ----------
    graph
        Graph passed to ``Node2Vec``; its ``nodes()`` are the rows of the
        result.
    dim : int
        Embedding dimension (passed to ``Node2Vec`` as ``dimensions``).
    walk_length : int, default 20
        Passed to ``Node2Vec`` as ``walk_length``.
    num_walks : int, default 50
        Passed to ``Node2Vec`` as ``num_walks``.
    workers : int, default 1
        Passed to ``Node2Vec`` as ``workers``.
        ON WINDOWS ONLY WORKS WITH workers=1.
    output_dir : str or Path, default 'embedded_partial_data'
        Directory the CSV file is written into.

    Returns
    -------
    pandas.DataFrame
        The embedding, indexed by node, with one column per dimension.
    """
    print('Embedding dimension: ', str(dim), '...')

    # Precompute probabilities and generate walks
    node2vec = Node2Vec(
        graph,
        dimensions=dim,
        walk_length=walk_length,
        num_walks=num_walks,
        workers=workers,
    )

    # Embed nodes
    # Any keywords acceptable by gensim.Word2Vec can be passed
    # `dimensions` and `workers` are automatically passed (from the Node2Vec constructor)
    model = node2vec.fit(window=5, min_count=1, batch_words=4)

    # Convert embedding to a dataframe.
    # Vectors are looked up by the string form of each node.
    nodes = list(graph.nodes())
    embedding_df = pd.DataFrame(
        [model.wv.get_vector(str(node)) for node in nodes],
        index=nodes,
    )

    # Save embedding to csv (e.g. embedded_partial_data/node2vec_64.csv).
    # Create the target directory first: to_csv does not create missing
    # parent directories and would raise OSError on a fresh checkout.
    output_path = Path(output_dir) / f'node2vec_{dim}.csv'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    embedding_df.to_csv(output_path, index=False)

    return embedding_df

In [3]:
# Load the preprocessed network (GML file with removed links) as a graph.
# The path is relative to the notebook's working directory.
gml_file = Path("../preprocessing/preprocessed_data/removed_links_network.gml")
graph = nx.read_gml(gml_file)

In [4]:
# Embedding sizes to generate.
dimensions = [8, 16, 32, 64, 128]

# Run the embedding once per size; each call writes its own CSV file.
for dim in dimensions:
    node2vec_embedding(graph, dim)

Embedding dimension:  8 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:00<00:00, 37386.58it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 107.22it/s]


Embedding dimension:  16 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:00<00:00, 57950.13it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 96.32it/s] 


Embedding dimension:  32 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:00<00:00, 114334.27it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 110.22it/s]


Embedding dimension:  64 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:00<00:00, 45861.66it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 93.95it/s] 


Embedding dimension:  128 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:00<00:00, 108970.64it/s]
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:00<00:00, 111.22it/s]
