# Implementation of the Node2Vec algorithm

For more information, see: https://snap.stanford.edu/node2vec/


Python implementation of the algorithm on GitHub: https://github.com/eliorc/node2vec

In [5]:
import json
import networkx as nx
import pandas as pd
from node2vec import Node2Vec
from pathlib import Path

## Embedding

In [6]:
def node2vec_embedding(graph, dim, walk_length=20, num_walks=50, workers=1,
                       output_dir='embedded_partial_data', seed=None):
    """Compute a Node2Vec embedding of ``graph`` and save it as a CSV file.

    The embedding is written to ``<output_dir>/node2vec_<dim>.csv`` with one
    row per node (in ``graph.nodes()`` order) and one column per embedding
    dimension. The output directory is created if it does not exist yet.

    Parameters
    ----------
    graph : networkx graph
        Graph whose nodes are embedded.
    dim : int
        Number of embedding dimensions.
    walk_length : int, default 20
        Passed to ``Node2Vec`` as ``walk_length``.
    num_walks : int, default 50
        Passed to ``Node2Vec`` as ``num_walks``.
    workers : int, default 1
        Passed to ``Node2Vec`` as ``workers``.
        ON WINDOWS ONLY WORKS WITH workers=1.
    output_dir : str or path-like, default 'embedded_partial_data'
        Directory the CSV file is written to.
    seed : int or None, default None
        When given, forwarded to ``Node2Vec`` as ``seed``. When ``None``
        nothing is forwarded and the call is identical to the unseeded one.
        NOTE(review): requires a node2vec release whose constructor accepts
        ``seed`` -- confirm against the installed version.

    Returns
    -------
    pandas.DataFrame
        The embedding, indexed by node. The index is NOT written to the CSV
        (``index=False``), so rows in the file are identified by position only.
    """
    print('Embedding dimension: ', str(dim), '...')

    # Precompute probabilities and generate walks.
    # Only forward `seed` when the caller asked for it, so the default call
    # is unchanged.
    seed_kwargs = {} if seed is None else {'seed': seed}
    node2vec = Node2Vec(
        graph,
        dimensions=dim,
        walk_length=walk_length,
        num_walks=num_walks,
        workers=workers,
        **seed_kwargs,
    )

    # Embed nodes
    # Any keywords acceptable by gensim.Word2Vec can be passed
    # `dimensions` and `workers` are automatically passed (from the Node2Vec constructor)
    model = node2vec.fit(window=5, min_count=1, batch_words=4)

    # Materialise the node list once so the rows and the index are guaranteed
    # to be in the same order.
    nodes = list(graph.nodes())

    # Convert embedding to a dataframe (vectors are looked up by str(node)).
    embedding_df = pd.DataFrame(
        [model.wv.get_vector(str(node)) for node in nodes],
        index=nodes,
    )

    # Save embedding to csv (e.g. embedded_partial_data/node2vec_64.csv).
    # Create the target directory first: to_csv does not create it and would
    # otherwise fail only after the expensive walk generation has finished.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    embedding_df.to_csv(target_dir / ('node2vec_' + str(dim) + '.csv'), index=False)

    return embedding_df

In [7]:
# Load the network stored in the preprocessed GML file into a networkx graph
gml_file = Path("../preprocessing/preprocessed_data") / "removed_links_network.gml"
graph = nx.read_gml(gml_file)

In [8]:
# Embedding sizes to generate: the powers of two from 8 up to 128
dimensions = [2 ** exponent for exponent in range(3, 8)]

# Compute and save one embedding per size
for dim in dimensions:
    node2vec_embedding(graph, dim)

Embedding dimension:  8 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:42<00:00, 95.75it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:46<00:00,  1.08it/s]


Embedding dimension:  16 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:44<00:00, 91.14it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:47<00:00,  1.06it/s]


Embedding dimension:  32 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:44<00:00, 91.10it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:50<00:00,  1.00s/it]


Embedding dimension:  64 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:52<00:00, 76.38it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:47<00:00,  1.05it/s]


Embedding dimension:  128 ...


Computing transition probabilities: 100%|██████████| 4039/4039 [00:48<00:00, 83.36it/s] 
Generating walks (CPU: 1): 100%|██████████| 50/50 [00:47<00:00,  1.06it/s]
