In [1]:
import os
import pickle
import sys
sys.path.append('../')

import networkx as nx
from node2vec import Node2Vec

from src.utils import load_embeddings

%load_ext autoreload
%autoreload 2

In [2]:
# `nx.read_gpickle` was deprecated in NetworkX 2.6 and removed in 3.0.
# A .gpickle file is an ordinary pickle of the graph object, so the standard
# library reads it on any NetworkX version.
# NOTE: unpickling executes arbitrary code -- only load files you trust.
with open('data/network.gpickle', 'rb') as graph_file:
    G = pickle.load(graph_file)
G.number_of_nodes(), G.number_of_edges()

(373, 486)

In [3]:
def generate_node_embeddings(
    graph: nx.Graph,
    dimensions: int = 64,
    num_walks: int = 100,
    walk_length: int = 80,
    weight_key: str = 'dist',
    temp_folder: str = 'temp/',
    workers: int = 1,
    window: int = 5,
    min_count: int = 1,
    seed=None,
):
    """Train Node2Vec embeddings for every node of ``graph``.

    Called with only ``graph``, this uses the same settings as before:
    64 dimensions, 100 walks of length 80 per node, edge attribute ``'dist'``
    as the walk weight, a single worker, and a skip-gram window of 5 with
    ``min_count=1``.

    Args:
        graph: Graph whose nodes are embedded.
        dimensions: Size of each embedding vector.
        num_walks: Number of random walks started from each node.
        walk_length: Number of nodes in each random walk.
        weight_key: Edge attribute used as the walk weight.
            NOTE(review): node2vec treats larger weights as *more* likely
            transitions; if ``'dist'`` is a physical distance this favours
            far-away neighbours -- confirm this is intended.
        temp_folder: Folder node2vec uses for temporary shared-memory files.
            Created if missing. Pass ``None`` to disable.
        workers: Number of parallel workers.
        window: Skip-gram context window, forwarded to ``Node2Vec.fit``.
        min_count: Minimum token frequency, forwarded to ``Node2Vec.fit``.
        seed: Optional random seed. Forwarded to both the walk generator and
            the skip-gram training only when given, so installs of node2vec
            without a ``seed`` argument keep working by default.

    Returns:
        The fitted model returned by ``Node2Vec.fit`` (it exposes ``.wv`` for
        vector lookup and ``.save`` for persistence).
    """
    # Node2Vec raises NotADirectoryError when the temp folder is absent,
    # which breaks a fresh checkout; create it up front.
    if temp_folder:
        os.makedirs(temp_folder, exist_ok=True)

    walk_options = {}
    fit_options = {}
    if seed is not None:
        walk_options['seed'] = seed
        fit_options['seed'] = seed

    model = Node2Vec(
        graph,
        dimensions=dimensions,
        num_walks=num_walks,
        walk_length=walk_length,
        weight_key=weight_key,
        temp_folder=temp_folder,
        workers=workers,
        **walk_options,
    )
    return model.fit(window=window, min_count=min_count, **fit_options)

In [4]:
%%time
embeddings = generate_node_embeddings(G)

Computing transition probabilities: 100%|██████████| 373/373 [00:00<00:00, 19357.53it/s]
Generating walks (CPU: 1): 100%|██████████| 100/100 [01:39<00:00,  1.00it/s]


CPU times: user 1min 52s, sys: 228 ms, total: 1min 52s
Wall time: 1min 52s


In [5]:
# Show the first five edges to see what the node labels look like.
for edge in list(G.edges())[:5]:
    print(edge)

('1 AV', '3 AV')
('1 AV', 'BEDFORD AV')
('103 ST', '96 ST')
('103 ST', 'CATHEDRAL PKWY')
('103 ST', '110 ST')


In [6]:
# Sanity check: list the nodes whose vectors are closest to '1 AV'.
keyed_vectors = embeddings.wv
keyed_vectors.most_similar('1 AV')

[('3 AV', 0.6665233373641968),
 ('BEDFORD AV', 0.6433871984481812),
 ('LORIMER ST', 0.6199206113815308),
 ('UNION SQ - 14 ST', 0.5477275848388672),
 ('HEWES ST', 0.35049864649772644),
 ('MARCY AV', 0.3295830488204956),
 ('6 AV', 0.3208344578742981),
 ('GRAHAM AV', 0.28009000420570374),
 ('METROPOLITAN AV', 0.21669058501720428),
 ('JAMAICA - 179 ST', 0.1911972314119339)]

In [7]:
# Persist the trained model so later notebooks can reuse it without retraining.
embeddings_path = 'models_data/embeddings64.data'
embeddings.save(embeddings_path)

In [8]:
# Attach each node's embedding vector to the graph as the 'embedding' node
# attribute (used as the x feature). Vectors are looked up by str(node) and
# copied so the graph does not share memory with the trained model.
# `set_node_attributes` replaces the manual loop, which enumerated an index
# it never used and called `add_node` on nodes that already exist.
node_embeddings = {node: embeddings.wv[str(node)].copy() for node in G.nodes()}
nx.set_node_attributes(G, node_embeddings, name='embedding')

In [9]:
# `nx.readwrite.write_gpickle` was deprecated in NetworkX 2.6 and removed in
# 3.0. It was a thin wrapper around pickle.dump, so writing with the standard
# library at the same (highest) protocol produces an equivalent file.
with open('data/network64.gpickle', 'wb') as graph_file:
    pickle.dump(G, graph_file, protocol=pickle.HIGHEST_PROTOCOL)