In [59]:
import pandas as pd
import numpy as np
import networkx as nx
import random

%pip install node2vec
from node2vec import Node2Vec
from gensim.models import Word2Vec

from sklearn.metrics.pairwise import cosine_similarity

Note: you may need to restart the kernel to use updated packages.


In [60]:
# Name of the event to analyse; it is interpolated into the subgraph file path
# when the graph is loaded.
chosen_event = 'wildfire'

In [61]:
# Build the path of the GraphML file for the chosen event (relative to the
# notebook's working directory) and load it as a NetworkX graph.
path = f'subgraphs_data/{chosen_event}_subgraph.graphml'
g = nx.read_graphml(path)

In [63]:
# Collect the user nodes of the chosen event: every node whose 'labels'
# attribute equals ':User'. Each entry is a one-item dict mapping the node id
# to that node's attribute dict.
users = [
    {node_id: attrs}
    for node_id, attrs in g.nodes(data=True)
    if attrs.get('labels') == ':User'
]

In [64]:
# Draw a reproducible random sample of up to 100 users.
SAMPLE_SIZE = 100

# Fixed seed so the same users are drawn on every run.
random.seed(55)

# random.sample raises ValueError when asked for more items than the
# population holds. An event with fewer than SAMPLE_SIZE users would crash
# here, so cap the request at the number of users actually available.
users_sample = random.sample(users, min(SAMPLE_SIZE, len(users)))

In [65]:
# Pull out the node ids of the sampled users. Every entry of users_sample is a
# dict keyed by node id, so iterating over an entry yields its id(s).
node_sample_ids = [node_id for entry in users_sample for node_id in entry]

# Restrict the full event graph to the sampled user nodes.
sample_graph = g.subgraph(node_sample_ids)

In [66]:
# Generate node2vec random walks over the sampled subgraph, then train a
# Word2Vec model on those walks to obtain 25-dimensional node embeddings.

# Re-seed the global RNGs right before the walks are generated so that this
# cell gives the same walks when re-run on its own.
# NOTE(review): this assumes node2vec draws from the global `random` / numpy
# RNGs in-process (its default single worker) -- confirm for the installed version.
random.seed(55)
np.random.seed(55)

# define a Node2Vec model, and extract the walks from this model
node2vec = Node2Vec(sample_graph)
walks = node2vec.walks

# Apply Word2Vec to the walks; vector_size=25 gives embeddings of size 25.
#   seed=55, workers=1 : gensim training is only repeatable with a fixed seed
#                        and a single worker thread (across interpreter
#                        launches PYTHONHASHSEED must also be fixed).
#   min_count=1        : keep every node that appears in a walk in the
#                        vocabulary, so looking up any sampled node in
#                        modelw2v.wv cannot fail because it was pruned as rare.
modelw2v = Word2Vec(walks, vector_size=25, min_count=1, workers=1, seed=55)

Computing transition probabilities: 100%|██████████| 100/100 [00:00<00:00, 30817.81it/s]
Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 1738.93it/s]


In [67]:
# Look up the trained embedding vector of each node in the sampled subgraph
# and store it under the node's id.
node_embeddings = {}
for node in sample_graph.nodes():
    node_embeddings[node] = modelw2v.wv[node]

# Display the mapping as the cell output.
node_embeddings

In [70]:
# Repackage the embeddings as a list of one-item {node id: vector} dicts,
# in the same order as node_embeddings.
list_of_embeddings = [
    {node_id: vector} for node_id, vector in node_embeddings.items()
]