In [5]:
from torch_geometric.datasets import Planetoid

import torch_geometric.transforms as T

# Load the Cora dataset; `root` is the on-disk directory used for its files.
dataset = Planetoid(name='Cora', root='../Cora')

# Take the first entry (index 0) of the dataset as the working graph object.
data = dataset[0]

In [6]:
# Fields read from `data` in this notebook:
#   data.x                      node features
#   data.edge_index             graph connectivity
#   data.y                      node labels
#   data.train_mask / val_mask / test_mask   masks for splitting the dataset

# A single print call; sep="\n" keeps every statistic on its own line.
print(
    f"Number of nodes: {data.num_nodes}",
    f"Number of edges: {data.num_edges}",
    f"Number of node features: {data.num_features}",
    f"Number of classes: {dataset.num_classes}",
    sep="\n",
)

Number of nodes: 2708
Number of edges: 10556
Number of node features: 1433
Number of classes: 7


In [7]:
import networkx as nx

# Build an undirected NetworkX graph from the PyG connectivity tensor.
graph_nx = nx.Graph()
# Register every node id up front: add_edges_from only creates nodes that
# appear in at least one edge, so an isolated node would otherwise be dropped
# and never receive a walk or an embedding.
graph_nx.add_nodes_from(range(data.num_nodes))
# Transposing edge_index turns it into a list of [source, target] pairs,
# which is the edge format add_edges_from consumes.
graph_nx.add_edges_from(data.edge_index.t().tolist())

In [8]:
# Sanity check: list the nodes adjacent to node 13 in the NetworkX graph.
list(graph_nx.neighbors(13))

[1701, 1810]

In [9]:
import torch
import random


def RandomWalk(G, v, walk_length, rng=None):
    """Sample one uniform random walk that starts at node ``v``.

    At every step the next node is drawn uniformly from the neighbours of the
    current node. The walk stops early if the current node has no neighbours.

    Parameters
    ----------
    G : graph-like
        Any object whose ``neighbors(node)`` method returns an iterable of
        adjacent nodes (for example a ``networkx.Graph``).
    v : hashable
        Start node of the walk.
    walk_length : int
        Maximum number of nodes in the walk, start node included. The start
        node is always part of the result, so values below 1 yield ``[v]``.
    rng : random.Random, optional
        Source of randomness. Defaults to the module-level ``random``
        generator; pass a seeded ``random.Random`` for reproducible walks.

    Returns
    -------
    list
        Visited nodes in order, beginning with ``v``.
    """
    # Fall back to the global generator so existing callers behave as before.
    chooser = random if rng is None else rng

    walk = [v]
    current = v
    while len(walk) < walk_length:
        neighbors = list(G.neighbors(current))
        if not neighbors:
            # Dead end: nothing to step to, return the shorter walk.
            break
        current = chooser.choice(neighbors)
        walk.append(current)

    return walk

def deeepwalk(graph_nx, walk_length, walks_per_node, seed=None):
    """Generate the random-walk corpus used to train DeepWalk embeddings.

    Makes ``walks_per_node`` passes over the graph. In each pass the nodes are
    visited in a freshly shuffled order and one random walk is started from
    every node, so the result holds ``walks_per_node * number_of_nodes`` walks.

    Parameters
    ----------
    graph_nx : graph-like
        Graph exposing ``nodes()`` and whatever ``RandomWalk`` needs.
    walk_length : int
        Maximum number of nodes per walk (forwarded to ``RandomWalk``).
    walks_per_node : int
        Number of passes, i.e. walks started from each node.
    seed : int, optional
        When given, the module-level ``random`` generator is re-seeded with
        this value before sampling so the corpus is reproducible. ``None``
        (the default) leaves the generator state untouched.

    Returns
    -------
    list of list
        One inner list of visited nodes per walk.
    """
    if seed is not None:
        random.seed(seed)

    random_walks = []
    for _ in range(walks_per_node):
        # list() already builds a new list, so shuffling it in place does not
        # disturb the graph's own node container.
        node_order = list(graph_nx.nodes())
        random.shuffle(node_order)
        for start in node_order:
            random_walks.append(RandomWalk(graph_nx, start, walk_length))
    return random_walks

In [10]:
# Seed Python's random generator first so that the sampled walks are the same
# on every Restart & Run All.
random.seed(42)
# 5 walks of at most 10 nodes are started from every node of the graph.
sentences = deeepwalk(graph_nx, walk_length=10, walks_per_node=5)

In [11]:
# Number of walks generated: one walk per node for each of the walks_per_node passes.
len(sentences)

13540

In [12]:
# Dump the walk corpus to disk: one walk per line, node ids separated by spaces.
with open("walks.txt", "w") as f:
    for walk in sentences:
        # print() stringifies each node id and applies the separator/terminator.
        print(*walk, sep=" ", end="\n", file=f)


In [13]:
import fasttext

# Train skip-gram embeddings on the walk corpus; every node id acts as a word.
model = fasttext.train_unsupervised(
    input='walks.txt',
    # Architecture and objective: skip-gram with the 'hs' loss, no negatives.
    model='skipgram',
    loss='hs',
    neg=0,
    # Embedding size and context window.
    dim=128,
    ws=5,
    # Vocabulary: keep every node id and disable subwords (pure Word2Vec behavior).
    minCount=0,
    minn=0,
    maxn=0,
    # Training schedule.
    epoch=10,
    thread=4,
)

# Look up the embedding of node 2 and print its shape.
vector = model.get_word_vector("2")
print(vector.shape)

Read 0M words
Number of words:  2709
Number of labels: 0
Progress:  94.0% words/sec/thread:  177998 lr:  0.003017 avg.loss:  3.645911 ETA:   0h 0m 0s

(128,)


Progress: 100.0% words/sec/thread:  171212 lr:  0.000000 avg.loss:  3.624417 ETA:   0h 0m 0s


In [14]:
# Display the full learned embedding vector for node 0.
model.get_word_vector("0")

array([-0.03472757,  0.6215284 ,  0.28486943, -0.72974044, -0.00990918,
       -0.42335024,  0.08379763,  0.81374973, -0.11037952, -0.16239147,
        0.18493341,  0.3711851 ,  0.43720156, -0.5358561 , -0.3020294 ,
        0.21139073,  0.30173165, -0.47209182, -0.47215852,  0.17773363,
        0.6400696 ,  0.03976898,  0.23814403, -0.8673644 ,  1.2291881 ,
       -0.60600835,  0.27004325, -0.01542067,  0.15250923, -0.06684037,
        0.11416901, -0.22817133, -0.45182028,  0.5675845 ,  0.26174116,
       -0.3990684 ,  0.50900084, -1.0375887 , -0.21104032,  0.04796304,
        0.57274956, -0.53301686,  0.30129686, -0.5607405 , -0.37655148,
       -0.03341389,  0.19673863, -0.5518809 ,  0.59004873,  0.4023021 ,
        0.2717514 ,  0.77717394,  0.03416823,  0.6869815 , -0.18897021,
       -0.7234986 , -0.29557973, -0.05016053,  0.5464305 ,  0.14230795,
       -0.06687036, -0.41236076,  0.75588274, -0.01745494,  0.00138288,
        0.2882936 , -0.20061965,  0.22463915,  0.6639795 ,  0.26