In [1]:
import argparse

import torch
from torch_geometric.nn import Node2Vec

from ogb.nodeproppred import PygNodePropPredDataset



In [2]:
def save_embedding(model, path='embedding.pt'):
    """Write the model's embedding matrix to disk with ``torch.save``.

    Args:
        model: Object exposing ``model.embedding.weight`` as a tensor
            (here, the Node2Vec model trained in this notebook).
        path: Destination file. Defaults to ``'embedding.pt'`` in the
            current working directory, matching the previous behaviour.

    The weight is detached from autograd and copied to CPU before saving so
    the file can be loaded on a machine without a GPU.
    """
    torch.save(model.embedding.weight.detach().cpu(), path)

In [3]:
# Run configuration: compute device, RNG seed, and hyper-parameters.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG so that everything drawing from it (parameter
# initialisation, loader shuffling, ...) is repeatable across
# Restart & Run All.
seed = 0
torch.manual_seed(seed)

# NOTE(review): num_layers and dropout are not referenced by any cell shown
# in this notebook; kept in case other code relies on them.
num_layers = 2
dropout = 0.5

# Optimisation settings.
epochs = 1
batch_size = 128
lr = 0.01
log_steps = 100  # print the loss every `log_steps` batches

# Node2Vec settings.
walk_length = 20
walks_per_node = 1
context_size = 10
embedding_dim = 64

# Load the ogbn-products dataset and take its first (index 0) entry.
dataset = PygNodePropPredDataset(name='ogbn-products')
data = dataset[0]

In [4]:
# Build the Node2Vec model. num_nodes is passed explicitly so the embedding
# table is sized from the dataset's node count rather than inferred from
# edge_index; otherwise trailing nodes without edges could be left out and
# the embeddings would not line up row-for-row with data.y.
model = Node2Vec(
    data.edge_index,
    embedding_dim=embedding_dim,
    walk_length=walk_length,
    context_size=context_size,
    walks_per_node=walks_per_node,
    num_nodes=data.num_nodes,
    sparse=True,
).to(device)

# The loader yields (positive walks, negative walks) pairs per batch.
loader = model.loader(batch_size=batch_size, shuffle=True, num_workers=0)

# The model is built with sparse=True, so it is paired with SparseAdam.
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=lr)

model.train()
for epoch in range(1, epochs + 1):
    for i, (pos_rw, neg_rw) in enumerate(loader):
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()

        # Report progress every `log_steps` batches.
        if (i + 1) % log_steps == 0:
            print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, '
                  f'Loss: {loss:.4f}')
    # Checkpoint the embedding matrix at the end of every epoch.
    save_embedding(model)

Epoch: 01, Step: 100/19134, Loss: 6.7755
Epoch: 01, Step: 200/19134, Loss: 6.6742
Epoch: 01, Step: 300/19134, Loss: 6.6339
Epoch: 01, Step: 400/19134, Loss: 6.5887
Epoch: 01, Step: 500/19134, Loss: 6.4945
Epoch: 01, Step: 600/19134, Loss: 6.4333
Epoch: 01, Step: 700/19134, Loss: 6.5697
Epoch: 01, Step: 800/19134, Loss: 6.6184
Epoch: 01, Step: 900/19134, Loss: 6.4451
Epoch: 01, Step: 1000/19134, Loss: 6.5900
Epoch: 01, Step: 1100/19134, Loss: 6.4612
Epoch: 01, Step: 1200/19134, Loss: 6.3273
Epoch: 01, Step: 1300/19134, Loss: 6.4619
Epoch: 01, Step: 1400/19134, Loss: 6.3759
Epoch: 01, Step: 1500/19134, Loss: 6.2002
Epoch: 01, Step: 1600/19134, Loss: 6.3404
Epoch: 01, Step: 1700/19134, Loss: 6.1633
Epoch: 01, Step: 1800/19134, Loss: 6.1413
Epoch: 01, Step: 1900/19134, Loss: 6.2510
Epoch: 01, Step: 2000/19134, Loss: 5.9049
Epoch: 01, Step: 2100/19134, Loss: 6.0133
Epoch: 01, Step: 2200/19134, Loss: 6.0696
Epoch: 01, Step: 2300/19134, Loss: 6.0149
Epoch: 01, Step: 2400/19134, Loss: 6.1405
E

In [5]:
# Switch the model to evaluation mode, then call it with no arguments and
# keep the result as `z`.
# NOTE(review): presumably Node2Vec's forward returns the full embedding
# matrix when no batch is given — confirm against the installed
# torch_geometric version.
model.eval()
z = model()

In [6]:
# Allocate a single integer mask with one entry per node. Both names are
# bound to the SAME tensor object, so a write through one is visible
# through the other.
train_mask = torch.zeros(data.num_nodes, dtype=torch.long, device=device)
x_mask = train_mask
x_mask[0] = 1  # also sets train_mask[0], because the two names alias

# Alias the model output under a more descriptive name for later cells.
node_embeddings = z

In [7]:
from sklearn.tree import DecisionTreeClassifier
# from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.model_selection import train_test_split

In [8]:
import numpy as np

# Convert features (embeddings) and labels to NumPy arrays on the host so
# they can be handed to scikit-learn.
X = node_embeddings.detach().cpu().numpy()
y = data.y.detach().cpu().numpy()


In [9]:
# 80/20 train/test split; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=10,
)

In [10]:
# Fit a depth-limited decision tree on the training embeddings.
# random_state is fixed (same value as the train/test split) so the fitted
# tree, and therefore the reported accuracy, is repeatable for a given X/y.
dtree_model = DecisionTreeClassifier(max_depth=8, random_state=10)
dtree_model.fit(X_train, y_train)

# Predict labels for the held-out nodes.
dtree_predictions = dtree_model.predict(X_test)

In [11]:
# Confusion matrix of true vs. predicted labels on the held-out set.
cm = confusion_matrix(y_true=y_test, y_pred=dtree_predictions)

In [15]:
# Fraction of held-out nodes whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=dtree_predictions))

0.7732183762550888
