In [None]:
import torch
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric import loader

# Load the ogbn-products node-property-prediction dataset rooted at 'dataset/'.
dataset = PygNodePropPredDataset(root='dataset/', name='ogbn-products')

# First entry of the dataset, plus the split mapping (split name -> node indices).
data = dataset[0]
split_idx = dataset.get_idx_split()

# For every split, build a boolean mask over all nodes that is True exactly at
# that split's indices, and store it on `data` as '<split name>_mask'.
for key in split_idx.keys():
    idx = split_idx[key]
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[idx] = True
    data[f'{key}_mask'] = mask
    

In [None]:
import torch
from torch_geometric import loader
from torch_geometric.data import Data
from torch_geometric.nn import Node2Vec

def node2vec(data, walk_length, context_size, walks_per_node,
             embedding_dim=100, num_epochs=1, batch_size=256, lr=0.01,
             num_workers=4):
    """Train a Node2Vec model on the graph in ``data`` and return its embeddings.

    The model is built from ``data.edge_index`` (the graph that was passed in,
    not a freshly reloaded dataset), trained with ``SparseAdam`` on the random
    walk batches produced by ``model.loader``, and its embedding table is
    returned on the CPU.

    Args:
        data: Graph object exposing ``edge_index`` and, optionally,
            ``num_nodes`` (e.g. a full graph or a single cluster of it).
        walk_length: Random-walk length forwarded to ``Node2Vec``.
        context_size: Context window size forwarded to ``Node2Vec``.
        walks_per_node: Number of walks per node forwarded to ``Node2Vec``.
        embedding_dim: Size of each node embedding (default 100).
        num_epochs: Number of passes over the walk loader (default 1).
        batch_size: Batch size of the walk loader (default 256).
        lr: Learning rate for ``SparseAdam`` (default 0.01).
        num_workers: Worker processes for the walk loader (default 4).

    Returns:
        torch.Tensor: Detached CPU copy of ``model.embedding.weight``.

    Raises:
        TypeError: If ``data`` has no usable ``edge_index`` (for example when a
            plain feature tensor is passed instead of a graph object).
    """
    edge_index = getattr(data, 'edge_index', None)
    if edge_index is None:
        raise TypeError(
            'node2vec() expects a graph object with an `edge_index` attribute, '
            f'got {type(data).__name__}')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Pass the node count explicitly when it is known; otherwise Node2Vec has
    # to infer it from edge_index and isolated trailing nodes would get no row.
    model = Node2Vec(
        edge_index,
        embedding_dim,
        walk_length,
        context_size,
        walks_per_node,
        num_nodes=getattr(data, 'num_nodes', None),
        sparse=True,
    ).to(device)

    walk_loader = model.loader(batch_size=batch_size, shuffle=True,
                               num_workers=num_workers)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=lr)

    model.train()
    # Training needs autograd even if the caller wrapped us in torch.no_grad().
    with torch.enable_grad():
        for epoch in range(1, num_epochs + 1):
            for step, (pos_rw, neg_rw) in enumerate(walk_loader, start=1):
                optimizer.zero_grad()
                loss = model.loss(pos_rw.to(device), neg_rw.to(device))
                loss.backward()
                optimizer.step()

                print(f'Epoch: {epoch:02d}, Step: {step:03d}/{len(walk_loader)}, '
                      f'Loss: {loss.item():.4f}')

    return model.embedding.weight.detach().cpu()


In [None]:
# Partition the graph into 1000 clusters.
cluster_data = loader.ClusterData(data, num_parts=1000, recursive=False)

# Train Node2Vec on each cluster's subgraph and keep the result. Assigning to
# `cl.x` would only modify the temporary per-cluster object, so the embeddings
# are collected explicitly. No torch.no_grad() here: node2vec() trains a model.
cluster_embeddings = []
for cl in cluster_data:
    cluster_embeddings.append(node2vec(cl, 40, 20, 10))

# NOTE(review): this assumes iterating `cluster_data` yields clusters as
# consecutive row ranges of `cluster_data.data`, so that concatenating in
# iteration order lines up with the labels/masks below -- TODO confirm for the
# installed torch_geometric version.
embedding = torch.cat(cluster_embeddings, dim=0)
assert embedding.size(0) == cluster_data.data.y.size(0), (
    f'embedding rows ({embedding.size(0)}) do not match the number of labels '
    f'({cluster_data.data.y.size(0)})')

op_dict = {
    'embedding': embedding,
    'label': cluster_data.data.y.to(torch.long),
    'train_idx': cluster_data.data.train_mask,
    'valid_idx': cluster_data.data.valid_mask,
    'test_idx': cluster_data.data.test_mask,
}

torch.save(op_dict, 'NODE2VEC_EMBEDDINGS.pt')