In [1]:
# source: https://ogb.stanford.edu/docs/linkprop/#ogbl-collab
import numpy as np
import torch
from ogb.linkproppred import LinkPropPredDataset

# Load the ogbl-collab link-prediction dataset through the OGB loader.
dataset = LinkPropPredDataset(name="ogbl-collab")

# The edge split is a mapping keyed by "train" / "valid" / "test".
split_edge = dataset.get_edge_split()
train_edge = split_edge["train"]
valid_edge = split_edge["valid"]
test_edge = split_edge["test"]

# The first (index 0) entry of the dataset is the graph; later cells read
# its "node_feat" and "num_nodes" entries.
graph = dataset[0]


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from gli.utils import save_data
# Pre-stored negative edges for the validation and test splits.
# Per the original note they are kept as edge arrays of shape (N, 2).
task_data = dict(
    val_neg=valid_edge["edge_neg"],
    test_neg=test_edge["edge_neg"],
)
save_data("ogbl-collab_task_prestore_neg", **task_data)

Save all dense arrays to ogbl-collab_task_prestore_neg.npz, including ['val_neg', 'test_neg']


In [3]:
import dgl

# Merge the train / valid / test splits into a single edge set.
# A fresh dict is built here on purpose: the previous version aliased
# `train_edge` and wrote the concatenated arrays back into it, so the cell was
# not idempotent (re-running it appended the valid/test edges again) and
# `train_edge` silently stopped containing only the training edges.
edges = {
    key: np.concatenate([split[key] for split in (train_edge, valid_edge, test_edge)])
    for key in ("edge", "weight", "year")
}

# Build the DGL graph from the (E, 2) edge array: column 0 holds the source
# ids and column 1 the destination ids.  `num_nodes` is passed explicitly so
# that the node count always matches the rows of the node-feature matrix, even
# if the highest-numbered nodes have no incident edge.
g = dgl.graph(
    (edges["edge"][:, 0], edges["edge"][:, 1]),
    num_nodes=graph["num_nodes"],
)

# Attach per-edge and per-node attributes as torch tensors.
g.edata["weight"] = torch.from_numpy(edges["weight"])
g.edata["year"] = torch.from_numpy(edges["year"])
g.ndata["feat"] = torch.from_numpy(graph["node_feat"])

In [4]:
# Preview the graph's edges as an (E, 2) tensor of (source, destination) pairs.
torch.stack(g.edges(), dim=1)

tensor([[150989, 224881],
        [150989, 224881],
        [180078, 199043],
        ...,
        [ 47058, 190305],
        [216257, 190305],
        [ 32459, 190305]])

In [5]:
# Gather every array that goes into the main data file.
# The shapes noted below are the ones printed by the shape check recorded in
# this notebook.
node_feats = graph["node_feat"]  # (235868, 128)
edge = torch.stack(g.edges()).T.numpy()  # (1285465, 2)
edge_weight = g.edata["weight"].numpy()  # (1285465,)
edge_year = g.edata["year"].numpy()  # (1285465,)

# All-ones row vectors with one entry per node / per edge, plus a running
# integer id for every edge.
num_edges = edge.shape[0]
node_list = np.ones((1, graph["num_nodes"]))
edge_list = np.ones((1, num_edges))
edge_id = np.arange(num_edges)

# Key order is kept as-is; it determines the order of the printed summary and
# of the array names reported when saving.
data = dict(
    node_feats=node_feats,
    edge_year=edge_year,
    edge_weight=edge_weight,
    edge=edge,
    node_list=node_list,
    edge_list=edge_list,
    edge_id=edge_id,
)

In [6]:
# Sanity check: show the shape of every array that is about to be saved.
for name, array in data.items():
    print(name, array.shape)

node_feats (235868, 128)
edge_year (1285465,)
edge_weight (1285465,)
edge (1285465, 2)
node_list (1, 235868)
edge_list (1, 1285465)
edge_id (1285465,)


In [7]:
# Persist every array collected in `data` under the "ogbl-collab" prefix
# (the recorded output reports the target file as ogbl-collab.npz).
save_data("ogbl-collab", **data)

Save all dense arrays to ogbl-collab.npz, including ['node_feats', 'edge_year', 'edge_weight', 'edge', 'node_list', 'edge_list', 'edge_id']
