In [1]:
import json
import numpy as np
import torch
import scipy.sparse as sparse
from ogb.graphproppred import GraphPropPredDataset
from torch_geometric.loader import DataLoader

# Load ogbg-molsider through OGB; this downloads and processes the data
# at './dataset/ogbg_molsider/'.
dataset = GraphPropPredDataset(name="ogbg-molsider")

# Retrieve the dataset's index split and unpack its three partitions.
split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = (
    split_idx[part] for part in ("train", "valid", "test")
)

  from .autonotebook import tqdm as notebook_tqdm


Downloading http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/sider.zip


Downloaded 0.00 GB: 100%|██████████| 2/2 [00:00<00:00,  8.88it/s]


Extracting dataset/sider.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1427/1427 [00:00<00:00, 58579.21it/s]


Saving...


In [2]:
# Show which fields the graph dictionary of the first sample exposes.
first_graph = dataset[0][0]
print(first_graph.keys())

dict_keys(['edge_index', 'edge_feat', 'node_feat', 'num_nodes'])


In [3]:
from tqdm import tqdm
# Per-graph accumulators (five independent lists), filled while iterating
# over the dataset.
node_list, labels, edges, edge_feats, node_feats = ([] for _ in range(5))
# Running total of nodes collected so far.
num_nodes = 0

In [4]:
# Merge every graph into one global node-id space: the node ids of each graph
# are shifted by the number of nodes contributed by the graphs before it.
# The accumulators are reset here so that re-running this cell rebuilds them
# from scratch instead of appending a second, offset-shifted copy.
node_list, labels, edges, edge_feats, node_feats = [], [], [], [], []
num_nodes = 0
for g, label in tqdm(dataset):
    # Global ids of this graph's nodes (all nodes live in a single graph).
    node_list.append(np.arange(g["num_nodes"]) + num_nodes)
    # Transpose edge_index and shift it by the same offset as the node ids.
    # Presumably edge_index is (2, num_edges), giving one edge per row -- TODO confirm.
    edges.append(np.stack(g["edge_index"]).T + num_nodes)
    edge_feats.append(g["edge_feat"])
    node_feats.append(g["node_feat"])
    labels.append(label)
    # Advance the offset for the next graph.
    num_nodes += g["num_nodes"]

100%|██████████| 1427/1427 [00:00<00:00, 41557.17it/s]


In [5]:
# Collapse the per-graph lists into single arrays covering the whole dataset.
_node_feats = np.concatenate(node_feats, axis=0)
_edge_feats = np.concatenate(edge_feats, axis=0)
_edges = np.concatenate(edges, axis=0)
# Stack one label entry per graph, then drop any length-1 axes.
_labels = np.squeeze(np.stack(labels, axis=0))

In [6]:
# Build a sparse graph-by-node membership matrix: entry (i, j) is 1 when
# global node j is listed in node_list[i].
# Row indices are generated as integers with np.repeat so that coo_matrix
# receives integer coordinates instead of relying on an implicit
# float-to-int cast of the indices.
nodes_per_graph = [len(col) for col in node_list]
rows = np.repeat(np.arange(len(node_list)), nodes_per_graph)
cols = np.concatenate(node_list)
# Stored values stay float64 ones, one per (graph, node) pair.
data = np.ones(len(rows), dtype=np.float64)
_node_list = sparse.coo_matrix((data, (rows, cols)), shape=(len(node_list), num_nodes))

In [7]:
from gli.utils import save_data
# Bundle the arrays under the keyword names handed to save_data. Per the
# cell output, the dense arrays are written to ogbg-molsider.npz and the
# sparse node_list to ogbg-molsider_node_list.sparse.npz.
data = dict([
    ("node_feats", _node_feats),
    ("edge", _edges),
    ("graph_class", _labels),
    ("edge_feats", _edge_feats),
    ("node_list", _node_list),
])
save_data("ogbg-molsider", **data)

Save all dense arrays to ogbg-molsider.npz, including ['node_feats', 'edge', 'graph_class', 'edge_feats']
Save sparse matrix node_list to ogbg-molsider_node_list.sparse.npz


In [8]:
# Persist the train/validation/test index split; note the validation
# indices are stored under the key "val".
task_data = dict(
    zip(("train", "val", "test"), (train_idx, valid_idx, test_idx))
)
save_data("ogbg-molsider_task", **task_data)

Save all dense arrays to ogbg-molsider_task.npz, including ['train', 'val', 'test']
