In [1]:
# `os` comes first because environment variables are set between the imports
# below; the relative order of these statements is intentional.
import os
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# seen on some installs — confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch
import numpy as np
# Expose the installed torch version through the environment.
os.environ['TORCH'] = torch.__version__
# Set before `import dgl` below — presumably so DGL selects the PyTorch
# backend at import time; keep this ordering.
os.environ['DGLBACKEND'] = "pytorch"
import dgl
import dgl.graphbolt as gb
# CPU device handle (nothing is installed here).
device = torch.device("cpu")

OnDiskDataset for Heterogeneous Graph

In [2]:
# Every file of the on-disk dataset is written beneath this folder.
base_dir = "../../../data/dgl/ondisk_dataset_heterograph"
# `exist_ok=True` keeps the cell re-runnable when the folder already exists.
os.makedirs(name=base_dir, exist_ok=True)
print(f"Created base directory: {base_dir}")

Created base directory: ../../../data/dgl/ondisk_dataset_heterograph


In [4]:
import numpy as np
import pandas as pd

# The toy heterogeneous graph consists of
#   * two node types: `user` and `item`
#   * two edge types: `user:like:item` and `user:follow:user`
# Every node type gets `num_nodes` nodes and every edge type `num_edges` edges.
num_nodes = 1000
num_edges = 10 * num_nodes

# Destination CSV files, one per edge type.
like_edges_path = os.path.join(base_dir, "like-edges.csv")
follow_edges_path = os.path.join(base_dir, "follow-edges.csv")

# --- Edge type "user:like:item" ---------------------------------------------
# One row per edge; both columns are integer IDs drawn from [0, num_nodes).
like_edges = np.random.randint(low=0, high=num_nodes, size=(num_edges, 2))
print(f"Part of [user:like:item] edges: {like_edges[:5, :]}\n")

# Written as a bare two-column CSV: no index column, no header row.
df = pd.DataFrame(like_edges)
df.to_csv(like_edges_path, header=False, index=False)
print(f"[user:like:item] edges are saved into {like_edges_path}\n")

# --- Edge type "user:follow:user" -------------------------------------------
follow_edges = np.random.randint(low=0, high=num_nodes, size=(num_edges, 2))
print(f"Part of [user:follow:user] edges: {follow_edges[:5, :]}\n")

df = pd.DataFrame(follow_edges)
df.to_csv(follow_edges_path, header=False, index=False)
print(f"[user:follow:user] edges are saved into {follow_edges_path}\n")

Part of [user:like:item] edges: [[806 171]
 [788 606]
 [ 95  13]
 [630 581]
 [577 563]]

[user:like:item] edges are saved into ../../../data/dgl/ondisk_dataset_heterograph\like-edges.csv

Part of [user:follow:user] edges: [[668 640]
 [335  69]
 [582 800]
 [261 347]
 [288 639]]

[user:follow:user] edges are saved into ../../../data/dgl/ondisk_dataset_heterograph\follow-edges.csv



Generate feature data for graph

In [5]:
# Output files. Features kept as NumPy arrays are written to `.npy` files,
# features kept as torch tensors to `.pt` files.
node_user_feat_0_path = os.path.join(base_dir, "node-user-feat-0.npy")
node_user_feat_1_path = os.path.join(base_dir, "node-user-feat-1.pt")
node_item_feat_0_path = os.path.join(base_dir, "node-item-feat-0.npy")
node_item_feat_1_path = os.path.join(base_dir, "node-item-feat-1.pt")
edge_like_feat_0_path = os.path.join(base_dir, "edge-like-feat-0.npy")
edge_like_feat_1_path = os.path.join(base_dir, "edge-like-feat-1.pt")
edge_follow_feat_0_path = os.path.join(base_dir, "edge-follow-feat-0.npy")
edge_follow_feat_1_path = os.path.join(base_dir, "edge-follow-feat-1.pt")

# Each feature below is a random matrix with one row per node (or edge) and
# five columns. The generate -> preview -> save sequence is repeated per
# feature, in the same order for both random number generators.

# node[user] / feat_0 (NumPy)
node_user_feat_0 = np.random.rand(num_nodes, 5)
print(f"Part of node[user] feature [feat_0]: {node_user_feat_0[:3, :]}")
np.save(file=node_user_feat_0_path, arr=node_user_feat_0)
print(f"Node[user] feature [feat_0] is saved to {node_user_feat_0_path}\n")

# node[user] / feat_1 (torch)
node_user_feat_1 = torch.rand((num_nodes, 5))
print(f"Part of node[user] feature [feat_1]: {node_user_feat_1[:3, :]}")
torch.save(obj=node_user_feat_1, f=node_user_feat_1_path)
print(f"Node[user] feature [feat_1] is saved to {node_user_feat_1_path}\n")

# node[item] / feat_0 (NumPy)
node_item_feat_0 = np.random.rand(num_nodes, 5)
print(f"Part of node[item] feature [feat_0]: {node_item_feat_0[:3, :]}")
np.save(file=node_item_feat_0_path, arr=node_item_feat_0)
print(f"Node[item] feature [feat_0] is saved to {node_item_feat_0_path}\n")

# node[item] / feat_1 (torch)
node_item_feat_1 = torch.rand((num_nodes, 5))
print(f"Part of node[item] feature [feat_1]: {node_item_feat_1[:3, :]}")
torch.save(obj=node_item_feat_1, f=node_item_feat_1_path)
print(f"Node[item] feature [feat_1] is saved to {node_item_feat_1_path}\n")

# edge[user:like:item] / feat_0 (NumPy)
edge_like_feat_0 = np.random.rand(num_edges, 5)
print(f"Part of edge[user:like:item] feature [feat_0]: {edge_like_feat_0[:3, :]}")
np.save(file=edge_like_feat_0_path, arr=edge_like_feat_0)
print(f"Edge[user:like:item] feature [feat_0] is saved to {edge_like_feat_0_path}\n")

# edge[user:like:item] / feat_1 (torch)
edge_like_feat_1 = torch.rand((num_edges, 5))
print(f"Part of edge[user:like:item] feature [feat_1]: {edge_like_feat_1[:3, :]}")
torch.save(obj=edge_like_feat_1, f=edge_like_feat_1_path)
print(f"Edge[user:like:item] feature [feat_1] is saved to {edge_like_feat_1_path}\n")

# edge[user:follow:user] / feat_0 (NumPy)
edge_follow_feat_0 = np.random.rand(num_edges, 5)
print(f"Part of edge[user:follow:user] feature [feat_0]: {edge_follow_feat_0[:3, :]}")
np.save(file=edge_follow_feat_0_path, arr=edge_follow_feat_0)
print(f"Edge[user:follow:user] feature [feat_0] is saved to {edge_follow_feat_0_path}\n")

# edge[user:follow:user] / feat_1 (torch)
edge_follow_feat_1 = torch.rand((num_edges, 5))
print(f"Part of edge[user:follow:user] feature [feat_1]: {edge_follow_feat_1[:3, :]}")
torch.save(obj=edge_follow_feat_1, f=edge_follow_feat_1_path)
print(f"Edge[user:follow:user] feature [feat_1] is saved to {edge_follow_feat_1_path}\n")

Part of node[user] feature [feat_0]: [[0.80101537 0.51340171 0.84299051 0.90706212 0.78711545]
 [0.80348895 0.23224867 0.77173095 0.94571726 0.74264715]
 [0.83609487 0.16785276 0.16407082 0.50717549 0.57004077]]
Node[user] feature [feat_0] is saved to ../../../data/dgl/ondisk_dataset_heterograph\node-user-feat-0.npy

Part of node[user] feature [feat_1]: tensor([[0.5484, 0.9275, 0.1277, 0.0284, 0.6647],
        [0.9776, 0.2677, 0.4333, 0.2919, 0.9224],
        [0.1965, 0.2762, 0.3769, 0.6497, 0.1985]])
Node[user] feature [feat_1] is saved to ../../../data/dgl/ondisk_dataset_heterograph\node-user-feat-1.pt

Part of node[item] feature [feat_0]: [[0.64508235 0.27072738 0.4301315  0.25681379 0.95190907]
 [0.47436373 0.48809612 0.78121623 0.68208359 0.7024437 ]
 [0.28787125 0.87666685 0.88608175 0.80838994 0.60949326]]
Node[item] feature [feat_0] is saved to ../../../data/dgl/ondisk_dataset_heterograph\node-item-feat-0.npy

Part of node[item] feature [feat_1]: tensor([[0.6332, 0.8723, 0.1656

Node Classification Task

In [6]:
# Build train / validation / test item sets for every node type.
# Split sizes: 60% train, 20% validation, the remainder is test.
num_trains = int(num_nodes * 0.6)
num_vals = int(num_nodes * 0.2)
num_tests = num_nodes - num_trains - num_vals

# An independent random ordering of the node IDs for each node type.
user_ids = np.arange(num_nodes)
np.random.shuffle(user_ids)

item_ids = np.arange(num_nodes)
np.random.shuffle(item_ids)

# Cut each shuffled ordering into three consecutive pieces:
# [0, num_trains) -> train, the next num_vals IDs -> validation, rest -> test.
nc_train_user_ids, nc_val_user_ids, nc_test_user_ids = np.split(
    user_ids, [num_trains, num_trains + num_vals]
)
nc_train_item_ids, nc_val_item_ids, nc_test_item_ids = np.split(
    item_ids, [num_trains, num_trains + num_vals]
)

# Output files: IDs are stored as NumPy arrays, labels as torch tensors.
nc_train_user_ids_path = os.path.join(base_dir, "nc-train-user-ids.npy")
nc_train_user_labels_path = os.path.join(base_dir, "nc-train-user-labels.pt")
nc_train_item_ids_path = os.path.join(base_dir, "nc-train-item-ids.npy")
nc_train_item_labels_path = os.path.join(base_dir, "nc-train-item-labels.pt")
nc_val_user_ids_path = os.path.join(base_dir, "nc-val-user-ids.npy")
nc_val_user_labels_path = os.path.join(base_dir, "nc-val-user-labels.pt")
nc_val_item_ids_path = os.path.join(base_dir, "nc-val-item-ids.npy")
nc_val_item_labels_path = os.path.join(base_dir, "nc-val-item-labels.pt")
nc_test_user_ids_path = os.path.join(base_dir, "nc-test-user-ids.npy")
nc_test_user_labels_path = os.path.join(base_dir, "nc-test-user-labels.pt")
nc_test_item_ids_path = os.path.join(base_dir, "nc-test-item-ids.npy")
nc_test_item_labels_path = os.path.join(base_dir, "nc-test-item-labels.pt")

# ---------------------------------------------------------------- train ----
# user IDs
print(f"Part of train ids[user] for node classification: {nc_train_user_ids[:3]}")
np.save(file=nc_train_user_ids_path, arr=nc_train_user_ids)
print(f"NC train ids[user] are saved to {nc_train_user_ids_path}\n")

# user labels: random integers in [0, 10), one per train ID
nc_train_user_labels = torch.randint(low=0, high=10, size=(num_trains,))
print(f"Part of train labels[user] for node classification: {nc_train_user_labels[:3]}")
torch.save(obj=nc_train_user_labels, f=nc_train_user_labels_path)
print(f"NC train labels[user] are saved to {nc_train_user_labels_path}\n")

# item IDs
print(f"Part of train ids[item] for node classification: {nc_train_item_ids[:3]}")
np.save(file=nc_train_item_ids_path, arr=nc_train_item_ids)
print(f"NC train ids[item] are saved to {nc_train_item_ids_path}\n")

# item labels
nc_train_item_labels = torch.randint(low=0, high=10, size=(num_trains,))
print(f"Part of train labels[item] for node classification: {nc_train_item_labels[:3]}")
torch.save(obj=nc_train_item_labels, f=nc_train_item_labels_path)
print(f"NC train labels[item] are saved to {nc_train_item_labels_path}\n")

# ----------------------------------------------------------- validation ----
# user IDs
print(f"Part of val ids[user] for node classification: {nc_val_user_ids[:3]}")
np.save(file=nc_val_user_ids_path, arr=nc_val_user_ids)
print(f"NC val ids[user] are saved to {nc_val_user_ids_path}\n")

# user labels
nc_val_user_labels = torch.randint(low=0, high=10, size=(num_vals,))
print(f"Part of val labels[user] for node classification: {nc_val_user_labels[:3]}")
torch.save(obj=nc_val_user_labels, f=nc_val_user_labels_path)
print(f"NC val labels[user] are saved to {nc_val_user_labels_path}\n")

# item IDs
print(f"Part of val ids[item] for node classification: {nc_val_item_ids[:3]}")
np.save(file=nc_val_item_ids_path, arr=nc_val_item_ids)
print(f"NC val ids[item] are saved to {nc_val_item_ids_path}\n")

# item labels
nc_val_item_labels = torch.randint(low=0, high=10, size=(num_vals,))
print(f"Part of val labels[item] for node classification: {nc_val_item_labels[:3]}")
torch.save(obj=nc_val_item_labels, f=nc_val_item_labels_path)
print(f"NC val labels[item] are saved to {nc_val_item_labels_path}\n")

# ----------------------------------------------------------------- test ----
# user IDs
print(f"Part of test ids[user] for node classification: {nc_test_user_ids[:3]}")
np.save(file=nc_test_user_ids_path, arr=nc_test_user_ids)
print(f"NC test ids[user] are saved to {nc_test_user_ids_path}\n")

# user labels
nc_test_user_labels = torch.randint(low=0, high=10, size=(num_tests,))
print(f"Part of test labels[user] for node classification: {nc_test_user_labels[:3]}")
torch.save(obj=nc_test_user_labels, f=nc_test_user_labels_path)
print(f"NC test labels[user] are saved to {nc_test_user_labels_path}\n")

# item IDs
print(f"Part of test ids[item] for node classification: {nc_test_item_ids[:3]}")
np.save(file=nc_test_item_ids_path, arr=nc_test_item_ids)
print(f"NC test ids[item] are saved to {nc_test_item_ids_path}\n")

# item labels
nc_test_item_labels = torch.randint(low=0, high=10, size=(num_tests,))
print(f"Part of test labels[item] for node classification: {nc_test_item_labels[:3]}")
torch.save(obj=nc_test_item_labels, f=nc_test_item_labels_path)
print(f"NC test labels[item] are saved to {nc_test_item_labels_path}\n")

Part of train ids[user] for node classification: [ 94 829 999]
NC train ids[user] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-train-user-ids.npy

Part of train labels[user] for node classification: tensor([0, 0, 1])
NC train labels[user] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-train-user-labels.pt

Part of train ids[item] for node classification: [308 449 368]
NC train ids[item] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-train-item-ids.npy

Part of train labels[item] for node classification: tensor([9, 6, 4])
NC train labels[item] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-train-item-labels.pt

Part of val ids[user] for node classification: [741 462 915]
NC val ids[user] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-val-user-ids.npy

Part of val labels[user] for node classification: tensor([1, 6, 7])
NC val labels[user] are saved to ../../../data/dgl/ondisk_dataset_heterograph\nc-val-user-labels.pt

Link Prediction Task

In [7]:
# Build train / validation / test item sets for every edge type.
# Split sizes: 60% train, 20% validation, the remainder is test.
# NOTE: `num_trains`, `num_vals` and `num_tests` are re-bound here in terms of
# edges, replacing any earlier values bound to the same names.
num_trains = int(num_edges * 0.6)
num_vals = int(num_edges * 0.2)
num_tests = num_edges - num_trains - num_vals

# Train node pairs for user:like:item (first `num_trains` edges).
lp_train_like_node_pairs_path = os.path.join(base_dir, "lp-train-like-node-pairs.npy")
lp_train_like_node_pairs = like_edges[:num_trains, :]
print(f"Part of train node pairs[user:like:item] for link prediction: {lp_train_like_node_pairs[:3]}")
np.save(lp_train_like_node_pairs_path, lp_train_like_node_pairs)
print(f"LP train node pairs[user:like:item] are saved to {lp_train_like_node_pairs_path}\n")

# Train node pairs for user:follow:user (first `num_trains` edges).
lp_train_follow_node_pairs_path = os.path.join(base_dir, "lp-train-follow-node-pairs.npy")
lp_train_follow_node_pairs = follow_edges[:num_trains, :]
print(f"Part of train node pairs[user:follow:user] for link prediction: {lp_train_follow_node_pairs[:3]}")
np.save(lp_train_follow_node_pairs_path, lp_train_follow_node_pairs)
print(f"LP train node pairs[user:follow:user] are saved to {lp_train_follow_node_pairs_path}\n")

# Val node pairs for user:like:item (the `num_vals` edges after the train ones).
lp_val_like_node_pairs_path = os.path.join(base_dir, "lp-val-like-node-pairs.npy")
lp_val_like_node_pairs = like_edges[num_trains:num_trains+num_vals, :]
print(f"Part of val node pairs[user:like:item] for link prediction: {lp_val_like_node_pairs[:3]}")
np.save(lp_val_like_node_pairs_path, lp_val_like_node_pairs)
print(f"LP val node pairs[user:like:item] are saved to {lp_val_like_node_pairs_path}\n")

# Val negative dsts for user:like:item: 10 random destination IDs per val pair.
lp_val_like_neg_dsts_path = os.path.join(base_dir, "lp-val-like-neg-dsts.pt")
lp_val_like_neg_dsts = torch.randint(0, num_nodes, (num_vals, 10))
print(f"Part of val negative dsts[user:like:item] for link prediction: {lp_val_like_neg_dsts[:3]}")
torch.save(lp_val_like_neg_dsts, lp_val_like_neg_dsts_path)
print(f"LP val negative dsts[user:like:item] are saved to {lp_val_like_neg_dsts_path}\n")

# Val node pairs for user:follow:user.
lp_val_follow_node_pairs_path = os.path.join(base_dir, "lp-val-follow-node-pairs.npy")
lp_val_follow_node_pairs = follow_edges[num_trains:num_trains+num_vals, :]
print(f"Part of val node pairs[user:follow:user] for link prediction: {lp_val_follow_node_pairs[:3]}")
np.save(lp_val_follow_node_pairs_path, lp_val_follow_node_pairs)
print(f"LP val node pairs[user:follow:user] are saved to {lp_val_follow_node_pairs_path}\n")

# Val negative dsts for user:follow:user.
lp_val_follow_neg_dsts_path = os.path.join(base_dir, "lp-val-follow-neg-dsts.pt")
lp_val_follow_neg_dsts = torch.randint(0, num_nodes, (num_vals, 10))
print(f"Part of val negative dsts[user:follow:user] for link prediction: {lp_val_follow_neg_dsts[:3]}")
torch.save(lp_val_follow_neg_dsts, lp_val_follow_neg_dsts_path)
print(f"LP val negative dsts[user:follow:user] are saved to {lp_val_follow_neg_dsts_path}\n")

# Test node pairs for user:like:item.
lp_test_like_node_pairs_path = os.path.join(base_dir, "lp-test-like-node-pairs.npy")
# The colon in `-num_tests:` matters: it selects the LAST `num_tests` rows.
# `like_edges[-num_tests, :]` would pick one single row of shape (2,), which
# does not line up with the (num_tests, 10) negative destinations below.
lp_test_like_node_pairs = like_edges[-num_tests:, :]
print(f"Part of test node pairs[user:like:item] for link prediction: {lp_test_like_node_pairs[:3]}")
np.save(lp_test_like_node_pairs_path, lp_test_like_node_pairs)
print(f"LP test node pairs[user:like:item] are saved to {lp_test_like_node_pairs_path}\n")

# Test negative dsts for user:like:item: 10 random destination IDs per test pair.
lp_test_like_neg_dsts_path = os.path.join(base_dir, "lp-test-like-neg-dsts.pt")
lp_test_like_neg_dsts = torch.randint(0, num_nodes, (num_tests, 10))
print(f"Part of test negative dsts[user:like:item] for link prediction: {lp_test_like_neg_dsts[:3]}")
torch.save(lp_test_like_neg_dsts, lp_test_like_neg_dsts_path)
print(f"LP test negative dsts[user:like:item] are saved to {lp_test_like_neg_dsts_path}\n")

# Test node pairs for user:follow:user.
lp_test_follow_node_pairs_path = os.path.join(base_dir, "lp-test-follow-node-pairs.npy")
# Same slice as for user:like:item: the last `num_tests` rows, not one row.
lp_test_follow_node_pairs = follow_edges[-num_tests:, :]
print(f"Part of test node pairs[user:follow:user] for link prediction: {lp_test_follow_node_pairs[:3]}")
np.save(lp_test_follow_node_pairs_path, lp_test_follow_node_pairs)
print(f"LP test node pairs[user:follow:user] are saved to {lp_test_follow_node_pairs_path}\n")

# Test negative dsts for user:follow:user.
lp_test_follow_neg_dsts_path = os.path.join(base_dir, "lp-test-follow-neg-dsts.pt")
lp_test_follow_neg_dsts = torch.randint(0, num_nodes, (num_tests, 10))
print(f"Part of test negative dsts[user:follow:user] for link prediction: {lp_test_follow_neg_dsts[:3]}")
torch.save(lp_test_follow_neg_dsts, lp_test_follow_neg_dsts_path)
print(f"LP test negative dsts[user:follow:user] are saved to {lp_test_follow_neg_dsts_path}\n")

Part of train node pairs[user:like:item] for link prediction: [[806 171]
 [788 606]
 [ 95  13]]
LP train node pairs[user:like:item] are saved to ../../../data/dgl/ondisk_dataset_heterograph\lp-train-like-node-pairs.npy

Part of train node pairs[user:follow:user] for link prediction: [[668 640]
 [335  69]
 [582 800]]
LP train node pairs[user:follow:user] are saved to ../../../data/dgl/ondisk_dataset_heterograph\lp-train-follow-node-pairs.npy

Part of val node pairs[user:like:item] for link prediction: [[547 235]
 [235 412]
 [550 498]]
LP val node pairs[user:like:item] are saved to ../../../data/dgl/ondisk_dataset_heterograph\lp-val-like-node-pairs.npy

Part of val negative dsts[user:like:item] for link prediction: tensor([[632, 484, 252, 525, 653, 696, 770, 866, 375, 955],
        [992, 983, 272,  24, 995, 258, 285, 806, 379, 132],
        [915, 895, 790, 865, 923, 729, 348,  26, 764, 563]])
LP val negative dsts[user:like:item] are saved to ../../../data/dgl/ondisk_dataset_heterograph\l

Write the dataset metadata to metadata.yaml (see the OnDiskDataset YAML specification for more details)

In [8]:
# Metadata describing the dataset: graph topology files, feature files and the
# train/validation/test sets of two tasks. Every `path:` entry is the bare file
# name (os.path.basename) of a file written under `base_dir` by earlier cells.
# Everything between the triple quotes is written to disk verbatim, so do not
# put Python comments inside the string.
yaml_content = f"""
    dataset_name: heterogeneous_graph_nc_lp
    graph:
      nodes:
        - type: user
          num: {num_nodes}
        - type: item
          num: {num_nodes}
      edges:
        - type: "user:like:item"
          format: csv
          path: {os.path.basename(like_edges_path)}
        - type: "user:follow:user"
          format: csv
          path: {os.path.basename(follow_edges_path)}
    feature_data:
      - domain: node
        type: user
        name: feat_0
        format: numpy
        path: {os.path.basename(node_user_feat_0_path)}
      - domain: node
        type: user
        name: feat_1
        format: torch
        path: {os.path.basename(node_user_feat_1_path)}
      - domain: node
        type: item
        name: feat_0
        format: numpy
        path: {os.path.basename(node_item_feat_0_path)}
      - domain: node
        type: item
        name: feat_1
        format: torch
        path: {os.path.basename(node_item_feat_1_path)}
      - domain: edge
        type: "user:like:item"
        name: feat_0
        format: numpy
        path: {os.path.basename(edge_like_feat_0_path)}
      - domain: edge
        type: "user:like:item"
        name: feat_1
        format: torch
        path: {os.path.basename(edge_like_feat_1_path)}
      - domain: edge
        type: "user:follow:user"
        name: feat_0
        format: numpy
        path: {os.path.basename(edge_follow_feat_0_path)}
      - domain: edge
        type: "user:follow:user"
        name: feat_1
        format: torch
        path: {os.path.basename(edge_follow_feat_1_path)}
    tasks:
      - name: node_classification
        num_classes: 10
        train_set:
          - type: user
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_train_user_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_train_user_labels_path)}
          - type: item
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_train_item_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_train_item_labels_path)}
        validation_set:
          - type: user
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_val_user_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_val_user_labels_path)}
          - type: item
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_val_item_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_val_item_labels_path)}
        test_set:
          - type: user
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_test_user_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_test_user_labels_path)}
          - type: item
            data:
              - name: seed_nodes
                format: numpy
                path: {os.path.basename(nc_test_item_ids_path)}
              - name: labels
                format: torch
                path: {os.path.basename(nc_test_item_labels_path)}
      - name: link_prediction
        num_classes: 10
        train_set:
          - type: "user:like:item"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_train_like_node_pairs_path)}
          - type: "user:follow:user"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_train_follow_node_pairs_path)}
        validation_set:
          - type: "user:like:item"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_val_like_node_pairs_path)}
              - name: negative_dsts
                format: torch
                path: {os.path.basename(lp_val_like_neg_dsts_path)}
          - type: "user:follow:user"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_val_follow_node_pairs_path)}
              - name: negative_dsts
                format: torch
                path: {os.path.basename(lp_val_follow_neg_dsts_path)}
        test_set:
          - type: "user:like:item"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_test_like_node_pairs_path)}
              - name: negative_dsts
                format: torch
                path: {os.path.basename(lp_test_like_neg_dsts_path)}
          - type: "user:follow:user"
            data:
              - name: node_pairs
                format: numpy
                path: {os.path.basename(lp_test_follow_node_pairs_path)}
              - name: negative_dsts
                format: torch
                path: {os.path.basename(lp_test_follow_neg_dsts_path)}
"""
# Write the metadata next to the data files; an existing metadata.yaml in
# `base_dir` is overwritten.
metadata_path = os.path.join(base_dir, "metadata.yaml")
with open(metadata_path, "w") as f:
  f.write(yaml_content)

Load the saved dataset

In [12]:
# Load the dataset written to `base_dir`. Only the node classification task is
# requested, so the first entry of `dataset.tasks` is that task.
dataset = gb.OnDiskDataset(base_dir).load(tasks="node_classification")
graph = dataset.graph
print(f"Loaded graph: {graph}\n")

feature = dataset.feature
print(f"Loaded feature store: {feature}\n")

tasks = dataset.tasks
# This entry used to be bound to `lp_task` and printed as "link prediction",
# although link prediction is never requested in the load call above.
nc_task = tasks[0]
print(f"Loaded node classification task: {nc_task}\n")

# NOTE(review): legacy alias so that any later cell still referring to
# `lp_task` keeps running. It holds the node classification task, NOT link
# prediction data; request the "link_prediction" task in `load()` before
# relying on it.
lp_task = nc_task

The dataset is already preprocessed.
Loaded graph: FusedCSCSamplingGraph(csc_indptr=tensor([    0,    11,    25,  ..., 19978, 19988, 20000], dtype=torch.int32),
                      indices=tensor([1659, 1975, 1440,  ..., 1582, 1096, 1106], dtype=torch.int32),
                      total_num_nodes=2000, num_edges={'user:follow:user': 10000, 'user:like:item': 10000},
                      node_type_offset=tensor([   0, 1000, 2000], dtype=torch.int32),
                      type_per_edge=tensor([1, 1, 1,  ..., 0, 0, 0], dtype=torch.uint8),
                      node_type_to_id={'item': 0, 'user': 1},
                      edge_type_to_id={'user:follow:user': 0, 'user:like:item': 1},)

Loaded feature store: TorchBasedFeatureStore(
    {(<OnDiskFeatureDataDomain.NODE: 'node'>, 'user', 'feat_0'): TorchBasedFeature(
        feature=tensor([[0.8010, 0.5134, 0.8430, 0.9071, 0.7871],
                        [0.8035, 0.2322, 0.7717, 0.9457, 0.7426],
                        [0.8361, 0.1679, 0.16