In [9]:
import json

import numpy as np
import scipy.sparse as sparse
import torch
from gli.utils import save_data
from torch_geometric.datasets import Actor

In [10]:
# Load the Actor dataset (root directory /tmp/actor) and take its first graph.
dataset = Actor(root='/tmp/actor')
graph = dataset[0]
print(graph)
print("Length of dataset:", len(dataset))  # 1

# Basic size statistics, printed one per line as "<label> <value>".
for label, value in (
    ("NumNodes: ", graph.x.shape[0]),
    ("NumEdges: ", graph.edge_index.shape[1]),
    ("NumFeats: ", dataset.num_node_features),
    ("NumClasses: ", dataset.num_classes),
):
    print(label, value)

# NOTE(review): in the recorded run the masks are 2-D ([7600, 10]), and
# `nonzero()` yields one row per True entry of the whole mask. The counts below
# are therefore totals over all mask columns, not the size of a single split.
for label, mask in (
    ("NumTrainingSamples: ", graph.train_mask),
    ("NumValidationSamples: ", graph.val_mask),
    ("NumTestSamples: ", graph.test_mask),
):
    print(label, mask.nonzero().squeeze().numpy().shape[0])


Data(x=[7600, 932], edge_index=[2, 30019], y=[7600], train_mask=[7600, 10], val_mask=[7600, 10], test_mask=[7600, 10])
Length of dataset: 1
NumNodes:  7600
NumEdges:  30019
NumFeats:  932
NumClasses:  5
NumTrainingSamples:  36480
NumValidationSamples:  24320
NumTestSamples:  15200


In [11]:
# Sizes reused below for the graph-membership indicator rows.
num_nodes = graph.x.shape[0]
num_edges = graph.edge_index.shape[1]

# Node features, stored as a SciPy CSR matrix -- (7600, 932) in the recorded run.
node_feats = sparse.csr_matrix(graph.x.numpy())
# Node labels -- (7600,).
node_class = graph.y.numpy()
# Edges with one (source, target) pair per row -- (30019, 2).
edge = graph.edge_index.T.numpy()
# The dataset holds a single graph, so each indicator is one all-ones row
# covering every node / every edge -- (1, 7600) and (1, 30019).
node_list = np.ones((1, num_nodes))
edge_list = np.ones((1, num_edges))

# Everything that gets written to disk, keyed by array name.
data = dict(
    node_feats=node_feats,
    node_class=node_class,
    edge=edge,
    node_list=node_list,
    edge_list=edge_list,
)

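We save the data with `save_data`: the dense arrays are stored together in a single file, `actor.npz`, while the sparse feature matrix is written to a separate file, `actor_node_feats.sparse.npz`.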

In [12]:
# Write the arrays collected in `data` to disk under the "actor" prefix.
# (`save_data` is imported with the other imports at the top of the notebook.)
save_data("actor", **data)

# Reopen the dense archive and list the arrays it contains as a sanity check.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code. It is tolerable here only because the file was
# written by the call just above; do not reuse this on untrusted .npz files.
actor = np.load("actor.npz", allow_pickle=True)
actor.files

Save all dense arrays to actor.npz, including ['node_class', 'edge', 'node_list', 'edge_list']
Save sparse matrix node_feats to actor_node_feats.sparse.npz


['node_class', 'edge', 'node_list', 'edge_list']

In [14]:
# Check whether the edge set is symmetric (every edge also has its reverse).
is_undirected = graph.is_undirected()
print("Graph is undirected?: ", is_undirected)

Graph is undirected?:  False


## Task

In [15]:
# Inspect the mask layout: rows index nodes, columns index the predefined splits.
train_mask_shape = graph.train_mask.shape
print(train_mask_shape)

torch.Size([7600, 10])


In [16]:
# Turn every column of the boolean split masks into an array of node indices,
# giving one train / val / test index array per predefined split.
num_splits = graph.train_mask.shape[1]

# `nonzero()` on a 1-D mask column returns a (k, 1) index tensor. `reshape(-1)`
# flattens it to shape (k,) for every k, whereas a bare `squeeze()` would
# collapse a split containing exactly one node into a 0-d scalar array.
train_set = [
    graph.train_mask[:, i].nonzero().reshape(-1).numpy() for i in range(num_splits)
]
val_set = [
    graph.val_mask[:, i].nonzero().reshape(-1).numpy() for i in range(num_splits)
]
test_set = [
    graph.test_mask[:, i].nonzero().reshape(-1).numpy() for i in range(num_splits)
]

In [17]:
# Collect the node-index arrays of every split under the keys
# train_<k>, val_<k>, test_<k> (in that order for each split k).
split_count = graph.train_mask.shape[1]
task_data = {}
for split_id in range(split_count):
    task_data.update({
        f"train_{split_id}": train_set[split_id],
        f"val_{split_id}": val_set[split_id],
        f"test_{split_id}": test_set[split_id],
    })

# Write all split arrays to disk under the "actor_task" prefix.
save_data("actor_task", **task_data)

Save all dense arrays to actor_task.npz, including ['train_0', 'val_0', 'test_0', 'train_1', 'val_1', 'test_1', 'train_2', 'val_2', 'test_2', 'train_3', 'val_3', 'test_3', 'train_4', 'val_4', 'test_4', 'train_5', 'val_5', 'test_5', 'train_6', 'val_6', 'test_6', 'train_7', 'val_7', 'test_7', 'train_8', 'val_8', 'test_8', 'train_9', 'val_9', 'test_9']
