In [1]:
import json
import numpy as np
import torch
import scipy.sparse as sparse
from torch_geometric.datasets import WikipediaNetwork

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sibling dataset, kept for reference:
# dataset = WikipediaNetwork(root='/tmp/squirrel', name='squirrel')
dataset = WikipediaNetwork(root='/tmp/chameleon', name='chameleon')
graph = dataset[0]  # the dataset contains a single graph (len(dataset) == 1)


def _count_nonzero_rows(mask):
    """Return the row count of ``mask.nonzero()`` after squeezing.

    NOTE: the split masks here are 2-D (nodes x splits), so this figure is
    the total over *all* split columns, not the size of a single split.
    """
    return mask.nonzero().squeeze().numpy().shape[0]


print(graph)
print("Length of dataset:", len(dataset))  # 1

# (label, value) pairs printed in a fixed order.
summary_rows = (
    ("NumNodes: ", graph.x.shape[0]),
    ("NumEdges: ", graph.edge_index.shape[1]),
    ("NumFeats: ", dataset.num_node_features),
    ("NumClasses: ", dataset.num_classes),
    ("NumTrainingSamples: ", _count_nonzero_rows(graph.train_mask)),
    ("NumValidationSamples: ", _count_nonzero_rows(graph.val_mask)),
    ("NumTestSamples: ", _count_nonzero_rows(graph.test_mask)),
)
for label, value in summary_rows:
    print(label, value)

Data(x=[2277, 2325], edge_index=[2, 36101], y=[2277], train_mask=[2277, 10], val_mask=[2277, 10], test_mask=[2277, 10])
Length of dataset: 1
NumNodes:  2277
NumEdges:  36101
NumFeats:  2325
NumClasses:  5
NumTrainingSamples:  10920
NumValidationSamples:  7290
NumTestSamples:  4560


In [3]:
# Assemble the arrays that make up the chameleon graph data file.
# Shapes in the trailing comments are those reported for this dataset.

# Node features, stored as a CSR sparse matrix.
node_feats = sparse.csr_matrix(graph.x.numpy())  # (2277, 2325)

# Per-node class label.
node_class = graph.y.numpy()  # (2277,)

# Edges as (source, target) rows: transpose of the (2, E) edge_index.
edge = graph.edge_index.t().numpy()  # (36101, 2)

# chameleon consists of one single graph, so a single all-ones row marks
# every node (resp. every edge) as belonging to it.
node_list = np.ones((1, graph.x.size(0)))
edge_list = np.ones((1, graph.edge_index.size(1)))

data = dict(
    node_feats=node_feats,
    node_class=node_class,
    edge=edge,
    node_list=node_list,
    edge_list=edge_list,
)

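The dense arrays are saved together in a single file, `chameleon.npz`; the sparse feature matrix `node_feats` is written separately to `chameleon_node_feats.sparse.npz`.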

In [4]:
from gli.utils import save_data

# Write every entry of `data` to disk under the "chameleon" prefix.
save_data("chameleon", **data)

# Re-open the dense archive and list the arrays it contains as a sanity check.
chameleon = np.load(file="chameleon.npz", allow_pickle=True)
chameleon.files

Save all dense arrays to chameleon.npz, including ['node_class', 'edge', 'node_list', 'edge_list']
Save sparse matrix node_feats to chameleon_node_feats.sparse.npz


['node_class', 'edge', 'node_list', 'edge_list']

In [5]:
# Ask PyG whether the edge set is symmetric (i.e. the graph is undirected).
graph_is_undirected = graph.is_undirected()
print("Graph is undirected?: ", graph_is_undirected)

Graph is undirected?:  False


## Task

In [6]:
# The mask is 2-D; the cells below treat each column as one train/val/test split.
print(graph.train_mask.size())

torch.Size([2277, 10])


In [7]:
def _mask_columns_to_indices(mask):
    """Convert a 2-D boolean mask into one index array per column.

    Args:
        mask: boolean tensor of shape (num_nodes, num_splits).

    Returns:
        A list with ``num_splits`` entries; entry ``k`` is a 1-D NumPy array
        holding the row indices where ``mask[:, k]`` is True.
    """
    # `nonzero(as_tuple=True)[0]` always yields a 1-D index tensor. The
    # previous `.nonzero().squeeze()` collapsed a split containing exactly
    # one node into a 0-d array, which breaks downstream indexing/len().
    return [
        mask[:, split].nonzero(as_tuple=True)[0].numpy()
        for split in range(mask.shape[1])
    ]


# One list per role; position k in each list corresponds to split k.
train_set = _mask_columns_to_indices(graph.train_mask)
val_set = _mask_columns_to_indices(graph.val_mask)
test_set = _mask_columns_to_indices(graph.test_mask)

In [8]:
# Flatten the per-split index arrays into one dict keyed "<role>_<split>".
# Keys are inserted as train_k, val_k, test_k for k = 0, 1, ... so the
# resulting archive lists them in that order.
task_data = {}
for split_id, (train_idx, val_idx, test_idx) in enumerate(
    zip(train_set, val_set, test_set)
):
    task_data[f"train_{split_id}"] = train_idx
    task_data[f"val_{split_id}"] = val_idx
    task_data[f"test_{split_id}"] = test_idx

# Persist all splits under the "chameleon_task" prefix.
save_data("chameleon_task", **task_data)

Save all dense arrays to chameleon_task.npz, including ['train_0', 'val_0', 'test_0', 'train_1', 'val_1', 'test_1', 'train_2', 'val_2', 'test_2', 'train_3', 'val_3', 'test_3', 'train_4', 'val_4', 'test_4', 'train_5', 'val_5', 'test_5', 'train_6', 'val_6', 'test_6', 'train_7', 'val_7', 'test_7', 'train_8', 'val_8', 'test_8', 'train_9', 'val_9', 'test_9']
