In [1]:
import gnn_zoo
from gnn_zoo.utils import io
from gnn_zoo.utils.utils import ensure_dir, set_random_seed

In [2]:
import os
import os.path as osp

In [3]:
# Root directory under which all dataset folders used by this notebook are resolved.
# Set the GNN_ZOO_DATA_ROOT environment variable to point the notebook at another
# machine's data directory; when it is unset, the original location is used.
all_data_root = os.environ.get(
    'GNN_ZOO_DATA_ROOT', '/media/xreco/DEV/xiran/code/gnn_zoo_data'
)

#### Load raw_graph.txt and save it as a CSR graph

In [4]:
# Dataset to process; its name selects the 'raw_<dataset>' folder under the data root.
dataset = 'facebook'

# Folder holding this dataset's raw files, and the graph text file inside it.
raw_data_root = osp.join(
    all_data_root,
    'dataset/raw_' + dataset,
)
file_raw_graph = osp.join(
    raw_data_root,
    'raw_graph.txt',
)

# Echo the resolved path so a wrong data root is noticed before anything is loaded.
print(file_raw_graph)

/media/xreco/DEV/xiran/code/gnn_zoo_data/dataset/raw_facebook/raw_graph.txt


In [5]:
# Read the raw graph file; the loader hands back the source and destination
# endpoints of the edges as two separate sequences.
E_src, E_dst = io.txt_graph.load_edges(file_raw_graph)

# Show both endpoint sequences, one per line.
print(E_src, E_dst, sep='\n')

[   0    0    0 ... 4027 4027 4031]
[   1    2    3 ... 4032 4038 4038]


In [6]:
# Convert the edge list to CSR form, treating the graph as type 'homo'.
# The call returns an `info` object plus the `indptr` / `indices` pair.
info, indptr, indices = gnn_zoo.data.from_edges_to_csr(E_src, E_dst, graph_type='homo')

# Print the graph summary returned alongside the CSR data.
print(info)

# from_edges_to_csr ...
# remove_repeated_edges ...
## 0 edges are removed
{'graph_type': 'homo', 'num_nodes': 4039, 'num_edges': 88234}


In [7]:
# Store the raw CSR graph in a 'csr' sub-folder of the raw dataset directory.
raw_csr_root = osp.join(raw_data_root, 'csr')
ensure_dir(raw_csr_root)  # presumably creates the folder when missing -- confirm in gnn_zoo.utils

# `info` is written as YAML; the two CSR objects are pickled.
io.save_yaml(osp.join(raw_csr_root, 'info.yaml'), info)
for _pkl_name, _pkl_obj in (('indptr.pkl', indptr), ('indices.pkl', indices)):
    io.save_pickle(osp.join(raw_csr_root, _pkl_name), _pkl_obj)

#### Split some edges as validation/test set

In [8]:
# Seed before splitting (presumably edges_split draws edges at random -- confirm).
set_random_seed(1999)

# Split parameters: how many edges to take out, and the degree thresholds passed
# to edges_split for the source / destination endpoints.
num_sample, min_src_out_degree, min_dst_in_degree = 10_000, 3, 1

# NOTE(review): the result is unpacked into the same `info`, `indptr` and `indices`
# names that are passed in, so from here on they refer to the graph returned by
# edges_split. Re-running this cell without first re-running the CSR construction
# cell would therefore split the already-split graph again.
_split_result = gnn_zoo.data.edges_split(
    info, indptr, indices, num_sample, min_src_out_degree, min_dst_in_degree
)
info, indptr, indices, pos_edges = _split_result

print(info)

# init CSR_Graph_rev_rm_edge...
sampling edges 9999/10000 (99.99%)
num sampled edges: 10000
# csr.to_compact(g.indptr, g.indices)...
{'graph_type': 'homo', 'num_nodes': 4039, 'num_edges': 78234}


In [9]:
# Store the graph returned by the split, together with the held-out edges, in the
# 'instance_<dataset>' folder under the data root.
data_root = osp.join(all_data_root, 'dataset/instance_' + dataset)
ensure_dir(data_root)  # presumably creates the folder when missing -- confirm in gnn_zoo.utils

# `info` is written as YAML; everything else is pickled.
io.save_yaml(osp.join(data_root, 'info.yaml'), info)
for _pkl_name, _pkl_obj in (
    ('indptr.pkl', indptr),
    ('indices.pkl', indices),
    ('pos_edges.pkl', pos_edges),
):
    io.save_pickle(osp.join(data_root, _pkl_name), _pkl_obj)

In [11]:
# The first `num_validation` entries of `pos_edges` become the validation edges;
# all remaining entries become the test edges.
num_validation = 2000
val_edges, test_edges = pos_edges[:num_validation], pos_edges[num_validation:]

# Convert each edge subset into an evaluation set (validation first, then test).
val_set, test_set = [
    gnn_zoo.data.from_edges_to_adj_eval_set(subset)
    for subset in (val_edges, test_edges)
]

# Pickle both evaluation sets into the instance folder.
io.save_pickle(osp.join(data_root, 'val_set.pkl'), val_set)
io.save_pickle(osp.join(data_root, 'test_set.pkl'), test_set)