In [None]:
# source: https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag

from ogb.nodeproppred import NodePropPredDataset

# Load the ogbn-mag dataset through OGB's node-property-prediction loader.
dataset = NodePropPredDataset(name="ogbn-mag")

# Pull the three index sets out of the split dictionary.
split_idx = dataset.get_idx_split()
train_idx = split_idx["train"]
valid_idx = split_idx["valid"]
test_idx = split_idx["test"]

# The first dataset item unpacks into the graph dictionary and its labels.
graph, label = dataset[0]

In [None]:
import json
import numpy as np
import torch
import scipy.sparse as sparse

In [None]:
# Unpack the first dataset item again so this cell only needs `dataset` to exist.
graph, label = dataset[0]

# Show which top-level entries the graph dictionary provides ...
graph_keys = graph.keys()
print(graph_keys)

# ... followed by its edge-feature dictionary.
edge_features = graph['edge_feat_dict']
print(edge_features)

In [None]:
# Display the shape of the feature array stored for 'paper' nodes.
graph['node_feat_dict']['paper'].shape

In [None]:
# Display the edge-index dictionary; the cells below index it with
# (source type, relation, destination type) tuples.
graph['edge_index_dict']

In [None]:
num_nodes = graph['num_nodes_dict']

# All node types share one global ID space, laid out as contiguous,
# non-overlapping ranges in the order
#   paper -> author -> institution -> field_of_study.
# Start offsets are derived from the node counts (not from `previous[-1] + 1`),
# so every range holds exactly num_nodes[type] IDs. The author range used to
# end at `PaperNode_id.shape[-1] + 1 + n_author`, which produced one extra
# author ID and shifted every institution / field_of_study ID up by one.
author_start = num_nodes['paper']
institution_start = author_start + num_nodes['author']
field_start = institution_start + num_nodes['institution']

PaperNode_id = np.arange(0, num_nodes['paper'])
paper_feats = graph['node_feat_dict']['paper']

# NOTE(review): like node_feat_dict / num_nodes_dict, OGB's heterogeneous
# loader is expected to key `label` and `graph['node_year']` by node type
# (confirm against the OGB docs). Unwrap the 'paper' entry when that is the
# case so that plain arrays, not pickled dict objects, end up in the archive.
paper_class = label['paper'] if isinstance(label, dict) else label
node_year = graph['node_year']
paper_year = node_year['paper'] if isinstance(node_year, dict) else node_year

AuthorNode_id = np.arange(author_start, institution_start)
InstitutionNode_id = np.arange(institution_start, field_start)
FieldOfStudy_id = np.arange(field_start, field_start + num_nodes['field_of_study'])


In [None]:
def _to_global_edges(edge_index, src_offset, dst_offset):
    """Shift a (2, E) type-local edge index into the global node-ID space.

    Row 0 (sources) is shifted by ``src_offset`` and row 1 (destinations) by
    ``dst_offset``; the result is transposed to shape (E, 2).
    """
    return np.stack([edge_index[0, :] + src_offset,
                     edge_index[1, :] + dst_offset]).T


edge_index_dict = graph['edge_index_dict']
ei_affiliated = edge_index_dict[('author', 'affiliated_with', 'institution')]
ei_writes = edge_index_dict[('author', 'writes', 'paper')]
ei_cites = edge_index_dict[('paper', 'cites', 'paper')]
ei_has_topic = edge_index_dict[('paper', 'has_topic', 'field_of_study')]

# Global edge IDs: one contiguous range per relation, in the order
# affiliated_with -> writes -> cites -> has_topic.
author_institution_id = np.arange(0, ei_affiliated.shape[1])
author_paper_id = np.arange(author_institution_id[-1] + 1,
                            author_institution_id[-1] + 1 + ei_writes.shape[1])
paper_paper_id = np.arange(author_paper_id[-1] + 1,
                           author_paper_id[-1] + 1 + ei_cites.shape[1])
paper_FieldOfStudy_id = np.arange(paper_paper_id[-1] + 1,
                                  paper_paper_id[-1] + 1 + ei_has_topic.shape[1])

# Edge endpoints re-expressed as global node IDs. Each endpoint is shifted by
# the first global ID of its own node type. The author endpoint of
# author -> institution edges must use the author offset; it was previously
# shifted by PaperNode_id[0] (= 0), so authors collided with paper IDs.
author_institution_edge = _to_global_edges(ei_affiliated, AuthorNode_id[0], InstitutionNode_id[0])
author_paper_edge = _to_global_edges(ei_writes, AuthorNode_id[0], PaperNode_id[0])
paper_paper_edge = _to_global_edges(ei_cites, PaperNode_id[0], PaperNode_id[0])
paper_FieldOfStudy_edge = _to_global_edges(ei_has_topic, PaperNode_id[0], FieldOfStudy_id[0])

# All-ones row vectors with one entry per global node / per global edge.
node_list = np.ones((1, FieldOfStudy_id[-1] + 1))
edge_list = np.ones((1, paper_FieldOfStudy_id[-1] + 1))

In [None]:

# Everything that goes into the exported archive. The dictionary keys become
# the array names inside the .npz file, so they must not carry padding: several
# keys previously had alignment spaces inside the quotes (e.g.
# "author_paper_edge      "), which made those arrays reachable only under
# names with trailing whitespace.
data = {
    # node IDs and paper attributes
    "PaperNode_id": PaperNode_id,
    "paper_feats": paper_feats,
    "paper_class": paper_class,
    "paper_year": paper_year,
    "AuthorNode_id": AuthorNode_id,
    "InstitutionNode_id": InstitutionNode_id,
    "FieldOfStudy_id": FieldOfStudy_id,
    # edge IDs, one range per relation
    "author_institution_id": author_institution_id,
    "author_paper_id": author_paper_id,
    "paper_paper_id": paper_paper_id,
    "paper_FieldOfStudy_id": paper_FieldOfStudy_id,
    # edge endpoints in global node IDs
    "author_institution_edge": author_institution_edge,
    "author_paper_edge": author_paper_edge,
    "paper_paper_edge": paper_paper_edge,
    "paper_FieldOfStudy_edge": paper_FieldOfStudy_edge,
    # all-ones vectors over nodes / edges
    "node_list": node_list,
    "edge_list": edge_list,
}

In [None]:
# Write every entry of `data` into a single compressed archive; each array is
# stored under its dictionary key.
np.savez_compressed("ogbn-mag.npz", **data)


In [None]:
# Reopen the exported archive and list the names of the arrays it contains.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. That is acceptable here only because the file was
# produced by this notebook; do not copy this call for untrusted .npz files.
mag = np.load("ogbn-mag.npz", allow_pickle=True)
mag.files

In [None]:
# Export the train / validation / test node indices. Note the archive uses the
# key "val" where the split dictionary uses "valid".
# NOTE(review): for heterogeneous datasets OGB is expected to return each split
# as a {node_type: indices} dict (confirm against the OGB docs). Saving such a
# dict directly would store a pickled object instead of an index array, so the
# 'paper' entry is unwrapped when present; plain arrays pass through unchanged.
task_data = {
    split_name: (idx['paper'] if isinstance(idx, dict) else idx)
    for split_name, idx in (("train", train_idx), ("val", valid_idx), ("test", test_idx))
}
np.savez_compressed("ogbn-mag_task.npz", **task_data)