In [1]:
import torch
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric import loader

# Load the ogbn-products node-property-prediction dataset rooted at 'dataset/'.
dataset = PygNodePropPredDataset(root='dataset/', name="ogbn-products")

# Take the first graph object of the dataset together with its index split.
data = dataset[0]
split_idx = dataset.get_idx_split()

# For every split, build a boolean node mask (True exactly at the split's
# indices) and store it on `data` under the key '<split>_mask'.
for key in split_idx:
    idx = split_idx[key]
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[idx] = True
    data[f'{key}_mask'] = mask
    

In [2]:

def get_adj(row, col, N, asymm_norm=False, set_diag=True, remove_diag=False):
    """Build a degree-normalized N x N adjacency matrix in SciPy CSR layout.

    Args:
        row, col: Edge endpoints handed to ``SparseTensor(row=..., col=...)``
            (presumably 1-D index tensors of equal length -- confirm with caller).
        N: Side length of the square sparse matrix.
        asymm_norm: If False (default), scale both rows and columns by
            ``deg ** -0.5``; if True, scale rows only by ``deg ** -1``.
        set_diag: If truthy, call ``adj.set_diag()`` before normalizing.
        remove_diag: If truthy, call ``adj.remove_diag()`` instead. Only
            consulted when ``set_diag`` is falsy, so ``set_diag`` wins when
            both flags are set.

    Returns:
        The normalized adjacency as returned by ``adj.to_scipy(layout='csr')``.
    """
    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))

    # Diagonal handling: the flags are checked in order, first match wins.
    if set_diag:
        print('... setting diagonal entries')
        adj = adj.set_diag()
    elif remove_diag:
        print('... removing diagonal entries')
        adj = adj.remove_diag()
    else:
        print('... keeping diag elements as they are')

    # The two normalizations differ only in the exponent applied to the
    # degree and in whether the columns are scaled as well.
    if asymm_norm:
        print('... performing asymmetric normalization')
        exponent = -1.0
    else:
        print('... performing symmetric normalization')
        exponent = -0.5

    # Degree = per-row sum over dim=1; entries whose power is +inf
    # (zero degree) are reset to 0.
    deg = adj.sum(dim=1).to(torch.float)
    scale = deg.pow(exponent)
    scale[scale == float('inf')] = 0

    # Row scaling is common to both modes; the symmetric mode then scales
    # the columns too (same left-to-right evaluation order as a chained product).
    adj = scale.view(-1, 1) * adj
    if not asymm_norm:
        adj = adj * scale.view(1, -1)

    return adj.to_scipy(layout='csr')

In [3]:

import torch
import torch.nn.functional as F
from torch_sparse import SparseTensor
from torch_geometric.utils import to_undirected, dropout_adj


# Node count and the node feature matrix as a NumPy array.
N = data.num_nodes
x = data.x.numpy()

# Original node indices of each split, concatenated in train/valid/test order.
train_idx = split_idx['train']
valid_idx = split_idx['valid']
test_idx = split_idx['test']
all_idx = torch.cat([train_idx, valid_idx, test_idx])

# After selecting rows with `all_idx`, each split occupies a contiguous range
# of positions: train first, then valid, then test.
n_train, n_valid, n_test = len(train_idx), len(valid_idx), len(test_idx)
mapped_train_idx = torch.arange(n_train)
mapped_valid_idx = torch.arange(n_train, n_train + n_valid)
mapped_test_idx = torch.arange(n_train + n_valid, n_train + n_valid + n_test)

# Output container: labels and features restricted to the split nodes, plus
# the remapped split positions. 'op_embedding' starts with the raw features.
op_dict = {
    'label': data.y.data[all_idx].to(torch.long),
    'split_idx': {
        'train': mapped_train_idx,
        'valid': mapped_valid_idx,
        'test': mapped_test_idx,
    },
    'op_embedding': [torch.from_numpy(x[all_idx]).to(torch.float)],
}

print('Start processing')

Start processing


In [4]:
# Fraction of edges passed as `p` to dropout_adj. The progress message below
# is derived from this value so the log cannot disagree with what is applied
# (it previously reported 40 % while p=0.3 was used).
DROP_EDGE_RATE = 0.3
# Number of diffusion steps; each step appends one entry to
# op_dict['op_embedding'].
NUM_HOPS = 5

print('Preparing undirected operators...')
# subsample operator
# NOTE(review): dropout_adj presumably samples edges at random and no seed is
# set in this cell, so the saved features may differ between runs -- confirm.
print('Subsampling (dropping {:.1f} %)'.format(100 * DROP_EDGE_RATE))
edge_index, _ = dropout_adj(data.edge_index, p=DROP_EDGE_RATE, num_nodes=data.num_nodes)

# to undirected
print('Making the graph undirected')
# num_nodes is passed by keyword so it cannot be bound to a different
# positional parameter (the positional order of to_undirected is assumed to
# differ between torch_geometric releases -- confirm against the installed one).
edge_index = to_undirected(edge_index, num_nodes=data.num_nodes)
row, col = edge_index

# get adj
print('Getting adj matrix')
adj = get_adj(row, col, N)

# preprocessing of features
print('Diffusing node features')
# Restart from the raw features so the diffusion does not depend on whatever
# `x` currently holds in the kernel.
x = data.x.numpy()
for _ in range(NUM_HOPS):
    # One propagation step: multiply the features by the normalized adjacency,
    # then keep only the rows of the split nodes.
    x = adj @ x
    op_dict['op_embedding'].append(torch.from_numpy(x[all_idx]).to(torch.float))

torch.save(op_dict, '{}.pt'.format("BASELINE_SIGN"))


Preparing undirected operators...
Subsampling (dropping 40.0 %)
Making the graph undirected
Getting adj matrix
... setting diagonal entries
... performing symmetric normalization
Diffusing node features
