In [1]:
import torch
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric import loader

# Load ogbn-products (files are kept under dataset/) and take its single graph.
dataset = PygNodePropPredDataset(name="ogbn-products", root='dataset/')

split_idx = dataset.get_idx_split()
data = dataset[0]


def _index_to_mask(index, size):
    """Return a boolean vector of length `size` that is True at `index`."""
    mask = torch.zeros(size, dtype=torch.bool)
    mask[index] = True
    return mask


# Store every split as a boolean node mask on `data` under '<split>_mask',
# so the split membership is carried along as a per-node attribute.
for key, idx in split_idx.items():
    data[f'{key}_mask'] = _index_to_mask(idx, data.num_nodes)

# Partition the graph into 15000 clusters (non-recursive partitioning).
cluster_data = loader.ClusterData(data, num_parts=15000, recursive=False)

Computing METIS partitioning...
Done!


In [2]:
import random
import torch
from torch_geometric import loader

from sklearn.random_projection import GaussianRandomProjection

# rp = GaussianRandomProjection(n_components=dataset.num_features)
# cluster_data.data.x = torch.from_numpy(rp.fit_transform(cluster_data.data.x))

# Base seed for the per-cluster random projections. Each cluster uses
# RP_SEED + <cluster index>, so every cluster still gets its own projection
# matrix while a re-run of the cell reproduces the same features.
RP_SEED = 0

with torch.no_grad():
    projected_parts = []
    for part_id, cl in enumerate(cluster_data):
        if cl.x.size(0) == 0:
            # An empty partition has nothing to fit on; keep its empty slice
            # so the concatenation below stays aligned with the node order.
            projected_parts.append(cl.x)
            continue
        rp = GaussianRandomProjection(n_components=dataset.num_features,
                                      random_state=RP_SEED + part_id)
        projected = torch.from_numpy(rp.fit_transform(cl.x.numpy()))
        # Keep the original feature dtype regardless of what sklearn returns.
        projected_parts.append(projected.to(cl.x.dtype))

    # Each `cl` is a per-partition copy, so assigning to `cl.x` never reaches
    # `cluster_data.data.x` (which is what later cells read). Write the
    # projected features back explicitly instead.
    # NOTE(review): this relies on partitions being consecutive row slices of
    # cluster_data.data.x; the shape check below guards the row count.
    projected_x = torch.cat(projected_parts, dim=0)
    assert projected_x.shape == cluster_data.data.x.shape, (
        "projected features do not line up with cluster_data.data.x")
    cluster_data.data.x = projected_x

    row, col, edge_attr = cluster_data.data.adj.t().coo()


In [3]:
def get_adj(row, col, N, asymm_norm=False, set_diag=True, remove_diag=False):
    """Build a degree-normalized N x N adjacency matrix as a SciPy CSR matrix.

    Args:
        row, col: Coordinates of the non-zero entries, passed straight to
            ``SparseTensor(row=..., col=...)``.
        N: Number of nodes; the matrix has sparse size (N, N).
        asymm_norm: If falsy (default), scale as D^-1/2 * A * D^-1/2;
            otherwise scale rows only, D^-1 * A. D is the row-sum of A
            after the diagonal handling below.
        set_diag: If truthy, call ``set_diag()`` on the adjacency. Takes
            precedence over ``remove_diag``.
        remove_diag: If truthy (and ``set_diag`` is falsy), call
            ``remove_diag()`` on the adjacency.

    Returns:
        The normalized adjacency converted with ``to_scipy(layout='csr')``.
    """
    sparse_adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))

    # Diagonal handling: set, remove, or leave untouched.
    if set_diag:
        print('... setting diagonal entries')
        sparse_adj = sparse_adj.set_diag()
    elif remove_diag:
        print('... removing diagonal entries')
        sparse_adj = sparse_adj.remove_diag()
    else:
        print('... keeping diag elements as they are')

    # Both normalizations differ only in the degree exponent and in whether
    # the columns are scaled as well.
    if asymm_norm:
        print('... performing asymmetric normalization')
        exponent = -1.0
    else:
        print('... performing symmetric normalization')
        exponent = -0.5

    degree = sparse_adj.sum(dim=1).to(torch.float)
    scale = degree.pow(exponent)
    # Zero-degree rows produce inf after the negative power; neutralize them.
    scale[scale == float('inf')] = 0

    normalized = scale.view(-1, 1) * sparse_adj
    if not asymm_norm:
        normalized = normalized * scale.view(1, -1)

    return normalized.to_scipy(layout='csr')


In [4]:
import torch
import torch.nn.functional as F
from torch_sparse import SparseTensor
from torch_geometric.utils import to_undirected, dropout_adj

# Everything saved below is read from the clustered copy of the graph.
clustered = cluster_data.data

op_dict = {
    'label': clustered.y.data.to(torch.long),
    # NOTE: despite the *_idx key names, these entries are the split masks
    # stored on the graph (train_mask / valid_mask / test_mask).
    'train_idx': clustered.train_mask,
    'valid_idx': clustered.valid_mask,
    'test_idx': clustered.test_mask,
}

x = clustered.x.numpy()
N = data.num_nodes

print('Start processing')

# Disabled experiment: an earlier variant applied dropout_adj(p=0.4) to
# data.edge_index and then to_undirected before building the adjacency.

# Normalized adjacency (default get_adj options) from the clustered graph.
print('Getting adj matrix')
row, col, edge_attr = clustered.adj.t().coo()
adj = get_adj(row, col, N)

# Feature diffusion: multiply by the adjacency num_hops times.
print('Diffusing node features')
num_hops = 3
for _ in range(num_hops):
    x = adj @ x

op_dict['embedding'] = torch.from_numpy(x).to(torch.float)

torch.save(op_dict, '{}.pt'.format("FIRST_RP_CLUSTER"))


Start processing
Getting adj matrix
... setting diagonal entries
... performing symmetric normalization
Diffusing node features
