In [5]:
import torch
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric import loader

# Load the ogbn-products node-property-prediction dataset rooted at dataset/.
dataset = PygNodePropPredDataset(name="ogbn-products", root="dataset/")

# The dataset's first (index 0) graph object and its split -> node-index map.
data = dataset[0]
split_idx = dataset.get_idx_split()

# For every split, build a boolean per-node mask that is True exactly at the
# split's node indices and store it on `data` under '<split>_mask'.
for split_name, node_ids in split_idx.items():
    split_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    split_mask[node_ids] = True
    data[f'{split_name}_mask'] = split_mask
    

In [6]:
def get_adj(row, col, N, asymm_norm=False, set_diag=True, remove_diag=False):
    """Build a degree-normalized N x N adjacency matrix as a SciPy CSR matrix.

    Args:
        row, col: Edge endpoints passed to ``SparseTensor(row=..., col=...)``.
        N: Number of nodes; the sparse matrix is created with size (N, N).
        asymm_norm: If true, scale each row by the inverse of its row sum.
            Otherwise scale rows and columns by the inverse square root of
            the row sums.
        set_diag: If true, call ``adj.set_diag()`` before normalizing.
        remove_diag: If true (and ``set_diag`` is false), call
            ``adj.remove_diag()`` before normalizing. Ignored whenever
            ``set_diag`` is true.

    Returns:
        The normalized adjacency converted with ``to_scipy(layout='csr')``.
    """
    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))

    # Diagonal handling; set_diag takes precedence over remove_diag.
    if set_diag:
        print('... setting diagonal entries')
        adj = adj.set_diag()
    elif remove_diag:
        print('... removing diagonal entries')
        adj = adj.remove_diag()
    else:
        print('... keeping diag elements as they are')

    if asymm_norm:
        print('... performing asymmetric normalization')
        inv_deg = adj.sum(dim=1).to(torch.float).pow(-1.0)
        # Rows whose sum is zero produce inf; zero them out instead.
        inv_deg[inv_deg == float('inf')] = 0
        adj = inv_deg.view(-1, 1) * adj
    else:
        print('... performing symmetric normalization')
        inv_sqrt_deg = adj.sum(dim=1).to(torch.float).pow(-0.5)
        # Rows whose sum is zero produce inf; zero them out instead.
        inv_sqrt_deg[inv_sqrt_deg == float('inf')] = 0
        adj = inv_sqrt_deg.view(-1, 1) * adj * inv_sqrt_deg.view(1, -1)

    return adj.to_scipy(layout='csr')


In [None]:

def maybe_num_nodes(edge_index, num_nodes=None):
    """Return the number of nodes, inferring it from ``edge_index`` if needed.

    Args:
        edge_index: Either a ``torch.Tensor`` of node indices, or another
            object exposing ``size(dim)`` for dims 0 and 1 (presumably a
            sparse adjacency -- verify against callers).
        num_nodes: Explicit node count. When not ``None`` it is returned
            unchanged and ``edge_index`` is not inspected.

    Returns:
        ``num_nodes`` if given; for a tensor, the largest index plus one
        (0 for an empty tensor); otherwise the larger of the two sizes.
    """
    if num_nodes is not None:
        return num_nodes
    # Use the qualified name: only `torch` is imported in this notebook, so
    # a bare `Tensor` would raise NameError here.
    if isinstance(edge_index, torch.Tensor):
        return int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    return max(edge_index.size(0), edge_index.size(1))

def _filter_adj(row, col, edge_attr, mask):
    """Keep only the edges (and their attributes, if any) selected by mask."""
    kept_attr = None if edge_attr is None else edge_attr[mask]
    return row[mask], col[mask], kept_attr


def dropout_adj(edge_index, edge_attr=None, p=0.5, force_undirected=False,
                num_nodes=None, training=True):
    """Randomly drop edges, each one independently with probability ``p``.

    Args:
        edge_index: Tensor that unpacks into ``(row, col)`` index vectors.
        edge_attr: Optional per-edge attributes, indexed along dim 0 in the
            same order as the edges.
        p: Drop probability; must lie in [0, 1].
        force_undirected: If true, only edges with ``row < col`` are
            considered, and every surviving edge is emitted in both
            directions and then coalesced.
        num_nodes: Node count used when coalescing; inferred with
            ``maybe_num_nodes`` when ``None``.
        training: If false, the inputs are returned untouched.

    Returns:
        ``(edge_index, edge_attr)`` after dropout. ``edge_attr`` is ``None``
        when no attributes were supplied.

    Raises:
        ValueError: If ``p`` is outside [0, 1].
    """
    if p < 0. or p > 1.:
        raise ValueError('Dropout probability has to be between 0 and 1, '
                         'but got {}'.format(p))

    if not training or p == 0.0:
        return edge_index, edge_attr

    row, col = edge_index

    if force_undirected:
        # Work on one direction only; the reverse edges are re-created below.
        row, col, edge_attr = _filter_adj(row, col, edge_attr, row < col)

    # Bernoulli keep-mask: each remaining edge survives with probability 1 - p.
    keep = edge_index.new_full((row.size(0), ), 1 - p, dtype=torch.float)
    keep = torch.bernoulli(keep).to(torch.bool)

    row, col, edge_attr = _filter_adj(row, col, edge_attr, keep)

    if not force_undirected:
        return torch.stack([row, col], dim=0), edge_attr

    # `coalesce` was never brought into scope by the notebook; torch_sparse is
    # only needed on this path, so import it lazily here.
    from torch_sparse import coalesce

    # The node count is only required by coalesce, so compute it here rather
    # than on every call.
    N = maybe_num_nodes(edge_index, num_nodes)
    edge_index = torch.stack(
        [torch.cat([row, col], dim=0),
         torch.cat([col, row], dim=0)], dim=0)
    if edge_attr is not None:
        edge_attr = torch.cat([edge_attr, edge_attr], dim=0)
    edge_index, edge_attr = coalesce(edge_index, edge_attr, N, N)

    return edge_index, edge_attr

In [7]:
import torch
import torch.nn.functional as F
from torch_sparse import SparseTensor
from torch_geometric.utils import to_undirected, dropout_adj

import random
from torch_geometric import loader

from sklearn.random_projection import GaussianRandomProjection

# Node count and node features; the features are taken as a NumPy array so
# they can be multiplied by the SciPy CSR matrix returned by get_adj.
N = data.num_nodes
x = data.x.numpy()

print('Start processing')

# Disabled experiment: random edge dropout followed by symmetrization.
# print('Preparing undirected operators...')
# edge_index, _ = dropout_adj(data.edge_index, p=0.4, num_nodes=data.num_nodes)
# print('Making the graph undirected')
# edge_index = to_undirected(edge_index, data.num_nodes)
# row, col = edge_index

# Normalized adjacency built from the graph's edges (get_adj defaults).
print('Getting adj matrix')
row, col = data.edge_index[0], data.edge_index[1]
adj = get_adj(row, col, N)

# Apply the adjacency three times in succession to the feature matrix.
print('Diffusing node features')
for _ in range(3):
    x = adj @ x

# Disabled experiment: random projection of the diffused features.
# rp = GaussianRandomProjection(n_components=dataset.num_features)
# x = torch.from_numpy(rp.fit_transform(x))

# Write the diffused features back onto the graph object as float tensors.
data.x = torch.from_numpy(x).to(torch.float)

Start processing
Getting adj matrix
... setting diagonal entries
... performing symmetric normalization
Diffusing node features


In [4]:
import random
import torch
from torch_geometric import loader
from torch_geometric.data import Data

from sklearn.random_projection import GaussianRandomProjection

# Partition the graph into 1000 clusters.
cluster_data = loader.ClusterData(data, num_parts=1000, recursive=False)

# Alternative kept for reference: one projection fitted on all nodes at once.
# rp = GaussianRandomProjection(n_components=dataset.num_features)
# cluster_data.data.x = torch.from_numpy(rp.fit_transform(cluster_data.data.x))

# Project each cluster's features with its own Gaussian random projection.
# Indexing/iterating `cluster_data` yields a fresh sub-graph object each time,
# so assigning to `cl.x` would be thrown away and the saved features would be
# left unprojected. Collect the projected blocks and stitch them together.
projected_blocks = []
for cl in cluster_data:
    rp = GaussianRandomProjection(n_components=dataset.num_features)
    projected_blocks.append(torch.from_numpy(rp.fit_transform(cl.x.numpy())))

# NOTE(review): this relies on `cluster_data.data` storing nodes cluster by
# cluster, in the same order the clusters are iterated -- confirm for the
# installed PyG version. The assert only guards the row count.
embedding = torch.cat(projected_blocks, dim=0)
assert embedding.size(0) == cluster_data.data.x.size(0), \
    'projected rows do not match the number of nodes in cluster_data.data'

op_dict = {
    'embedding': embedding,
    'label': cluster_data.data.y.to(torch.long),
    'train_idx': cluster_data.data.train_mask,
    'valid_idx': cluster_data.data.valid_mask,
    'test_idx': cluster_data.data.test_mask,
}

torch.save(op_dict, '{}.pt'.format("LAST_RP_CLUSTER"))

Computing METIS partitioning...
Done!


PCA(n_components=2)
(100, 2)
