In [5]:
import torch
import numpy as np
from torch_geometric.datasets import Planetoid, Amazon
from pathlib import Path
from tqdm import tqdm

In [2]:
#create own Planetoid-style split (per class: 20 train, 30 val, the rest test)
def create_pl_split(ds_name):
    """Create and save 100 seeded Planetoid-style splits for one dataset.

    For every seed in ``range(100)`` the node indices of each class are
    shuffled with ``np.random.default_rng(seed)``; the first 20 nodes of the
    class go to training, the next 30 to validation and the remainder to test.

    Args:
        ds_name: Dataset name. "Cora", "Citeseer" and "PubMed" (matched
            case-insensitively) are loaded with ``Planetoid``; every other
            name (e.g. "Computers", "Photo") is loaded with ``Amazon``.

    Side effects:
        Writes ``dataset/<ds_name>/own_pl_splits.pt`` containing a dict
        ``seed -> {"train_mask", "val_mask", "test_mask"}`` of boolean
        tensors shaped like the label tensor. The directory is created if it
        does not exist yet.

    Note:
        Assumes the label tensor ``dataset[0].y`` is 1-D (one label per node).
    """
    # Select the loader from the name instead of commenting lines in and out:
    # Amazon does not provide the citation graphs and vice versa.
    planetoid_names = {"cora", "citeseer", "pubmed"}
    dataset_cls = Planetoid if ds_name.lower() in planetoid_names else Amazon
    dataset = dataset_cls(root='dataset/', name=ds_name)

    # Fetch the label tensor once; it is reused as the template for every mask.
    labels = dataset[0].y
    y = labels.cpu().detach().numpy()

    # Node indices per class do not depend on the seed, so compute them once.
    # NOTE(review): the (k, 1) shape returned by argwhere is kept on purpose;
    # Generator.shuffle may draw differently for 1-D input, which would change
    # the split produced for a given seed - confirm before flattening here.
    nodes_per_class = [np.argwhere(y == cl) for cl in np.unique(y)]

    splits = {}
    for seed in tqdm(range(100)):
        rng = np.random.default_rng(seed)
        train = []
        val = []
        test = []

        for class_nodes in nodes_per_class:
            tmp = class_nodes.copy()  # shuffle is in-place; keep the cached array intact
            rng.shuffle(tmp)
            train.append(tmp[:20])
            val.append(tmp[20:50])
            test.append(tmp[50:])

        masks = {}
        for key, parts in (("train_mask", train), ("val_mask", val), ("test_mask", test)):
            # Flatten the stacked (k, 1) index columns into a plain index vector.
            node_ix = torch.from_numpy(np.concatenate(parts).reshape(-1))
            mask = torch.zeros_like(labels, dtype=torch.bool)
            mask[node_ix] = True
            masks[key] = mask
        splits[seed] = masks

    out_path = "dataset/"+ds_name+"/own_pl_splits.pt"
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(splits, out_path)


In [3]:
#create own "full supervised" split (see gcn2 paper) (per class 60% train, 20% val, 20% test)
def create_622_split(ds_name):
    """Create and save 100 seeded 60/20/20 per-class splits for one dataset.

    For every seed in ``range(100)`` the node indices of each class are
    shuffled with ``np.random.default_rng(seed)``. With ``k`` nodes in a
    class, the first ``int(k * .6)`` go to training, the nodes up to
    ``int(k * .8)`` to validation and the remainder to test.

    Args:
        ds_name: Dataset name. "Cora", "Citeseer" and "PubMed" (matched
            case-insensitively) are loaded with ``Planetoid``; every other
            name (e.g. "Computers", "Photo") is loaded with ``Amazon``.

    Side effects:
        Writes ``dataset/<ds_name>/own_622_splits.pt`` containing a dict
        ``seed -> {"train_mask", "val_mask", "test_mask"}`` of boolean
        tensors shaped like the label tensor. The directory is created if it
        does not exist yet.

    Note:
        Assumes the label tensor ``dataset[0].y`` is 1-D (one label per node).
    """
    # Select the loader from the name instead of commenting lines in and out:
    # Amazon does not provide the citation graphs and vice versa.
    planetoid_names = {"cora", "citeseer", "pubmed"}
    dataset_cls = Planetoid if ds_name.lower() in planetoid_names else Amazon
    dataset = dataset_cls(root='dataset/', name=ds_name)

    # Fetch the label tensor once; it is reused as the template for every mask.
    labels = dataset[0].y
    y = labels.cpu().detach().numpy()

    # Node indices per class do not depend on the seed, so compute them once.
    # NOTE(review): the (k, 1) shape returned by argwhere is kept on purpose;
    # Generator.shuffle may draw differently for 1-D input, which would change
    # the split produced for a given seed - confirm before flattening here.
    nodes_per_class = [np.argwhere(y == cl) for cl in np.unique(y)]

    splits = {}
    for seed in tqdm(range(100)):
        rng = np.random.default_rng(seed)
        train = []
        val = []
        test = []

        for class_nodes in nodes_per_class:
            tmp = class_nodes.copy()  # shuffle is in-place; keep the cached array intact
            c1 = int(len(tmp)*.6)  # end of the training slice
            c2 = int(len(tmp)*.8)  # end of the validation slice
            rng.shuffle(tmp)
            train.append(tmp[:c1])
            val.append(tmp[c1:c2])
            test.append(tmp[c2:])

        masks = {}
        for key, parts in (("train_mask", train), ("val_mask", val), ("test_mask", test)):
            # Flatten the stacked (k, 1) index columns into a plain index vector.
            node_ix = torch.from_numpy(np.concatenate(parts).reshape(-1))
            mask = torch.zeros_like(labels, dtype=torch.bool)
            mask[node_ix] = True
            masks[key] = mask
        splits[seed] = masks

    out_path = "dataset/"+ds_name+"/own_622_splits.pt"
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(splits, out_path)

In [4]:
# Write both split files (Planetoid-style, then 60/20/20) for Cora.
for _make_split in (create_pl_split, create_622_split):
    _make_split("Cora")

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 272.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 271.72it/s]


In [5]:
# Write both split files (Planetoid-style, then 60/20/20) for Citeseer.
for _make_split in (create_pl_split, create_622_split):
    _make_split("Citeseer")

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 221.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 221.89it/s]


In [6]:
# Write both split files (Planetoid-style, then 60/20/20) for PubMed.
for _make_split in (create_pl_split, create_622_split):
    _make_split("PubMed")

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 37.81it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 37.65it/s]


In [17]:
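# Disabled sanity check. NOTE: the split functions in this notebook write
# own_pl_splits.pt / own_622_splits.pt, so this path may be stale.
#d = torch.load("dataset/Cora/own_splits.pt")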

In [6]:
# Write both split files (Planetoid-style, then 60/20/20) for Computers.
for _make_split in (create_pl_split, create_622_split):
    _make_split("Computers")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 34.01it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 35.08it/s]


In [8]:
# Write both split files (Planetoid-style, then 60/20/20) for Photo.
for _make_split in (create_pl_split, create_622_split):
    _make_split("Photo")

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_photo.npz
Processing...
Done!
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 45.46it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 47.29it/s]


In [1]:
import torch
from ogb.nodeproppred import PygNodePropPredDataset

import torch_geometric.transforms as T
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_sparse import SparseTensor


In [2]:
# Load ogbn-arxiv on the CPU, apply the ToUndirected and AddSelfLoops
# transforms, then compute edge weights with gcn_norm.
device = "cpu"
ds = PygNodePropPredDataset(name = "ogbn-arxiv", root = 'dataset/')
dat = ds[0].to(device)
transform = T.Compose([T.ToUndirected(), T.AddSelfLoops()])
dat = transform(dat)
# Row count of the label tensor; used below as the node count.
nnodes = dat.y.shape[0]
edge_index, edge_weight = gcn_norm(dat.edge_index, edge_weight=None, num_nodes=nnodes)

In [3]:
# Build the nnodes x nnodes sparse matrix A from the weighted edges.
A = SparseTensor(row=edge_index[0], col=edge_index[1], value=edge_weight, sparse_sizes=(nnodes, nnodes)).to(device)
# tmp is the running product; the following cells multiply it by A again.
tmp = A

In [4]:
# r=2, undirected graph: multiply the running product by A and convert the
# result to a dense tensor. Not idempotent: re-running this cell multiplies again.
tmp = tmp.matmul(A).to_dense() #save as r=2, unidir=true
# Saving the dense r=2 result is currently disabled.
#torch.save(tmp, "dataset/ogbn_arxiv/A2_undir_dense.pt")

In [None]:
# r=3, undirected graph. `tmp` is expected to hold the dense A @ A from the
# previous cell. Powers of the same matrix commute, so A @ (A @ A) equals
# (A @ A) @ A up to floating-point rounding. Multiplying the sparse A by the
# dense tmp avoids materializing a dense copy of A and running a dense-dense
# product. Not idempotent: re-running this cell multiplies by A again.
tmp = A.matmul(tmp) #save as r=3, unidir=true
torch.save(tmp, "dataset/ogbn_arxiv/A3_undir_dense.pt")

In [2]:
from time import perf_counter

In [3]:
# Time two steps separately: loading the saved sparse r=3 matrix from disk,
# then converting it to a dense tensor. Both durations are printed in seconds.
pth = "dataset/ogbn_arxiv/A3_undir_sparse.pt"
t1 = perf_counter()
tmp = torch.load(pth)
t2 = perf_counter()
print(t2-t1)  # load time
tmp = tmp.to_dense()
t3 = perf_counter()
print(t3-t2)  # densify time

35.91642682696693
33.58859628601931


In [4]:
# Save the dense tensor currently held in tmp; pth is reused by the timing cell below.
pth = "dataset/ogbn_arxiv/A3_undir_dense.pt"
torch.save(tmp, pth)

In [5]:
# Time loading the file at pth (set in an earlier cell); prints seconds.
t1 = perf_counter()
tmp = torch.load(pth)
t2 = perf_counter()
print(t2-t1)

42.549946828978136


In [2]:
# Load ogbn-arxiv on the CPU, apply the ToUndirected and AddSelfLoops
# transforms, then compute edge weights with gcn_norm.
device = "cpu"
ds = PygNodePropPredDataset(name = "ogbn-arxiv", root = 'dataset/')
dat = ds[0].to(device)
transform = T.Compose([T.ToUndirected(), T.AddSelfLoops()])
dat = transform(dat)
# Row count of the label tensor; used below as the node count.
nnodes = dat.y.shape[0]
edge_index, edge_weight = gcn_norm(dat.edge_index, edge_weight=None, num_nodes=nnodes)

In [3]:
# Build the nnodes x nnodes sparse matrix A from the weighted edges.
A = SparseTensor(row=edge_index[0], col=edge_index[1], value=edge_weight, sparse_sizes=(nnodes, nnodes)).to(device)
# tmp is the running product; the following cells multiply it by A again.
tmp = A

In [4]:
# r=2, undirected graph: multiply the running product by A (no densification here).
# Not idempotent: re-running this cell multiplies again.
tmp = tmp.matmul(A) #save as r=2, unidir=true

In [6]:
# Save the r=2 undirected product computed in the previous cell.
torch.save(tmp, "dataset/ogbn_arxiv/A2_undir_sparse.pt")

In [8]:
# r=3, undirected graph: multiply the r=2 product by A once more and save it.
# Not idempotent: re-running this cell multiplies again.
tmp = tmp.matmul(A) #save as r=3, unidir=true
torch.save(tmp, "dataset/ogbn_arxiv/A3_undir_sparse.pt")

In [9]:
# Display the repr of the r=3 undirected product (size, nnz, density).
tmp

SparseTensor(row=tensor([     0,      0,      0,  ..., 169342, 169342, 169342]),
             col=tensor([     0,      2,      4,  ..., 169340, 169341, 169342]),
             val=tensor([8.3572e-03, 9.6971e-08, 2.0686e-08,  ..., 1.1591e-07, 1.1025e-07,
                           1.8022e-02]),
             size=(169343, 169343), nnz=7820075259, density=27.27%)

In [2]:
# Directed variant: load ogbn-arxiv on the CPU and compute gcn_norm edge
# weights on the edges as loaded (no ToUndirected/AddSelfLoops transform here).
device = "cpu"
ds = PygNodePropPredDataset(name = "ogbn-arxiv", root = 'dataset/')
dat = ds[0].to(device)
# Row count of the label tensor; used below as the node count.
nnodes = dat.y.shape[0]
edge_index, edge_weight = gcn_norm(dat.edge_index, edge_weight=None, num_nodes=nnodes)

In [3]:
# Build the nnodes x nnodes sparse matrix A from the weighted edges.
A = SparseTensor(row=edge_index[0], col=edge_index[1], value=edge_weight, sparse_sizes=(nnodes, nnodes)).to(device)
# tmp is the running product; the following cells multiply it by A again.
tmp = A

In [4]:
# r=2, directed graph: multiply the running product by A and save it.
# Not idempotent: re-running this cell multiplies again.
tmp = tmp.matmul(A) #save as r=2, unidir=false
torch.save(tmp, "dataset/ogbn_arxiv/A2_dir_sparse.pt")

In [5]:
# Display the repr of the r=2 directed product (size, nnz, density).
tmp

SparseTensor(row=tensor([     0,      0,      0,  ..., 169342, 169342, 169342]),
             col=tensor([     0,  14528,  52893,  ...,  36609, 158981, 169342]),
             val=tensor([1.1891e-05, 1.9943e-03, 8.8689e-04,  ..., 8.9992e-03, 1.5798e-01,
                           1.0000e+00]),
             size=(169343, 169343), nnz=9172516, density=0.03%)

In [6]:
# r=3, directed graph: multiply the r=2 product by A once more and save it.
# Not idempotent: re-running this cell multiplies again.
# The inline note previously said unidir=true, but this section never applies
# ToUndirected and writes the *_dir_* file, so it is the directed case.
tmp = tmp.matmul(A) #save as r=3, unidir=false
torch.save(tmp, "dataset/ogbn_arxiv/A3_dir_sparse.pt")

In [7]:
# Display the repr of the r=3 directed product (size, nnz, density).
tmp

SparseTensor(row=tensor([     0,      0,      0,  ..., 169342, 169342, 169342]),
             col=tensor([     0,  14528,  52893,  ...,  36609, 158981, 169342]),
             val=tensor([4.1002e-08, 7.8896e-04, 5.2339e-05,  ..., 1.0499e-02, 1.5859e-01,
                           1.0000e+00]),
             size=(169343, 169343), nnz=36571955, density=0.13%)