This file allows for testing the GraphWorld setup with GNN implementations.
It is currently set up to test the SSL methods for the JL benchmarker.

Through this notebook you can attach a debugger.
Note that graph_tool does not work on Windows, so we cannot use the graph generators.
Instead, we use the standard datasets from PyG (PyTorch Geometric).

In [1]:
import os
import sys
import tempfile
# Put the parent of the current working directory on the import path
# (presumably the repository root that contains `graph_world` — confirm),
# skipping the append when the entry is already present.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from graph_world.self_supervised_learning.benchmarker_jl import NNNodeBenchmarkerJL
from graph_world.models.basic_gnn import GCN
from torch_geometric.datasets import Planetoid

from graph_world.self_supervised_learning.pretext_tasks.auxiliary_property_based import *
from graph_world.self_supervised_learning.pretext_tasks.contrastive_based_different_scale import *

In [2]:
# Parameter setup (for cora)

# Training settings, handed to the benchmarker as `benchmark_params`.
benchmark_params = {
    'epochs': 50,
    'lr': 0.01,
    'lambda': 1,
}

# Hyperparameters, handed to the benchmarker as `h_params`.
# The first four entries look like encoder settings and the rest like
# pretext-task settings; presumably each pretext task only reads the keys
# it needs — TODO confirm against the pretext task implementations.
h_params = {
    'in_channels': 1433,
    'hidden_channels': 16,
    'num_layers': 2,
    'dropout': 0.5,
    'embedding_corruption_ratio': 0.1,
    'partial_embedding_reconstruction': True,
    'n_parts': 10,
    'shortest_path_cutoff': 6,
    'N_classes': 4,
    'k_largest': 20,
    'k': 20,
    'temperature': 1,
    'num_cluster_iter': 500,
    'alpha': 0.15,
    'n_clusters': 30,
}

# Generator configuration, handed to the benchmarker as `generator_config`.
generator_config = {'num_clusters': 7}

# Pretext task passed to the benchmarker through its `pretext_task=` argument.
# SUBG_CON is not defined in this notebook; presumably it comes from one of
# the wildcard `pretext_tasks` imports — TODO confirm which module.
pretext_task = SUBG_CON

In [3]:
# Get dataset
# Build the cache directory from the platform's temp dir instead of the
# hard-coded POSIX path '/tmp/Cora', so the cell also behaves sensibly on
# Windows (on a typical Linux setup this still resolves to /tmp/Cora).
dataset_root = os.path.join(tempfile.gettempdir(), 'Cora')
# The Planetoid object is indexed with [0], so `dataset` holds that first
# element rather than the Planetoid object itself.
dataset = Planetoid(root=dataset_root, name='Cora')[0]
# Alternative dataset (KarateClub is not explicitly imported in this notebook):
# dataset = KarateClub()[0]


In [4]:
# Scratch check: adding an axis at position 1 turns the length-10 range into a (10, 1) column.
torch.arange(10).unsqueeze(1).shape

torch.Size([10, 1])

In [5]:
# Training. A debugger can be attached to whatever is needed inside train.
# Build the benchmarker from the configuration defined in the setup cells.
benchmarker = NNNodeBenchmarkerJL(
    generator_config=generator_config,
    model_class=GCN,
    benchmark_params=benchmark_params,
    h_params=h_params,
    pretext_task=pretext_task,
)

# Alternative split kept for reference: validate/test on everything outside the training mask.
# benchmarker.SetMasks(train_mask=dataset.train_mask, val_mask=~dataset.train_mask, test_mask=~dataset.train_mask)
benchmarker.SetMasks(
    train_mask=dataset.train_mask,
    val_mask=dataset.val_mask,
    test_mask=dataset.test_mask,
)

# Left as the cell's last expression so the notebook displays whatever train() returns.
benchmarker.train(
    data=dataset,
    tuning_metric="rocauc_ovr",
    tuning_metric_is_loss=False,
)

GCN(1433, 16, num_layers=2)


([2.9594218730926514,
  2.945026397705078,
  2.927903175354004,
  2.889177083969116,
  2.8705172538757324,
  2.833305835723877,
  2.767346143722534,
  2.70845365524292,
  2.6099300384521484,
  2.5228404998779297,
  2.5021626949310303,
  2.285983085632324,
  2.278137445449829,
  2.0885391235351562,
  2.017399549484253,
  1.9704710245132446,
  1.826206922531128,
  1.6873406171798706,
  1.636690378189087,
  1.6233882904052734,
  1.5277948379516602,
  1.4642348289489746,
  1.4291068315505981,
  1.4705944061279297,
  1.3830986022949219,
  1.170853853225708,
  1.279672622680664,
  1.2527964115142822,
  1.1010345220565796,
  1.1427823305130005,
  1.0715858936309814,
  1.16620934009552,
  1.0697882175445557,
  1.0214099884033203,
  0.832078754901886,
  0.9366757869720459,
  0.958958089351654,
  0.9599683880805969,
  0.8971724510192871,
  0.9495126605033875,
  0.9692106246948242,
  0.9658770561218262,
  0.8941949009895325,
  0.9064047336578369,
  0.9841784238815308,
  0.8355443477630615,
  0.90

In [12]:
# Scratch check: concatenating along dim=1 places the two copies side by side (2x3 -> 2x6).
A = torch.tensor([[1, 2, 3]] * 2)

torch.cat((A, A), dim=1)

tensor([[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]])

In [115]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data, Batch
from torch import tensor
from torch_geometric.nn import GraphConv, global_mean_pool

# Scratch experiment: batch two small graphs, run one GraphConv over the batch,
# and check what iterating a DataLoader over the batch yields.

# Five nodes with a single float feature each: 0., 1., 2., 3., 4.
X = torch.arange(5, dtype=torch.float32).unsqueeze(1)

# Edge lists in which node 0 points at every other listed node.
edge_index_s = torch.tensor([[0, 0, 0, 0],
                             [1, 2, 3, 4]])
edge_index_t = torch.tensor([[0, 0, 0],
                             [1, 2, 3]])

# NOTE(review): `a` carries only 2 feature rows while edge_index_s references
# node ids up to 4 — confirm this mismatch is intentional for the experiment.
a = Data(x=X[:2], edge_index=edge_index_s)  # features [0, 1]
b = Data(x=X[1:], edge_index=edge_index_t)  # features [1, 2, 3, 4]

batch = Batch.from_data_list([a, b])
model = GraphConv(in_channels=1, out_channels=1)
H = model(x=batch.x, edge_index=batch.edge_index)  # not used again in this cell

# Print the number of feature rows of each item the loader yields, each followed
# by an empty line (the recorded output shows 2 and 4).
for x in DataLoader(batch):
    print(x.x.size(0), end="\n\n")



2

4



In [116]:
# Inspect the concrete class of the object returned by Batch.from_data_list.
type(batch)

torch_geometric.data.batch.DataBatch

In [113]:
# Scratch check: subtracting the (1, 3) transpose from the (3, 1) column
# broadcasts to a 3x3 table of pairwise differences a[i] - a[j].
a = torch.tensor([1, 2, 3]).unsqueeze(1)

a - a.t()


tensor([[ 0, -1, -2],
        [ 1,  0, -1],
        [ 2,  1,  0]])

In [114]:
# Display the column tensor next to its transpose for comparison.
a, a.T

(tensor([[1],
         [2],
         [3]]),
 tensor([[1, 2, 3]]))

In [3]:
# Training. A debugger can be attached to whatever is needed inside train.
# This cell needs generator_config, benchmark_params, h_params, pretext_task and
# dataset to exist already; the NameError recorded below is what happens when
# generator_config has not been defined in the running kernel.
benchmarker = NNNodeBenchmarkerJL(
    generator_config=generator_config,
    model_class=GCN,
    benchmark_params=benchmark_params,
    h_params=h_params,
    pretext_task=pretext_task,
)

# Alternative split kept for reference: validate/test on everything outside the training mask.
# benchmarker.SetMasks(train_mask=dataset.train_mask, val_mask=~dataset.train_mask, test_mask=~dataset.train_mask)
benchmarker.SetMasks(
    train_mask=dataset.train_mask,
    val_mask=dataset.val_mask,
    test_mask=dataset.test_mask,
)

# Left as the cell's last expression so the notebook displays whatever train() returns.
benchmarker.train(
    data=dataset,
    tuning_metric="rocauc_ovr",
    tuning_metric_is_loss=False,
)

NameError: name 'generator_config' is not defined