This notebook allows for testing the GraphWorld setup with GNN implementations.
It is currently set up to test the self-supervised learning (SSL) methods for the JL benchmarker.

Through this notebook you can attach a debugger.
Note that `graph_tool` does not work on Windows, so we cannot use the graph generators.
Instead, we use the standard datasets from PyG.

In [1]:
import os
import sys
# Append the parent directory to sys.path (once), ahead of the `graph_world`
# imports below -- presumably so the package resolves when the notebook is run
# from a sub-folder of the repository.
module_path = os.path.abspath('..')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

from graph_world.self_supervised_learning.benchmarker_jl import NNNodeBenchmarkerJL
from graph_world.models.basic_gnn import GCN
from torch_geometric.datasets import Planetoid

from graph_world.self_supervised_learning.pretext_tasks.auxiliary_property_based import *
from graph_world.self_supervised_learning.pretext_tasks.contrastive_based_different_scale import *

In [2]:
# Parameter setup (values chosen for Cora).
# `benchmark_params` is handed to the benchmarker constructor further down.
# NOTE(review): 'lambda' presumably weights the pretext-task loss -- confirm in
# the benchmarker implementation.
benchmark_params = {
    'epochs': 10,
    'lr': 0.01,
    'lambda': 1,
}

# Hyper-parameters passed to the benchmarker as `h_params`.
h_params = {
    # Encoder settings; the first three match the model repr printed by the
    # training cell: GCN(1433, 16, num_layers=2).
    'in_channels': 1433,
    'hidden_channels': 16,
    'num_layers': 2,
    'dropout': 0.5,
    # Remaining keys are presumably read by individual pretext tasks --
    # TODO confirm which task consumes which key.
    'embedding_corruption_ratio': 0.1,
    'partial_embedding_reconstruction': True,
    'n_parts': 10,
    'shortest_path_cutoff': 6,
    'N_classes': 4,
    'k_largest': 20,
    'k': 10,
    'temperature': 1,
    'num_cluster_iter': 500,
    'alpha': 0.15,
}

# Stand-in for the graph generator configuration (the generators themselves are
# not used in this notebook).
# NOTE(review): 7 presumably mirrors the number of classes in Cora -- confirm.
generator_config = {'num_clusters': 7}

# Pretext task to benchmark; passed to the benchmarker as `pretext_task`.
# SUBG_CON is not defined in this notebook -- it presumably comes from one of the
# wildcard `pretext_tasks` imports in the first cell (TODO confirm which module).
pretext_task = SUBG_CON

In [3]:
# Get dataset
# Cache the download under the platform temp directory rather than the
# hard-coded POSIX path '/tmp/Cora': this notebook is meant for Windows users,
# where '/tmp' is not the temp directory and resolves to a folder at the root of
# the current drive. On a default Linux setup this is still '/tmp/Cora'.
import tempfile

dataset_root = os.path.join(tempfile.gettempdir(), 'Cora')

# `[0]` takes the first graph of the dataset, so `dataset` is a single graph
# object; the training cell reads its train/val/test masks directly.
dataset = Planetoid(root=dataset_root, name='Cora')[0]
# Alternative dataset. KarateClub is not explicitly imported in this notebook,
# and the Cora-specific parameters above would need adjusting:
# dataset = KarateClub()[0]


In [4]:
# Training. Attach a debugger and break wherever needed inside `train`.
benchmarker = NNNodeBenchmarkerJL(
    generator_config=generator_config,
    model_class=GCN,
    benchmark_params=benchmark_params,
    h_params=h_params,
    pretext_task=pretext_task,
)

# Use the split masks that ship with the dataset.
# Alternative kept for reference: every node outside the training set serves as
# both validation and test node.
#   benchmarker.SetMasks(train_mask=dataset.train_mask, val_mask=~dataset.train_mask, test_mask=~dataset.train_mask)
benchmarker.SetMasks(
    train_mask=dataset.train_mask,
    val_mask=dataset.val_mask,
    test_mask=dataset.test_mask,
)

# Last expression of the cell, so its return value is displayed. The recorded
# output is a 3-tuple: a list of 10 floats (what looks like one loss per epoch)
# followed by two metric dicts -- NOTE(review): confirm which dict is
# validation and which is test.
benchmarker.train(
    data=dataset,
    tuning_metric="rocauc_ovr",
    tuning_metric_is_loss=False,
)

Using maximize_memory_utilization on non-CUDA tensors. This may lead to undocumented crashes due to CPU OOM killer.


GCN(1433, 16, num_layers=2)


([2.6994714736938477,
  2.6791863441467285,
  2.645183801651001,
  2.5946288108825684,
  2.55507230758667,
  2.4536309242248535,
  2.4063870906829834,
  2.3507513999938965,
  2.1880853176116943,
  2.0390963554382324],
 {'accuracy': 0.683,
  'f1_micro': 0.683,
  'f1_macro': 0.6502601463285714,
  'rocauc_ovr': 0.8004686010808478,
  'rocauc_ovo': 0.8004686010808478,
  'logloss': 3.9055096887499094},
 {'accuracy': 0.652,
  'f1_micro': 0.652,
  'f1_macro': 0.6236162124917816,
  'rocauc_ovr': 0.7890727550637047,
  'rocauc_ovo': 0.7890727550637047,
  'logloss': 4.959867698028684})