This file allows for testing the GraphWorld setup with GNN implementations.
It is currently set up to test the SSL methods for the JL benchmarker.

Running the code through this notebook makes it possible to attach a debugger.
Note that graph_tool does not work on Windows, so the GraphWorld graph generators cannot be used here.
Instead, we use the standard datasets from PyG (PyTorch Geometric).

In [1]:
import os
import sys
# Put the parent of the current working directory on the import path so that
# the `graph_world` package can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from graph_world.self_supervised_learning.benchmarker_jl import NNNodeBenchmarkerJL
from graph_world.models.basic_gnn import GCN
from torch_geometric.datasets import Planetoid

from graph_world.self_supervised_learning.pretext_tasks.auxiliary_property_based import *

In [22]:
# Parameter setup (for cora)
# Training-loop settings passed to the benchmarker as `benchmark_params`.
benchmark_params = {
    "epochs": 50,  # the saved training output below starts a list of per-epoch values
    "lr": 0.01,
    # presumably the weight of the pretext-task loss in the joint objective
    # -- TODO confirm against NNNodeBenchmarkerJL
    "lambda": 1,
}

# Hyper-parameters passed to the benchmarker as `h_params`.
h_params = {
    # Encoder architecture; these match the printed model GCN(1433, 16, num_layers=2).
    "in_channels": 1433,
    "hidden_channels": 16,
    "num_layers": 2,
    "dropout": 0.5,
    # The remaining keys are presumably read by individual SSL pretext tasks
    # -- TODO confirm which task consumes which key.
    "embedding_corruption_ratio": 0.1,
    "partial_embedding_reconstruction": True,
    "n_parts": 10,
    "shortest_path_cutoff": 6,
    "N_classes": 4,
    "k_largest": 20,
}

# Stand-in for the GraphWorld generator configuration (no generator is run here).
# presumably `num_clusters` is used as the number of node classes -- TODO confirm.
generator_config = {"num_clusters": 7}

# Pretext-task classes handed to the benchmarker below.
# NOTE(review): PairwiseAttrSim is not imported by name in the visible cells;
# presumably it comes from the star import of auxiliary_property_based -- confirm.
pretext_tasks = [PairwiseAttrSim]

In [23]:
# Get dataset: take the first (index 0) graph of the Planetoid "Cora" dataset,
# using /tmp/Cora as the dataset root directory.
dataset = Planetoid(name='Cora', root='/tmp/Cora')[0]
# Alternative dataset kept for reference (KarateClub is not imported in the visible cells):
# dataset = KarateClub()[0]


In [24]:
import torch
def get_top_k_indices(input: torch.Tensor, k : int, largest : bool = True):
    '''
    Get the (row, column) indices of the top K elements of a 2-D tensor.

    Params
    ------
    input:
        The 2-D tensor to search. It may be non-contiguous
        (e.g. a transposed or sliced view).
    k:
        The number of top elements to find. Must not exceed the number of
        elements in input, otherwise torch.topk raises.
    largest:
        Whether to find the largest (True) or smallest (False) elements

    Returns
    -------
    (row_indices, col_indices) where input[row_indices, col_indices] returns
    the top k elements, in the order produced by torch.topk (most extreme first).

    '''
    assert input.dim() == 2, 'input must be a 2-D tensor'
    n_cols = input.shape[1]

    # reshape (unlike view) also works on non-contiguous tensors such as
    # transposed matrices; it only copies when a view is impossible.
    flat_indices = input.reshape(-1).topk(k=k, largest=largest).indices

    # Convert positions in the flattened (row-major) tensor back to 2-D coordinates.
    row = torch.div(flat_indices, n_cols, rounding_mode='floor')
    col = flat_indices % n_cols
    return row, col

In [25]:
from sklearn.metrics.pairwise import cosine_similarity

# Small 3x3 integer scratch matrix (not used by any other visible cell).
A = torch.tensor([[1, 2, 3], [-1, 2, 3], [-5, -4, -3]])



In [40]:
# Broadcasting sanity check: a column vector minus a 1-D vector gives the full
# matrix of pairwise differences a[i] - b[j].
a = torch.arange(1, 6)
b = a.neg()

# .t() leaves a 1-D tensor unchanged, so the subtraction broadcasts (5, 1) against (5,).
# NOTE(review): every entry equals a[i] + a[j] >= 2, so this assert should pass;
# the AssertionError saved in the cell output looks stale -- re-run to confirm.
assert torch.all(a.unsqueeze(-1) - b.t() >= 0)

AssertionError: 

In [26]:
# Training. A debugger can be attached to whatever is needed inside train.
benchmarker = NNNodeBenchmarkerJL(
    generator_config=generator_config,
    model_class=GCN,
    benchmark_params=benchmark_params,
    h_params=h_params,
    pretext_tasks=pretext_tasks,
)
# Alternative split kept for reference: every non-training node used for both
# validation and test.
# benchmarker.SetMasks(train_mask=dataset.train_mask, val_mask=~dataset.train_mask, test_mask=~dataset.train_mask)
benchmarker.SetMasks(
    train_mask=dataset.train_mask,
    val_mask=dataset.val_mask,
    test_mask=dataset.test_mask,
)
# Left as the last expression so the notebook displays the value returned by train().
benchmarker.train(
    data=dataset,
    tuning_metric="rocauc_ovr",
    tuning_metric_is_loss=False,
)

GCN(1433, 16, num_layers=2)


([1.9488672018051147,
  1.9378156661987305,
  1.918062448501587,
  1.890212059020996,
  1.8752741813659668,
  1.8101314306259155,
  1.7388907670974731,
  1.6755969524383545,
  1.5219535827636719,
  1.589640498161316,
  1.438228726387024,
  1.4140865802764893,
  1.338031530380249,
  1.19312584400177,
  1.1600996255874634,
  1.1083388328552246,
  0.9489993453025818,
  0.9280111193656921,
  0.9100370407104492,
  0.9357131123542786,
  0.8003672957420349,
  0.7531022429466248,
  0.7838432788848877,
  0.6505172252655029,
  0.697056770324707,
  0.6251777410507202,
  0.753750205039978,
  0.5855933427810669,
  0.5902490615844727,
  0.5002512335777283,
  0.5278068780899048,
  0.5489422678947449,
  0.49030598998069763,
  0.5483325123786926,
  0.5065605044364929,
  0.6145564317703247,
  0.423135370016098,
  0.4972778260707855,
  0.4736478626728058,
  0.4294847548007965,
  0.3834085464477539,
  0.4876343309879303,
  0.38691672682762146,
  0.3893532454967499,
  0.387218177318573,
  0.459268242120742