# Import Libraries

## Import Graph-tool

In [None]:
# Bootstrap a conda installation inside the Colab runtime via condacolab,
# then use it to install graph-tool from the conda-forge channel.
!pip install -q condacolab
import condacolab
# NOTE(review): condacolab.install() is understood to restart the kernel once
# it finishes — confirm whether the conda command below has to be re-run
# after the restart.
condacolab.install()
!conda install -c conda-forge graph-tool

In [None]:
# scikit-learn is imported further down as `sklearn`; install it into the
# conda environment set up in the previous cell.
!conda install scikit-learn

In [1]:
# test graph-tool
from graph_tool.all import *

## Import GraphWorld

In [None]:
# Fetch the GraphWorld sources into the current working directory; the
# `graph_world` package is imported from this checkout's src/ folder below.
!git clone https://github.com/google-research/graphworld.git

In [None]:
# Enter the GraphWorld source tree (relative to the directory the clone was
# made in) and install the Python dependencies it lists.
%cd graphworld/src
!pip install -r requirements.txt

In [4]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import graph_tool.all as gt
import sklearn

In [None]:
# Make the GraphWorld src/ folder the working directory so that the
# `graph_world.*` imports in the next cell resolve against it.
%cd /content/graphworld/src

In [6]:
from graph_world.beam.generator_config_sampler import ParamSamplerSpec
from graph_world.beam.generator_beam_handler import GeneratorBeamHandler
import graph_world.generators.sbm_simulator
from graph_world.generators.sbm_simulator import GenerateStochasticBlockModelWithFeatures, MatchType, MakePi, MakeDegrees, MakePropMat
from graph_world.metrics.graph_metrics import graph_metrics, graph_metrics_nx
from graph_world.metrics.node_label_metrics import NodeLabelMetrics

## Import GLI module

In [None]:
# Clone the GLI repository next to GraphWorld; the datasets generated below
# are written into its datasets/ folder.
%cd /content
!git clone https://github.com/Graph-Learning-Benchmarks/gli.git

In [None]:
# Install GLI in editable mode from the checkout made in the previous cell.
%cd /content/gli
!pip install -e .

## Install torch-sparse / pyyaml

In [None]:
!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+${CUDA}.html
!pip install pyyaml

# Controlled Experiments

## Varying Gini-Degree

In [10]:
def gini(array):
    """Return the Gini coefficient of a collection of non-negative values.

    The input is flattened, so lists, tuples and arrays of any shape are
    accepted. A tiny offset (1e-9) is added to every value before the
    computation so that an all-zero input does not divide by zero.

    Args:
        array: array-like of non-negative numbers.

    Returns:
        The Gini coefficient: about 0 when all values are equal, approaching
        1 when a single element holds the whole mass. NaN for empty input.
    """
    # Accept any array-like and always work on a flat float vector; the
    # rank-weighted sum below is only meaningful for one dimension.
    values = np.asarray(array, dtype=float).ravel()
    n = values.shape[0]
    if n == 0:
        # Nothing to measure; avoid the 0/0 division (and its warning).
        return float("nan")
    # Values cannot be 0, and they must be sorted in ascending order.
    values = np.sort(values + 1e-9)
    # 1-based rank of every sorted element.
    rank = np.arange(1, n + 1)
    return np.sum((2 * rank - n - 1) * values) / (n * np.sum(values))

In [11]:
import random
import pandas as pd
import networkx as nx
import os
import numpy as np
import graph_tool as gt
import torch
import matplotlib.pyplot as plt

# ---- Output location and split configuration ----
parent_dir = "/content/gli/datasets/"
# Fraction of the surviving nodes assigned to each split.
partition = {"train_FOLD":0.6, "val_FOLD":0.2, "test_FOLD":0.2}
num_of_split = 1

# ---- GraphWorld parameters held fixed in this experiment ----
NVERTEX = 5000
FEATURE_CENTER_DISTANCE = 0.05
P2Q = 3.0
CLUSTER_SIZE_SLOPE = 0.0

avg_degree = 20
feature_dim = 16
edge_center_distance = 2.0
edge_feature_dim = 2
feature_cluster_variance = 0.25

NUM_CLUSTERS = 4

# ---- Swept parameter ----
# Each entry is handed to MakeDegrees as the power exponent; the Gini
# coefficient of the resulting degree sequence is printed for every run.
gini_arr = [1.5, 2, 2.5, 3, 5]
repeat = 1

sbm = graph_world.generators.sbm_simulator

for i in range(len(gini_arr)):
  POWER_EXPONENT = gini_arr[i]

  # `repeat` independent datasets are generated per swept value.
  for j in range(repeat):

    # One folder per dataset, always derived from parent_dir.
    dataset_name = "gw_gini_"+str(i)+"_"+str(j)
    directory = os.path.join(parent_dir, dataset_name)
    os.makedirs(directory, exist_ok=True)

    pi = sbm.MakePi(num_communities=NUM_CLUSTERS, community_size_slope=CLUSTER_SIZE_SLOPE)
    prop_mat = sbm.MakePropMat(num_communities=NUM_CLUSTERS, p_to_q_ratio=P2Q)
    out_degrees = sbm.MakeDegrees(POWER_EXPONENT, 1, NVERTEX)

    sampler_out = sbm.GenerateStochasticBlockModelWithFeatures(
      num_vertices=NVERTEX,
      num_edges=NVERTEX*avg_degree,
      pi=pi,
      prop_mat=prop_mat,
      out_degs=out_degrees,
      feature_center_distance=FEATURE_CENTER_DISTANCE,
      feature_dim=feature_dim,
      num_feature_groups=NUM_CLUSTERS,
      feature_group_match_type=MatchType.GROUPED,
      feature_cluster_variance=feature_cluster_variance,
      edge_feature_dim=edge_feature_dim,
      edge_center_distance=edge_center_distance,
      edge_cluster_variance=1,
      normalize_features=True)

    graph = sampler_out.graph
    memberships = sampler_out.graph_memberships
    feature_memberships = sampler_out.feature_memberships
    features = sampler_out.node_features

    # Drop zero-degree vertices together with their labels and features.
    # remove_vertex accepts an iterable of indices and, with the default
    # fast=False, keeps the relative order of the remaining vertices, so
    # deleting the same indices from the arrays keeps everything aligned.
    degrees = graph.get_out_degrees(graph.get_vertices())
    isolated = np.flatnonzero(degrees == 0)
    if isolated.size:
      graph.remove_vertex(isolated)
      memberships = np.delete(memberships, isolated)
      features = np.delete(features, isolated, axis=0)

    # Gini of the degree sequence passed to the generator (out_degrees),
    # not of the degrees realised in the sampled graph.
    print("degree_gini: ", gini(out_degrees))

    # ---- <dataset>.npz: features, labels, edges and membership masks ----
    output = {
      "node_feats": torch.from_numpy(features.astype("float32")),
      "node_class": torch.from_numpy(memberships),
      "edge": torch.from_numpy(graph.get_edges()),
      "edge_list": torch.from_numpy(np.ones(graph.num_edges())),
      "node_list": torch.from_numpy(np.ones(graph.num_vertices())),
    }
    print(output["node_class"].shape)
    # np.savez stores every extra keyword as a named array; on NumPy releases
    # whose savez has no allow_pickle parameter, passing allow_pickle=True
    # added a spurious "allow_pickle" entry to the archive, so it is omitted.
    np.savez(os.path.join(directory, dataset_name + ".npz"), **output)

    # ---- <dataset>_task.npz: random train / val / test node split ----
    node_ids = list(range(graph.num_vertices()))
    # NOTE: every split is written to the same file, so only the last one
    # survives when num_of_split > 1.
    for split in range(num_of_split):
      random.shuffle(node_ids)
      train_len = int(len(node_ids) * partition["train_FOLD"])
      val_len = int(len(node_ids) * partition["val_FOLD"])
      # The test split takes whatever remains after train and val.
      output_task = {
        "train": node_ids[:train_len],
        "val": node_ids[train_len:train_len+val_len],
        "test": node_ids[train_len+val_len:],
      }
      np.savez(os.path.join(directory, dataset_name + "_task.npz"), **output_task)




degree_gini:  0.9068595083485667
torch.Size([3111])
degree_gini:  0.8048813622687628
torch.Size([4741])
degree_gini:  0.5000619029505455
torch.Size([4999])
degree_gini:  0.3609721190090517
torch.Size([5000])
degree_gini:  0.06935995544182426
torch.Size([5000])


### Build GLI Format and Store Dataset

In [13]:
# output json files
import json

# Write the three GLI description files (metadata.json,
# task_node_classification_1.json, urls.json) for every gw_gini dataset:
# one folder per (swept value index, repeat index) pair.
for i in range(len(gini_arr)):
  for j in range(repeat):
    dir_str = "gw_gini_"+str(i)+"_"+str(j)
    dataset_str = dir_str+".npz"
    task_str = dir_str+"_task"+".npz"

    print("dataset_str: ", dataset_str)

    # BibTeX entry of the GraphWorld paper, kept as one long string.
    citation = "@inproceedings{10.1145/3534678.3539203,\
    author = {Palowitch, John and Tsitsulin, Anton and Mayer, Brandon and Perozzi, Bryan},\
    title = {GraphWorld: Fake Graphs Bring Real Insights for GNNs},\
    year = {2022},\
    isbn = {9781450393850},\
    publisher = {Association for Computing Machinery},\
    url = {https://doi.org/10.1145/3534678.3539203},\
    doi = {10.1145/3534678.3539203},\
    booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\
    pages = {3691–3701},\
    series = {KDD '22}}"

    # metadata.json: which file / key holds each node, edge and graph field.
    metadata_json = {
      "description": "Random Generated Dataset.",
      "data": {
        "Node": {
          "NodeFeature": {
            "description": "Node features of random generated dataset, real-valued vectors.",
            "type": "double",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_feats",
          },
          "NodeLabel": {
            "description": "Node labels of random generated dataset, int ranged from 1 to 4.",
            "type": "int",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_class",
          },
        },
        "Edge": {
          "_Edge": {"file": dataset_str, "key": "edge"},
        },
        "Graph": {
          "_NodeList": {"file": dataset_str, "key": "node_list"},
          "_EdgeList": {"file": dataset_str, "key": "edge_list"},
        },
      },
      "citation": citation,
      "is_heterogeneous": False,
    }

    print("task_str: ", task_str)

    # task_node_classification_1.json: task definition and split locations.
    task_json = {
      "description": "Node classification on random generated dataset.",
      "type": "NodeClassification",
      "feature": ["Node/NodeFeature"],
      "target": "Node/NodeLabel",
      "num_classes": NUM_CLUSTERS,
      "train_set": {"file": task_str, "key": "train"},
      "val_set": {"file": task_str, "key": "val"},
      "test_set": {"file": task_str, "key": "test"},
    }

    # urls.json: both data files are listed with an empty URL.
    url_json = {dataset_str: "", task_str: ""}

    # Dump each document into the dataset's own folder.
    json_outputs = (
      ("metadata.json", metadata_json),
      ("task_node_classification_1.json", task_json),
      ("urls.json", url_json),
    )
    for file_name, payload in json_outputs:
      with open(parent_dir+dir_str+'/'+file_name, 'w') as fp:
        json.dump(payload, fp, indent=4)
 

dataset_str:  gw_gini_0_0.npz
task_str:  gw_gini_0_0_task.npz
dataset_str:  gw_gini_1_0.npz
task_str:  gw_gini_1_0_task.npz
dataset_str:  gw_gini_2_0.npz
task_str:  gw_gini_2_0_task.npz
dataset_str:  gw_gini_3_0.npz
task_str:  gw_gini_3_0_task.npz
dataset_str:  gw_gini_4_0.npz
task_str:  gw_gini_4_0_task.npz


In [14]:
# Bundle every gw_gini_* dataset folder into a single archive and hand it to
# the browser for download (google.colab is only available inside Colab).
# NOTE: zip -r adds to / updates an archive that already exists at this path
# rather than recreating it.
!zip -r /content/all_data_new.zip /content/gli/datasets/gw_gini_*
from google.colab import files
files.download("/content/all_data_new.zip")

  adding: content/gli/datasets/gw_gini_0_0/ (stored 0%)
  adding: content/gli/datasets/gw_gini_0_0/metadata.json (deflated 63%)
  adding: content/gli/datasets/gw_gini_0_0/gw_gini_0_0_task.npz (deflated 70%)
  adding: content/gli/datasets/gw_gini_0_0/task_node_classification_1.json (deflated 59%)
  adding: content/gli/datasets/gw_gini_0_0/urls.json (deflated 36%)
  adding: content/gli/datasets/gw_gini_0_0/gw_gini_0_0.npz (deflated 73%)
  adding: content/gli/datasets/gw_gini_1_0/ (stored 0%)
  adding: content/gli/datasets/gw_gini_1_0/metadata.json (deflated 63%)
  adding: content/gli/datasets/gw_gini_1_0/task_node_classification_1.json (deflated 59%)
  adding: content/gli/datasets/gw_gini_1_0/urls.json (deflated 36%)
  adding: content/gli/datasets/gw_gini_1_0/gw_gini_1_0.npz (deflated 67%)
  adding: content/gli/datasets/gw_gini_1_0/gw_gini_1_0_task.npz (deflated 69%)
  adding: content/gli/datasets/gw_gini_2_0/ (stored 0%)
  adding: content/gli/datasets/gw_gini_2_0/metadata.json (deflated

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Varying Avg Degree

In [23]:
import random
import pandas as pd
import networkx as nx
import os
import numpy as np
import graph_tool as gt
import torch
import matplotlib.pyplot as plt

# ---- Output location and split configuration ----
parent_dir = "/content/gli/datasets/"
# Fraction of the surviving nodes assigned to each split.
partition = {"train_FOLD":0.6, "val_FOLD":0.2, "test_FOLD":0.2}
num_of_split = 1

# ---- GraphWorld parameters held fixed in this experiment ----
NVERTEX = 5000
FEATURE_CENTER_DISTANCE = 0.05
P2Q = 3.0
CLUSTER_SIZE_SLOPE = 0.0

feature_dim = 16
edge_center_distance = 2.0
edge_feature_dim = 2
feature_cluster_variance = 0.25

NUM_CLUSTERS = 4
repeat = 1
POWER_EXPONENT = 2

# ---- Swept parameter ----
# Average degree; the generator is asked for NVERTEX * avg_degree edges.
deg_arr = [10, 20, 30, 40, 50]

sbm = graph_world.generators.sbm_simulator

for i in range(len(deg_arr)):
  avg_degree = deg_arr[i]

  # `repeat` independent datasets are generated per swept value.
  for j in range(repeat):

    # One folder per dataset, always derived from parent_dir.
    dataset_name = "gw_deg_"+str(i)+"_"+str(j)
    directory = os.path.join(parent_dir, dataset_name)
    os.makedirs(directory, exist_ok=True)

    pi = sbm.MakePi(num_communities=NUM_CLUSTERS, community_size_slope=CLUSTER_SIZE_SLOPE)
    prop_mat = sbm.MakePropMat(num_communities=NUM_CLUSTERS, p_to_q_ratio=P2Q)
    out_degrees = sbm.MakeDegrees(POWER_EXPONENT, 1, NVERTEX)

    sampler_out = sbm.GenerateStochasticBlockModelWithFeatures(
      num_vertices=NVERTEX,
      num_edges=NVERTEX*avg_degree,
      pi=pi,
      prop_mat=prop_mat,
      out_degs=out_degrees,
      feature_center_distance=FEATURE_CENTER_DISTANCE,
      feature_dim=feature_dim,
      num_feature_groups=NUM_CLUSTERS,
      feature_group_match_type=MatchType.GROUPED,
      feature_cluster_variance=feature_cluster_variance,
      edge_feature_dim=edge_feature_dim,
      edge_center_distance=edge_center_distance,
      edge_cluster_variance=1,
      normalize_features=True)

    graph = sampler_out.graph
    memberships = sampler_out.graph_memberships
    feature_memberships = sampler_out.feature_memberships
    features = sampler_out.node_features

    # Drop zero-degree vertices together with their labels and features.
    # remove_vertex accepts an iterable of indices and, with the default
    # fast=False, keeps the relative order of the remaining vertices, so
    # deleting the same indices from the arrays keeps everything aligned.
    degrees = graph.get_out_degrees(graph.get_vertices())
    isolated = np.flatnonzero(degrees == 0)
    if isolated.size:
      graph.remove_vertex(isolated)
      memberships = np.delete(memberships, isolated)
      features = np.delete(features, isolated, axis=0)

    # ---- <dataset>.npz: features, labels, edges and membership masks ----
    output = {
      "node_feats": torch.from_numpy(features.astype("float32")),
      "node_class": torch.from_numpy(memberships),
      "edge": torch.from_numpy(graph.get_edges()),
      "edge_list": torch.from_numpy(np.ones(graph.num_edges())),
      "node_list": torch.from_numpy(np.ones(graph.num_vertices())),
    }
    print(output["node_class"].shape)
    # np.savez stores every extra keyword as a named array; on NumPy releases
    # whose savez has no allow_pickle parameter, passing allow_pickle=True
    # added a spurious "allow_pickle" entry to the archive, so it is omitted.
    np.savez(os.path.join(directory, dataset_name + ".npz"), **output)

    # ---- <dataset>_task.npz: random train / val / test node split ----
    node_ids = list(range(graph.num_vertices()))
    # NOTE: every split is written to the same file, so only the last one
    # survives when num_of_split > 1.
    for split in range(num_of_split):
      random.shuffle(node_ids)
      train_len = int(len(node_ids) * partition["train_FOLD"])
      val_len = int(len(node_ids) * partition["val_FOLD"])
      # The test split takes whatever remains after train and val.
      output_task = {
        "train": node_ids[:train_len],
        "val": node_ids[train_len:train_len+val_len],
        "test": node_ids[train_len+val_len:],
      }
      np.savez(os.path.join(directory, dataset_name + "_task.npz"), **output_task)




torch.Size([3996])
torch.Size([4851])
torch.Size([4868])
torch.Size([4992])
torch.Size([4994])


### Build GLI Format and Store Dataset

In [24]:
# output json files
import json

# Write the three GLI description files (metadata.json,
# task_node_classification_1.json, urls.json) for every gw_deg dataset:
# one folder per (swept value index, repeat index) pair.
for i in range(len(deg_arr)):
  for j in range(repeat):
    dir_str = "gw_deg_"+str(i)+"_"+str(j)
    dataset_str = dir_str+".npz"
    task_str = dir_str+"_task"+".npz"

    print("dataset_str: ", dataset_str)

    # BibTeX entry of the GraphWorld paper, kept as one long string.
    citation = "@inproceedings{10.1145/3534678.3539203,\
    author = {Palowitch, John and Tsitsulin, Anton and Mayer, Brandon and Perozzi, Bryan},\
    title = {GraphWorld: Fake Graphs Bring Real Insights for GNNs},\
    year = {2022},\
    isbn = {9781450393850},\
    publisher = {Association for Computing Machinery},\
    url = {https://doi.org/10.1145/3534678.3539203},\
    doi = {10.1145/3534678.3539203},\
    booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\
    pages = {3691–3701},\
    series = {KDD '22}}"

    # metadata.json: which file / key holds each node, edge and graph field.
    metadata_json = {
      "description": "Random Generated Dataset.",
      "data": {
        "Node": {
          "NodeFeature": {
            "description": "Node features of random generated dataset, real-valued vectors.",
            "type": "double",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_feats",
          },
          "NodeLabel": {
            "description": "Node labels of random generated dataset, int ranged from 1 to 4.",
            "type": "int",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_class",
          },
        },
        "Edge": {
          "_Edge": {"file": dataset_str, "key": "edge"},
        },
        "Graph": {
          "_NodeList": {"file": dataset_str, "key": "node_list"},
          "_EdgeList": {"file": dataset_str, "key": "edge_list"},
        },
      },
      "citation": citation,
      "is_heterogeneous": False,
    }

    print("task_str: ", task_str)

    # task_node_classification_1.json: task definition and split locations.
    task_json = {
      "description": "Node classification on random generated dataset.",
      "type": "NodeClassification",
      "feature": ["Node/NodeFeature"],
      "target": "Node/NodeLabel",
      "num_classes": NUM_CLUSTERS,
      "train_set": {"file": task_str, "key": "train"},
      "val_set": {"file": task_str, "key": "val"},
      "test_set": {"file": task_str, "key": "test"},
    }

    # urls.json: both data files are listed with an empty URL.
    url_json = {dataset_str: "", task_str: ""}

    # Dump each document into the dataset's own folder.
    json_outputs = (
      ("metadata.json", metadata_json),
      ("task_node_classification_1.json", task_json),
      ("urls.json", url_json),
    )
    for file_name, payload in json_outputs:
      with open(parent_dir+dir_str+'/'+file_name, 'w') as fp:
        json.dump(payload, fp, indent=4)
 

dataset_str:  gw_deg_0_0.npz
task_str:  gw_deg_0_0_task.npz
dataset_str:  gw_deg_1_0.npz
task_str:  gw_deg_1_0_task.npz
dataset_str:  gw_deg_2_0.npz
task_str:  gw_deg_2_0_task.npz
dataset_str:  gw_deg_3_0.npz
task_str:  gw_deg_3_0_task.npz
dataset_str:  gw_deg_4_0.npz
task_str:  gw_deg_4_0_task.npz


In [25]:
# Bundle every gw_deg_* dataset folder into a single archive and hand it to
# the browser for download (google.colab is only available inside Colab).
# NOTE: zip -r adds to / updates an archive that already exists at this path
# rather than recreating it, and it packs whatever files are in the folders,
# including leftovers from earlier runs.
!zip -r /content/all_data_deg.zip /content/gli/datasets/gw_deg_*
from google.colab import files
files.download("/content/all_data_deg.zip")

updating: content/gli/datasets/gw_deg_0_0/ (stored 0%)
updating: content/gli/datasets/gw_deg_0_0/metadata.json (deflated 63%)
updating: content/gli/datasets/gw_deg_0_0/gw_gini_0_0_task.npz (deflated 69%)
updating: content/gli/datasets/gw_deg_0_0/task_node_classification_1.json (deflated 59%)
updating: content/gli/datasets/gw_deg_0_0/urls.json (deflated 36%)
updating: content/gli/datasets/gw_deg_0_0/gw_deg_0_0.npz (deflated 59%)
updating: content/gli/datasets/gw_deg_1_0/ (stored 0%)
updating: content/gli/datasets/gw_deg_1_0/metadata.json (deflated 63%)
updating: content/gli/datasets/gw_deg_1_0/task_node_classification_1.json (deflated 59%)
updating: content/gli/datasets/gw_deg_1_0/urls.json (deflated 36%)
updating: content/gli/datasets/gw_deg_1_0/gw_deg_1_0.npz (deflated 68%)
updating: content/gli/datasets/gw_deg_1_0/gw_gini_1_0_task.npz (deflated 69%)
updating: content/gli/datasets/gw_deg_2_0/ (stored 0%)
updating: content/gli/datasets/gw_deg_2_0/metadata.json (deflated 63%)
updating: 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Varying Edge Homogeneity

In [26]:
import random
import pandas as pd
import networkx as nx
import os
import numpy as np
import graph_tool as gt
import torch
import matplotlib.pyplot as plt

# ---- Output location and split configuration ----
parent_dir = "/content/gli/datasets/"
# Fraction of the surviving nodes assigned to each split.
partition = {"train_FOLD":0.6, "val_FOLD":0.2, "test_FOLD":0.2}
num_of_split = 1

# ---- GraphWorld parameters held fixed in this experiment ----
NVERTEX = 5000
FEATURE_CENTER_DISTANCE = 0.05
CLUSTER_SIZE_SLOPE = 0.0

avg_degree = 20
feature_dim = 16
edge_center_distance = 2.0
edge_feature_dim = 2
feature_cluster_variance = 0.1

NUM_CLUSTERS = 4
repeat = 1
POWER_EXPONENT = 2.0

# ---- Swept parameter ----
# Each entry is handed to MakePropMat as p_to_q_ratio; the resulting edge
# homogeneity is printed for every run.
p2q_arr = [1,2,3,5,10]

sbm = graph_world.generators.sbm_simulator

for i in range(len(p2q_arr)):
  P2Q = p2q_arr[i]

  # `repeat` independent datasets are generated per swept value.
  for j in range(repeat):

    # One folder per dataset, always derived from parent_dir.
    dataset_name = "gw_homo_"+str(i)+"_"+str(j)
    directory = os.path.join(parent_dir, dataset_name)
    os.makedirs(directory, exist_ok=True)

    pi = sbm.MakePi(num_communities=NUM_CLUSTERS, community_size_slope=CLUSTER_SIZE_SLOPE)
    prop_mat = sbm.MakePropMat(num_communities=NUM_CLUSTERS, p_to_q_ratio=P2Q)
    out_degrees = sbm.MakeDegrees(POWER_EXPONENT, 1, NVERTEX)

    sampler_out = sbm.GenerateStochasticBlockModelWithFeatures(
      num_vertices=NVERTEX,
      num_edges=NVERTEX*avg_degree,
      pi=pi,
      prop_mat=prop_mat,
      out_degs=out_degrees,
      feature_center_distance=FEATURE_CENTER_DISTANCE,
      feature_dim=feature_dim,
      num_feature_groups=NUM_CLUSTERS,
      feature_group_match_type=MatchType.GROUPED,
      feature_cluster_variance=feature_cluster_variance,
      edge_feature_dim=edge_feature_dim,
      edge_center_distance=edge_center_distance,
      edge_cluster_variance=1,
      normalize_features=True)

    graph = sampler_out.graph
    memberships = sampler_out.graph_memberships
    feature_memberships = sampler_out.feature_memberships
    features = sampler_out.node_features

    # Drop zero-degree vertices together with their labels and features.
    # remove_vertex accepts an iterable of indices and, with the default
    # fast=False, keeps the relative order of the remaining vertices, so
    # deleting the same indices from the arrays keeps everything aligned.
    degrees = graph.get_out_degrees(graph.get_vertices())
    isolated = np.flatnonzero(degrees == 0)
    if isolated.size:
      graph.remove_vertex(isolated)
      memberships = np.delete(memberships, isolated)
      features = np.delete(features, isolated, axis=0)

    # Label metrics are computed on the pruned graph so that they describe
    # exactly what is written to disk below.
    out = {}
    out['metrics'] = NodeLabelMetrics(graph,
                                  memberships,
                                  features)

    print("edge_homogeneity: ", out['metrics']["edge_homogeneity"])

    # ---- <dataset>.npz: features, labels, edges and membership masks ----
    output = {
      "node_feats": torch.from_numpy(features.astype("float32")),
      "node_class": torch.from_numpy(memberships),
      "edge": torch.from_numpy(graph.get_edges()),
      "edge_list": torch.from_numpy(np.ones(graph.num_edges())),
      "node_list": torch.from_numpy(np.ones(graph.num_vertices())),
    }
    print(output["node_class"].shape)
    # np.savez stores every extra keyword as a named array; on NumPy releases
    # whose savez has no allow_pickle parameter, passing allow_pickle=True
    # added a spurious "allow_pickle" entry to the archive, so it is omitted.
    np.savez(os.path.join(directory, dataset_name + ".npz"), **output)

    # ---- <dataset>_task.npz: random train / val / test node split ----
    node_ids = list(range(graph.num_vertices()))
    # NOTE: every split is written to the same file, so only the last one
    # survives when num_of_split > 1.
    for split in range(num_of_split):
      random.shuffle(node_ids)
      train_len = int(len(node_ids) * partition["train_FOLD"])
      val_len = int(len(node_ids) * partition["val_FOLD"])
      # The test split takes whatever remains after train and val.
      output_task = {
        "train": node_ids[:train_len],
        "val": node_ids[train_len:train_len+val_len],
        "test": node_ids[train_len+val_len:],
      }
      np.savez(os.path.join(directory, dataset_name + "_task.npz"), **output_task)




edge_homogeneity:  0.2510181116707748
torch.Size([4872])
edge_homogeneity:  0.376492808368444
torch.Size([4703])
edge_homogeneity:  0.4584767897423721
torch.Size([4810])
edge_homogeneity:  0.5668411330049261
torch.Size([4811])
edge_homogeneity:  0.7035974172389055
torch.Size([4746])


### Build GLI Format and Store Dataset

In [31]:
# output json files
import json

# Write the three GLI description files (metadata.json,
# task_node_classification_1.json, urls.json) for every gw_homo dataset:
# one folder per (swept value index, repeat index) pair.
for i in range(len(p2q_arr)):
  for j in range(repeat):
    dir_str = "gw_homo_"+str(i)+"_"+str(j)
    dataset_str = dir_str+".npz"
    task_str = dir_str+"_task"+".npz"

    print("dataset_str: ", dataset_str)

    # BibTeX entry of the GraphWorld paper, kept as one long string.
    citation = "@inproceedings{10.1145/3534678.3539203,\
    author = {Palowitch, John and Tsitsulin, Anton and Mayer, Brandon and Perozzi, Bryan},\
    title = {GraphWorld: Fake Graphs Bring Real Insights for GNNs},\
    year = {2022},\
    isbn = {9781450393850},\
    publisher = {Association for Computing Machinery},\
    url = {https://doi.org/10.1145/3534678.3539203},\
    doi = {10.1145/3534678.3539203},\
    booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\
    pages = {3691–3701},\
    series = {KDD '22}}"

    # metadata.json: which file / key holds each node, edge and graph field.
    metadata_json = {
      "description": "Random Generated Dataset.",
      "data": {
        "Node": {
          "NodeFeature": {
            "description": "Node features of random generated dataset, real-valued vectors.",
            "type": "double",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_feats",
          },
          "NodeLabel": {
            "description": "Node labels of random generated dataset, int ranged from 1 to 4.",
            "type": "int",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_class",
          },
        },
        "Edge": {
          "_Edge": {"file": dataset_str, "key": "edge"},
        },
        "Graph": {
          "_NodeList": {"file": dataset_str, "key": "node_list"},
          "_EdgeList": {"file": dataset_str, "key": "edge_list"},
        },
      },
      "citation": citation,
      "is_heterogeneous": False,
    }

    print("task_str: ", task_str)

    # task_node_classification_1.json: task definition and split locations.
    task_json = {
      "description": "Node classification on random generated dataset.",
      "type": "NodeClassification",
      "feature": ["Node/NodeFeature"],
      "target": "Node/NodeLabel",
      "num_classes": NUM_CLUSTERS,
      "train_set": {"file": task_str, "key": "train"},
      "val_set": {"file": task_str, "key": "val"},
      "test_set": {"file": task_str, "key": "test"},
    }

    # urls.json: both data files are listed with an empty URL.
    url_json = {dataset_str: "", task_str: ""}

    # Dump each document into the dataset's own folder.
    json_outputs = (
      ("metadata.json", metadata_json),
      ("task_node_classification_1.json", task_json),
      ("urls.json", url_json),
    )
    for file_name, payload in json_outputs:
      with open(parent_dir+dir_str+'/'+file_name, 'w') as fp:
        json.dump(payload, fp, indent=4)
 

dataset_str:  gw_homo_0_0.npz
task_str:  gw_homo_0_0_task.npz
dataset_str:  gw_homo_1_0.npz
task_str:  gw_homo_1_0_task.npz
dataset_str:  gw_homo_2_0.npz
task_str:  gw_homo_2_0_task.npz
dataset_str:  gw_homo_3_0.npz
task_str:  gw_homo_3_0_task.npz
dataset_str:  gw_homo_4_0.npz
task_str:  gw_homo_4_0_task.npz


In [32]:
# Bundle every gw_homo_* dataset folder into a single archive and hand it to
# the browser for download (google.colab is only available inside Colab).
# NOTE: zip -r adds to / updates an archive that already exists at this path
# rather than recreating it.
!zip -r /content/all_data_homo.zip /content/gli/datasets/gw_homo_*
from google.colab import files
files.download("/content/all_data_homo.zip")

updating: content/gli/datasets/gw_homo_0_0/ (stored 0%)
updating: content/gli/datasets/gw_homo_0_0/metadata.json (deflated 63%)
updating: content/gli/datasets/gw_homo_0_0/task_node_classification_1.json (deflated 59%)
updating: content/gli/datasets/gw_homo_0_0/urls.json (deflated 36%)
updating: content/gli/datasets/gw_homo_0_0/gw_homo_0_0.npz (deflated 69%)
updating: content/gli/datasets/gw_homo_0_0/gw_homo_0_0_task.npz (deflated 69%)
updating: content/gli/datasets/gw_homo_1_0/ (stored 0%)
updating: content/gli/datasets/gw_homo_1_0/metadata.json (deflated 63%)
updating: content/gli/datasets/gw_homo_1_0/task_node_classification_1.json (deflated 59%)
updating: content/gli/datasets/gw_homo_1_0/urls.json (deflated 36%)
updating: content/gli/datasets/gw_homo_1_0/gw_homo_1_0.npz (deflated 68%)
updating: content/gli/datasets/gw_homo_1_0/gw_homo_1_0_task.npz (deflated 69%)
updating: content/gli/datasets/gw_homo_2_0/ (stored 0%)
updating: content/gli/datasets/gw_homo_2_0/metadata.json (deflated

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Varying Feature SNR

In [29]:
import random
import pandas as pd
import networkx as nx
import os
import numpy as np
import graph_tool as gt
import torch
import matplotlib.pyplot as plt

# --- Output layout and train/val/test split ---------------------------------
# Root directory holding one sub-directory per generated dataset.
parent_dir = "/content/gli/datasets/"
# Fractions of nodes assigned to each split.
partition = dict(train_FOLD=0.6, val_FOLD=0.2, test_FOLD=0.2)
# Number of random splits drawn per dataset, and datasets generated per setting.
num_of_split = 1
repeat = 1

# --- GraphWorld SBM parameters held fixed during the feature-variance sweep ---
# (feature_cluster_variance is the swept parameter and is set inside the loop.)
NVERTEX = 5000
NUM_CLUSTERS = 4
avg_degree = 20
P2Q = 2.0
CLUSTER_SIZE_SLOPE = 0.0
POWER_EXPONENT = 2.0

# Node-feature parameters.
FEATURE_CENTER_DISTANCE = 0.05
feature_dim = 16

# Edge-feature parameters.
edge_feature_dim = 2
edge_center_distance = 2.0



# Feature-noise sweep: one dataset is generated per cluster-variance value.
var_arr = [0.1, 0.2, 0.5, 1, 2]

# Iterate over `var_arr` itself. The loop used to run over
# `range(len(p2q_arr))`, but `p2q_arr` is not defined anywhere in this cell
# (presumably a leftover from the homophily sweep), so the cell failed on a
# fresh kernel and otherwise depended on an unrelated list's length.
for i, feature_cluster_variance in enumerate(var_arr):

  # `repeat` independent datasets per variance value.
  for j in range(repeat):

    # The dataset name doubles as directory name and as stem of the .npz files.
    dataset_name = "gw_var_"+str(i)+"_"+str(j)
    directory = os.path.join(parent_dir, dataset_name)
    os.makedirs(directory, exist_ok=True)

    # Degree-corrected SBM ingredients: community sizes, block edge
    # proportions and the per-node expected degrees.
    pi = graph_world.generators.sbm_simulator.MakePi(num_communities=NUM_CLUSTERS, community_size_slope=CLUSTER_SIZE_SLOPE)
    prop_mat = graph_world.generators.sbm_simulator.MakePropMat(num_communities=NUM_CLUSTERS, p_to_q_ratio=P2Q)
    out_degrees = graph_world.generators.sbm_simulator.MakeDegrees(POWER_EXPONENT, 1, NVERTEX)

    sampler_out = graph_world.generators.sbm_simulator.GenerateStochasticBlockModelWithFeatures(
      num_vertices=NVERTEX,
      num_edges=NVERTEX*avg_degree,
      pi=pi,
      prop_mat=prop_mat,
      out_degs=out_degrees,
      feature_center_distance=FEATURE_CENTER_DISTANCE,
      feature_dim=feature_dim,
      num_feature_groups=NUM_CLUSTERS,
      feature_group_match_type=MatchType.GROUPED,
      feature_cluster_variance=feature_cluster_variance,
      edge_feature_dim=edge_feature_dim,
      edge_center_distance=edge_center_distance,
      edge_cluster_variance=1,
      normalize_features=True)

    graph = sampler_out.graph
    memberships = sampler_out.graph_memberships
    features = sampler_out.node_features

    # Drop isolated vertices, keeping labels and features aligned with the
    # graph. Every removal shifts the indices of the remaining vertices down
    # by one (graph-tool keeps vertex indices contiguous), hence the
    # `z - num_removed` offset.
    degrees = graph.get_out_degrees(graph.get_vertices())
    num_removed = 0
    for z, d in enumerate(degrees):
      if d == 0:
        graph.remove_vertex(z - num_removed)
        memberships = np.delete(memberships, [z - num_removed])
        features = np.delete(features, [z - num_removed], axis=0)
        num_removed += 1

    # Report the realised feature metrics of this dataset.
    out = {}
    out['metrics'] = NodeLabelMetrics(graph,
                                  memberships,
                                  features)
    print("avg_in_feature_angular_distance: ", out['metrics']["avg_in_feature_angular_distance"])
    print("feature_angular_snr: ", out['metrics']["feature_angular_snr"])

    # <dataset_name>.npz: node features, node labels, edges and the
    # all-ones node/edge lists referenced by metadata.json.
    output = {}
    output["node_feats"] = torch.from_numpy(features.astype("float32"))
    output["node_class"] = torch.from_numpy(memberships)
    output["edge"] = torch.from_numpy(graph.get_edges())
    # Ask the graph for its counts instead of materialising every edge/vertex.
    output["edge_list"] = torch.from_numpy(np.ones(graph.num_edges()))
    output["node_list"] = torch.from_numpy(np.ones(graph.num_vertices()))
    print(output["node_class"].shape)
    # NOTE(review): `allow_pickle` is kept exactly as before; confirm the
    # installed NumPy treats it as an option rather than as an array to store.
    np.savez(os.path.join(directory, dataset_name + ".npz"), **output, allow_pickle=True)

    # <dataset_name>_task.npz: random train/val/test node split.
    output_task = {}
    node_ids = list(range(graph.num_vertices()))
    # Each pass rewrites the same file, so only the last split survives
    # when num_of_split > 1.
    for split_idx in range(num_of_split):
      random.shuffle(node_ids)
      train_len = int(len(node_ids) * partition["train_FOLD"])
      val_len = int(len(node_ids) * partition["val_FOLD"])
      output_task["train"] = node_ids[:train_len]
      output_task["val"] = node_ids[train_len:train_len+val_len]
      # The test split takes everything that is left after train and val.
      output_task["test"] = node_ids[train_len+val_len:]

      np.savez(os.path.join(directory, dataset_name + "_task.npz"), **output_task, allow_pickle=True)




avg_in_feature_angular_distance:  0.6029061441157513
feature_angular_snr:  1.1667956881756547
torch.Size([4675])
avg_in_feature_angular_distance:  0.5544326045810665
feature_angular_snr:  1.0924070771527385
torch.Size([4835])
avg_in_feature_angular_distance:  0.5318325634886577
feature_angular_snr:  1.0439227233726742
torch.Size([4761])
avg_in_feature_angular_distance:  0.5209335711680749
feature_angular_snr:  1.030478392690934
torch.Size([4742])
avg_in_feature_angular_distance:  0.5064364307804582
feature_angular_snr:  1.009111061620942
torch.Size([4680])


### Build GLI Format and Store Dataset

In [30]:
# output json files
import json

# Write the three GLI descriptor files (metadata.json,
# task_node_classification_1.json, urls.json) into every gw_var_<i>_<j>
# directory: one directory per entry of `var_arr`, times `repeat` repetitions.

# BibTeX entry of the GraphWorld paper; identical for every dataset.
citation = (
  "@inproceedings{10.1145/3534678.3539203,"
  "    author = {Palowitch, John and Tsitsulin, Anton and Mayer, Brandon and Perozzi, Bryan},"
  "    title = {GraphWorld: Fake Graphs Bring Real Insights for GNNs},"
  "    year = {2022},"
  "    isbn = {9781450393850},"
  "    publisher = {Association for Computing Machinery},"
  "    url = {https://doi.org/10.1145/3534678.3539203},"
  "    doi = {10.1145/3534678.3539203},"
  "    booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},"
  "    pages = {3691–3701},"
  "    series = {KDD '22}}"
)

for i in range(len(var_arr)):
  for j in range(repeat):

    # Directory name and the two data files that live inside it.
    dir_str = f"gw_var_{i}_{j}"
    dataset_str = f"{dir_str}.npz"
    task_str = f"{dir_str}_task.npz"
    print("dataset_str: ", dataset_str)
    print("task_str: ", task_str)

    # metadata.json: where each graph component is stored inside the dataset .npz.
    # NOTE(review): node features are declared as "double" here; confirm this
    # matches the dtype actually written to the .npz.
    metadata_json = {
      "description": "Random Generated Dataset.",
      "data": {
        "Node": {
          "NodeFeature": {
            "description": "Node features of random generated dataset, real-valued vectors.",
            "type": "double",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_feats",
          },
          "NodeLabel": {
            "description": "Node labels of random generated dataset, int ranged from 1 to 4.",
            "type": "int",
            "format": "Tensor",
            "file": dataset_str,
            "key": "node_class",
          },
        },
        "Edge": {
          "_Edge": {"file": dataset_str, "key": "edge"},
        },
        "Graph": {
          "_NodeList": {"file": dataset_str, "key": "node_list"},
          "_EdgeList": {"file": dataset_str, "key": "edge_list"},
        },
      },
      "citation": citation,
      "is_heterogeneous": False,
    }

    # task_node_classification_1.json: task definition and split locations.
    task_json = {
      "description": "Node classification on random generated dataset.",
      "type": "NodeClassification",
      "feature": ["Node/NodeFeature"],
      "target": "Node/NodeLabel",
      "num_classes": NUM_CLUSTERS,
      "train_set": {"file": task_str, "key": "train"},
      "val_set": {"file": task_str, "key": "val"},
      "test_set": {"file": task_str, "key": "test"},
    }

    # urls.json: both files are listed with an empty URL.
    url_json = {dataset_str: "", task_str: ""}

    # Dump each descriptor into the dataset directory.
    descriptors = (
      ("metadata.json", metadata_json),
      ("task_node_classification_1.json", task_json),
      ("urls.json", url_json),
    )
    for file_name, payload in descriptors:
      with open(parent_dir+dir_str+'/'+file_name, 'w') as fp:
        json.dump(payload, fp, indent=4)
 

dataset_str:  gw_var_0_0.npz
task_str:  gw_var_0_0_task.npz
dataset_str:  gw_var_1_0.npz
task_str:  gw_var_1_0_task.npz
dataset_str:  gw_var_2_0.npz
task_str:  gw_var_2_0_task.npz
dataset_str:  gw_var_3_0.npz
task_str:  gw_var_3_0_task.npz
dataset_str:  gw_var_4_0.npz
task_str:  gw_var_4_0_task.npz


In [33]:
# Bundle every generated gw_var_* dataset directory into one archive
# (the recorded output shows "adding:", i.e. the archive was created fresh).
!zip -r /content/all_data_var.zip /content/gli/datasets/gw_var_*
# Colab-only helper; hands the archive to the browser as a download.
from google.colab import files
files.download("/content/all_data_var.zip")

  adding: content/gli/datasets/gw_var_0_0/ (stored 0%)
  adding: content/gli/datasets/gw_var_0_0/metadata.json (deflated 63%)
  adding: content/gli/datasets/gw_var_0_0/task_node_classification_1.json (deflated 59%)
  adding: content/gli/datasets/gw_var_0_0/urls.json (deflated 36%)
  adding: content/gli/datasets/gw_var_0_0/gw_var_0_0_task.npz (deflated 69%)
  adding: content/gli/datasets/gw_var_0_0/gw_var_0_0.npz (deflated 66%)
  adding: content/gli/datasets/gw_var_1_0/ (stored 0%)
  adding: content/gli/datasets/gw_var_1_0/gw_var_1_0.npz (deflated 69%)
  adding: content/gli/datasets/gw_var_1_0/metadata.json (deflated 63%)
  adding: content/gli/datasets/gw_var_1_0/task_node_classification_1.json (deflated 59%)
  adding: content/gli/datasets/gw_var_1_0/urls.json (deflated 36%)
  adding: content/gli/datasets/gw_var_1_0/gw_var_1_0_task.npz (deflated 69%)
  adding: content/gli/datasets/gw_var_2_0/ (stored 0%)
  adding: content/gli/datasets/gw_var_2_0/metadata.json (deflated 63%)
  adding: co

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Train Models via GLI module

In [236]:
# The training cells below invoke `train.py` and `configs/*.yaml` by relative
# path, so the working directory must be the NodeClassification benchmark folder.
%cd /content/gli/benchmarks/NodeClassification

/content/gli/benchmarks/NodeClassification


In [None]:
!python train.py --dataset gw_gini_0_0 --model GCN 
!python train.py --dataset gw_gini_1_0 --model GCN 
!python train.py --dataset gw_gini_2_0 --model GCN 
!python train.py --dataset gw_gini_3_0 --model GCN 
!python train.py --dataset gw_gini_4_0 --model GCN 

In [None]:
!python train.py --dataset gw_gini_0_0 --model GAT --model-cfg configs/GAT.yaml
!python train.py --dataset gw_gini_1_0 --model GAT --model-cfg configs/GAT.yaml
!python train.py --dataset gw_gini_2_0 --model GAT --model-cfg configs/GAT.yaml
!python train.py --dataset gw_gini_3_0 --model GAT --model-cfg configs/GAT.yaml
!python train.py --dataset gw_gini_4_0 --model GAT --model-cfg configs/GAT.yaml

In [None]:
!python train.py --dataset gw_gini_0_0 --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
!python train.py --dataset gw_gini_1_0 --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
!python train.py --dataset gw_gini_2_0 --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
!python train.py --dataset gw_gini_3_0 --model GraphSAGE --model-cfg configs/GraphSAGE.yaml
!python train.py --dataset gw_gini_4_0 --model GraphSAGE --model-cfg configs/GraphSAGE.yaml

In [None]:
!python train.py --dataset gw_gini_0_0 --model MoNet --model-cfg configs/MoNet.yaml
!python train.py --dataset gw_gini_1_0 --model MoNet --model-cfg configs/MoNet.yaml
!python train.py --dataset gw_gini_2_0 --model MoNet --model-cfg configs/MoNet.yaml
!python train.py --dataset gw_gini_3_0 --model MoNet --model-cfg configs/MoNet.yaml
!python train.py --dataset gw_gini_4_0 --model MoNet --model-cfg configs/MoNet.yaml

In [None]:
!python train.py --dataset gw_gini_0_0 --model MixHop --model-cfg configs/MixHop.yaml
!python train.py --dataset gw_gini_1_0 --model MixHop --model-cfg configs/MixHop.yaml
!python train.py --dataset gw_gini_2_0 --model MixHop --model-cfg configs/MixHop.yaml
!python train.py --dataset gw_gini_3_0 --model MixHop --model-cfg configs/MixHop.yaml
!python train.py --dataset gw_gini_4_0 --model MixHop --model-cfg configs/MixHop.yaml

In [None]:
!python train.py --dataset gw_gini_0_0 --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml
!python train.py --dataset gw_gini_1_0 --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml
!python train.py --dataset gw_gini_2_0 --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml
!python train.py --dataset gw_gini_3_0 --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml
!python train.py --dataset gw_gini_4_0 --model LINKX --model-cfg configs/LINKX.yaml --train-cfg configs/LINKX_train.yaml

In [None]:
!python train.py --dataset gw_gini_0_0 --model MLP 
!python train.py --dataset gw_gini_1_0 --model MLP 
!python train.py --dataset gw_gini_2_0 --model MLP 
!python train.py --dataset gw_gini_3_0 --model MLP 
!python train.py --dataset gw_gini_4_0 --model MLP 