# Measurement of benchmarks and comparison with SOTA 
1. [Amazon Computers](#amazon_computers)
2. [Amazon Photo](#amazon_photo)
3. [Amazon-Fraud](#amazon_fraud)
4. [YelpChi](#yelp_chi)
5. [Multitarget 10k](#multitarget_10k)
6. [Multitarget 50k](#multitarget_50k)
7. [Penn94](#penn_94)
8. [Genius](#genius)

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
from cool_graph.data.utils import count_degree
from torch_geometric.datasets import Amazon
from torch_geometric.utils.convert import to_networkx
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from tqdm import tqdm
import cool_graph
from cool_graph.runners import Runner, HypeRunner
from sklearn.model_selection import train_test_split
from cool_graph.train.metrics import calc_metrics
import pandas as pd
from tqdm.contrib import itertools
import seaborn as sns
from scipy import stats
from typing import List, Optional, Literal
from torch_geometric.data import Data
from cool_graph.datasets import AntiFraud, Multitarget, NonHomophilous, OgbnProteins

In [3]:
class DataInfo:
    """Bundle a benchmark dataset with the callables that drive its evaluation.

    Attributes:
        name: Human-readable dataset name.
        data: Graph object; ``eval_score`` clones it before every repetition.
        preproc_data: Called with the cloned ``data``; returns the graph that
            is split and handed to the runner.
        split_data: Called as ``split_data(data, seed=...)``; returns
            ``(train_idx, valid_idx, test_idx)``.
        preproc_results: Maps the value returned by ``RunnerInfo.predict`` to
            the single score collected per repetition.
    """

    name: str
    data: Data
    preproc_data: callable
    split_data: callable
    preproc_results: callable

    def __init__(
        self,
        name: str,
        data: Data,
        preproc_data: callable,
        split_data: callable,
        preproc_results: callable,
    ):
        # Plain value holder: every argument is stored unchanged.
        self.name, self.data = name, data
        self.preproc_data, self.split_data = preproc_data, split_data
        self.preproc_results = preproc_results

In [4]:
class RunnerInfo:
    """Configuration for, and a thin lifecycle wrapper around, a cool_graph runner.

    Typical use (see ``eval_score``): ``init_runner`` -> ``run`` -> ``predict``.

    Attributes:
        model_type: ``'Runner'`` or ``'HypeRunner'``; selects the class that
            ``init_runner`` instantiates and the method that ``run`` calls.
        main_metric: Metric name forwarded to the runner and to ``calc_metrics``.
        overrides: Config override strings forwarded to the runner unchanged.
        n_trials: Forwarded to ``optimize_run`` for ``'HypeRunner'``; forced to
            0 for ``'Runner'``, where it is not used.
        verbose: Forwarded to the runner.
        model: The runner instance, or ``None`` until ``init_runner`` is called.
    """

    model_type: Literal['Runner', 'HypeRunner']
    main_metric: str
    overrides: List[str]
    n_trials: int
    verbose: bool

    # The only values ``init_runner`` / ``run`` know how to dispatch on.
    _MODEL_TYPES = ('Runner', 'HypeRunner')

    def __init__(
        self,
        model_type: Literal['Runner', 'HypeRunner'],
        main_metric: str,
        overrides: List[str],
        verbose: bool = False,
        n_trials: int = 1,
    ):
        """Store the configuration.

        Raises:
            ValueError: if ``model_type`` is not ``'Runner'`` or ``'HypeRunner'``.
        """
        # Fail fast: previously an unknown type silently built a HypeRunner in
        # ``init_runner`` and then ``run`` did nothing at all.
        if model_type not in self._MODEL_TYPES:
            raise ValueError(
                f"model_type must be one of {self._MODEL_TYPES}, got {model_type!r}"
            )
        self.model_type = model_type
        self.main_metric = main_metric
        self.overrides = overrides
        self.verbose = verbose
        self.n_trials = n_trials if model_type == 'HypeRunner' else 0
        self.model = None

    def _require_model(self):
        """Return the initialised runner or raise a clear error if there is none."""
        if self.model is None:
            raise RuntimeError("init_runner() must be called before run()/predict()")
        return self.model

    def init_runner(self, data, train_idx=None, valid_idx=None, seed=None):
        """Instantiate a fresh runner of the configured type and store it in ``self.model``.

        Note that ``valid_idx`` is passed to the runner as its ``test_idx``
        argument; the held-out test indices are only used later in ``predict``.
        """
        ModelType = Runner if self.model_type == 'Runner' else HypeRunner
        self.model = ModelType(
            data=data,
            seed=seed,
            train_idx=train_idx,
            test_idx=valid_idx,
            main_metric=self.main_metric,
            overrides=self.overrides,
            verbose=self.verbose
        )

    def run(self):
        """Train the runner: ``run()`` for Runner, ``optimize_run(n_trials)`` for HypeRunner.

        The runner's return value is discarded.

        Raises:
            RuntimeError: if ``init_runner`` has not been called.
        """
        model = self._require_model()
        if self.model_type == 'Runner':
            model.run()
        else:  # 'HypeRunner' (guaranteed by the check in __init__)
            model.optimize_run(n_trials=self.n_trials)

    def predict(self, data, test_idx):
        """Predict on ``test_idx`` and return ``calc_metrics`` for ``main_metric``.

        Raises:
            RuntimeError: if ``init_runner`` has not been called.
        """
        model = self._require_model()
        preds, indices = model.predict_proba(data, test_idx)
        return calc_metrics(
            data,
            preds,
            self.main_metric,
            indices,
        )

In [5]:
def eval_score(data_info: "DataInfo", runner_info: "RunnerInfo", n_tests=5, verbose=True, base_seed=42):
    """Train and evaluate ``n_tests`` times with consecutive seeds; return the scores.

    For repetition ``i`` the seed is ``base_seed + i``. Each repetition
    preprocesses a fresh clone of ``data_info.data``, splits it with that seed,
    builds a new runner on the train/validation indices, trains it, and scores
    it on the test indices.

    Args:
        data_info: Dataset bundle providing ``data``, ``preproc_data``,
            ``split_data`` and ``preproc_results``.
        runner_info: Runner wrapper providing ``init_runner``, ``run`` and ``predict``.
        n_tests: Number of repetitions.
        verbose: Show a tqdm progress bar over the repetitions.
        base_seed: Seed of the first repetition (default 42, the value that
            was previously hard-coded).

    Returns:
        List with one entry per repetition: ``data_info.preproc_results``
        applied to the result of ``runner_info.predict``.
    """
    scores = []

    repetitions = range(n_tests)
    if verbose:
        repetitions = tqdm(repetitions)

    for i in repetitions:
        seed = base_seed + i
        # Clone first so preprocessing never mutates the dataset stored in data_info.
        data = data_info.preproc_data(data_info.data.clone())
        train_idx, valid_idx, test_idx = data_info.split_data(data, seed=seed)

        runner_info.init_runner(data, train_idx, valid_idx, seed)
        runner_info.run()
        score = runner_info.predict(data, test_idx)

        scores.append(data_info.preproc_results(score))

    return scores

In [6]:
def data_split_default(data, train_size=0.6, valid_size=0.2, test_size=0.2, seed=None):
    """Randomly split node indices into train / validation / test lists.

    Candidate nodes are ``0 .. data.x.shape[0] - 1``. If ``data`` has a
    non-``None`` ``label_mask``, only nodes whose mask entry is truthy are kept.

    Args:
        data: Graph object with node features ``x`` and, optionally, ``label_mask``.
        train_size: Not used in the computation; the train share is whatever
            remains after ``valid_size + test_size`` is held out.
        valid_size: Fraction of the candidate nodes used for validation.
        test_size: Fraction of the candidate nodes used for testing.
        seed: ``random_state`` passed to both ``train_test_split`` calls.

    Returns:
        Tuple ``(train_idx, valid_idx, test_idx)`` of index lists.
    """
    node_ids = list(range(data.x.shape[0]))

    # Identity check against None: ``!= None`` may be evaluated element-wise
    # by array-like masks instead of answering "is there a mask at all?".
    label_mask = getattr(data, 'label_mask', None)
    if label_mask is not None:
        node_ids = [node for node in node_ids if label_mask[node]]

    holdout = valid_size + test_size
    train_idx, holdout_idx = train_test_split(node_ids, test_size=holdout, random_state=seed)
    # Second split: test_size is re-expressed as a fraction of the held-out part.
    valid_idx, test_idx = train_test_split(holdout_idx, test_size=test_size / holdout, random_state=seed)

    return train_idx, valid_idx, test_idx

## 1. Amazon Computers <a name="amazon_computers"></a>

In [7]:
# Amazon "Computers" graph loaded through torch_geometric's Amazon dataset (files under ./data).
# No preprocessing; default data_split_default split (20% validation, 20% test, rest train).
# preproc_results keeps the first value of the metrics reported under key 'y'.
amazon_computers_data = DataInfo(
    name = 'Amazon Computers',
    data = Amazon(root='./data', name='Computers').data,
    preproc_data = lambda data: data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

In [8]:
# Per-node degrees, used below to derive the neighbour-sampling sizes.
# NOTE(review): to_networkx builds a directed graph by default, so G.degree counts
# in- plus out-edges; confirm this is the intended notion of degree.
G = to_networkx(amazon_computers_data.data)
degrees = [G.degree(i) for i in range(G.number_of_nodes())]

In [12]:
# Plain Runner scored on accuracy: 60 epochs, and num_neighbors set to the 90% and 80%
# quantiles of `degrees` (must have been computed for this dataset by the cell above).
amazon_computers_runner = RunnerInfo(
    model_type='Runner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=60', f'training.num_neighbors=[{int(np.quantile(degrees, 0.9))}, {int(np.quantile(degrees, 0.8))}]'],
    verbose = False,
)

In [25]:
# Display the resolved override strings, i.e. the concrete quantile-derived neighbour counts.
amazon_computers_runner.overrides

['training.n_epochs=60', 'training.num_neighbors=[146, 94]']

In [19]:
# 10 repetitions (seeds 42-51) with a progress bar; the recorded run below took about 1h52m.
runner_scores = eval_score(amazon_computers_data, amazon_computers_runner, 10, True)

100%|██████████| 10/10 [1:51:55<00:00, 671.52s/it]


In [20]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.9181 += 0.0039


In [26]:
# Same metric and overrides as amazon_computers_runner, but using HypeRunner:
# each repetition calls optimize_run(n_trials=40).
amazon_computers_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=60', f'training.num_neighbors=[{int(np.quantile(degrees, 0.9))}, {int(np.quantile(degrees, 0.8))}]'],
    verbose = False,
    n_trials=40
)

In [27]:
# 5 repetitions (seeds 42-46); the recorded run below took about 12h16m.
hyperunner_scores = eval_score(amazon_computers_data, amazon_computers_hyperunner, 5, True)

100%|██████████| 5/5 [12:15:51<00:00, 8830.38s/it]   


In [28]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.9150 += 0.0069


## 2. Amazon Photo <a name="amazon_photo"></a>

In [7]:
# Amazon "Photo" graph loaded through torch_geometric's Amazon dataset (files under ./data).
# No preprocessing; default data_split_default split (20% validation, 20% test, rest train).
# preproc_results keeps the first value of the metrics reported under key 'y'.
amazon_photo_data = DataInfo(
    name = 'Amazon Photo',
    data = Amazon(root='./data', name='Photo').data,
    preproc_data = lambda data: data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

In [8]:
# Recompute per-node degrees for Amazon Photo. This overwrites the `G` / `degrees`
# of the previous section, so the runner cells below must run after this cell.
# NOTE(review): to_networkx builds a directed graph by default, so G.degree counts
# in- plus out-edges; confirm this is the intended notion of degree.
G = to_networkx(amazon_photo_data.data)
degrees = [G.degree(i) for i in range(G.number_of_nodes())]

In [15]:
# Plain Runner scored on accuracy: 60 epochs, batch size 50, and num_neighbors set to the
# 95% and 80% quantiles of `degrees` (must have been computed for Amazon Photo).
amazon_photo_runner = RunnerInfo(
    model_type='Runner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=60', f'training.num_neighbors=[{int(np.quantile(degrees, 0.95))}, {int(np.quantile(degrees, 0.8))}]', 'training.batch_size=50'],
    verbose = False,
)

In [17]:
# 10 repetitions (seeds 42-51); the recorded run below took about 2h14m.
runner_scores = eval_score(amazon_photo_data, amazon_photo_runner, 10, verbose=True)

100%|██████████| 10/10 [2:14:17<00:00, 805.73s/it] 


In [18]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.9585 += 0.0040


In [19]:
# Same metric and overrides as amazon_photo_runner, but using HypeRunner:
# each repetition calls optimize_run(n_trials=40).
amazon_photo_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=60', f'training.num_neighbors=[{int(np.quantile(degrees, 0.95))}, {int(np.quantile(degrees, 0.8))}]', 'training.batch_size=50'],
    verbose = False,
    n_trials=40
)

In [20]:
# 5 repetitions (eval_score default verbose=True); the recorded run below took about 27h.
hyperunner_scores = eval_score(amazon_photo_data, amazon_photo_hyperunner, 5)

100%|██████████| 5/5 [27:05:23<00:00, 19504.65s/it]   


In [21]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.9610 += 0.0021


## 3. Amazon-Fraud <a name="amazon_fraud"></a>

In [7]:
# Amazon fraud graph from cool_graph's AntiFraud dataset; no preprocessing.
# The split holds out 20% for validation and 40% for test, leaving 40% for training
# (the first positional 0.4 is train_size, which data_split_default does not use).
amazon_fraud_data = DataInfo(
    name = 'Amazon Fraud',
    data = AntiFraud(root='./data', name='Amazon').data,
    preproc_data = lambda data: data,
    split_data = lambda data, seed: data_split_default(data, 0.4, 0.2, 0.4, seed),
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/amazon/Amazon_data.pt


In [10]:
# Plain Runner scored on ROC AUC, 50 epochs, otherwise default configuration.
amazon_fraud_runner = RunnerInfo(
    model_type='Runner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=50'],
    verbose = False,
)

In [11]:
# 10 repetitions (seeds 42-51); the recorded run below took about 35 minutes.
runner_scores = eval_score(amazon_fraud_data, amazon_fraud_runner, 10)

100%|██████████| 10/10 [35:28<00:00, 212.87s/it]


In [12]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.9559 += 0.0060


In [16]:
# Same metric and overrides as amazon_fraud_runner, but using HypeRunner:
# each repetition calls optimize_run(n_trials=40).
amazon_fraud_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=50'],
    verbose = False,
    n_trials=40
)

In [17]:
# 5 repetitions (seeds 42-46); the recorded run below took about 6h41m.
hyperunner_scores = eval_score(amazon_fraud_data, amazon_fraud_hyperunner, 5)

100%|██████████| 5/5 [6:40:42<00:00, 4808.54s/it]  


In [18]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.9601 += 0.0097


## 4. YelpChi <a name="yelp_chi"></a>

In [7]:
# YelpChi graph from cool_graph's AntiFraud dataset; no preprocessing.
# The split holds out 20% for validation and 40% for test, leaving 40% for training
# (the first positional 0.4 is train_size, which data_split_default does not use).
yelpchi_data = DataInfo(
    name = 'YelpChi',
    data = AntiFraud(root='./data', name='YelpChi').data,
    preproc_data = lambda data: data,
    split_data = lambda data, seed: data_split_default(data, 0.4, 0.2, 0.4, seed),
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/yelpchi/YelpChi_data.pt


In [8]:
# Plain Runner scored on ROC AUC, 50 epochs, otherwise default configuration.
yelpchi_runner = RunnerInfo(
    model_type='Runner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=50'],
    verbose = False,
)

In [9]:
# 10 repetitions (seeds 42-51); the recorded run below took about 1h53m.
runner_scores = eval_score(yelpchi_data, yelpchi_runner, 10)

100%|██████████| 10/10 [1:52:40<00:00, 676.03s/it]


In [10]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.8561 += 0.0055


In [11]:
# HypeRunner variant: each repetition calls optimize_run(n_trials=40).
# NOTE(review): main_metric is 'accuracy' here but 'roc_auc' for yelpchi_runner, so the
# two reported scores are presumably different metrics and not directly comparable.
# Confirm whether 'roc_auc' was intended (changing it requires re-running the cells below).
yelpchi_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=50'],
    verbose = False,
    n_trials=40
)

In [12]:
# 5 repetitions (seeds 42-46); the recorded run below took about 24h29m.
hyperunner_scores = eval_score(yelpchi_data, yelpchi_hyperunner, 5)

100%|██████████| 5/5 [24:28:40<00:00, 17624.05s/it]   


In [13]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test scores.
# These come from a runner configured with main_metric='accuracy', unlike the
# ROC AUC figures printed for yelpchi_runner.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.8901 += 0.0012


## 5. Multitarget 10k <a name="multitarget_10k"></a>

In [7]:
def preproc_multitarget_data(data):
    """Flatten the Multitarget graph into a plain ``Data`` object with one label per node.

    The attributes stored under the ``'node_1'`` node type and under the
    ``('node_1', 'to', 'node_1')`` edge type are merged into a single ``Data``.
    The label matrix ``y`` is then reduced along dim 1 with ``max``, so each
    node ends up with the largest value across its target columns.

    Args:
        data: Mapping-like graph indexable by ``'node_1'`` and
            ``('node_1', 'to', 'node_1')``.

    Returns:
        The new ``Data`` object with the reduced ``y``.
    """
    data = Data(**data['node_1'], **data[('node_1', 'to', 'node_1')])
    # Copy-and-detach of the reduced labels. This is the documented equivalent of
    # calling torch.tensor() on an existing tensor, and it does not require the
    # name `torch`, which this notebook never imports.
    data.y = data.y.max(dim=1).values.clone().detach()
    return data

In [8]:
# Multitarget '10k' graph from cool_graph; preproc_multitarget_data flattens it and
# collapses the targets into one label per node. Default data_split_default split.
multitarget10k_data = DataInfo(
    name = 'Multitarget 10k',
    data = Multitarget(root='./data', name='10k').data,
    preproc_data = preproc_multitarget_data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/10k/10k_data.pt


In [9]:
# Plain Runner scored on ROC AUC, 50 epochs, otherwise default configuration.
multitarget10k_runner = RunnerInfo(
    model_type='Runner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=50'],
    verbose = False,
)

In [10]:
# 10 repetitions (seeds 42-51); the recorded run below took about 12 minutes.
runner_scores = eval_score(multitarget10k_data, multitarget10k_runner, 10)

100%|██████████| 10/10 [11:59<00:00, 71.99s/it]


In [11]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.7302 += 0.0133


In [12]:
# HypeRunner variant: each repetition calls optimize_run(n_trials=40).
# NOTE(review): main_metric is 'accuracy' here but 'roc_auc' for multitarget10k_runner, so
# the two reported scores are presumably different metrics and not directly comparable.
# Confirm whether 'roc_auc' was intended (changing it requires re-running the cells below).
multitarget10k_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=50'],
    verbose = False,
    n_trials=40
)

In [13]:
# 5 repetitions (seeds 42-46); the recorded run below took about 2h35m.
hyperunner_scores = eval_score(multitarget10k_data, multitarget10k_hyperunner, 5)

100%|██████████| 5/5 [2:35:18<00:00, 1863.72s/it]  


In [14]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test scores.
# These come from a runner configured with main_metric='accuracy', unlike the
# ROC AUC figures printed for multitarget10k_runner.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.8375 += 0.0194


## 6. Multitarget 50k <a name="multitarget_50k"></a>

In [8]:
# Multitarget '50k' graph from cool_graph; reuses preproc_multitarget_data, so the cell
# defining that function must have been run first. Default data_split_default split.
multitarget50k_data = DataInfo(
    name = 'Multitarget 50k',
    data = Multitarget(root='./data', name='50k').data,
    preproc_data = preproc_multitarget_data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/50k/50k_data.pt


In [9]:
# Plain Runner scored on ROC AUC, 50 epochs, otherwise default configuration.
multitarget50k_runner = RunnerInfo(
    model_type='Runner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=50'],
    verbose = False,
)

In [10]:
# 10 repetitions (seeds 42-51); the recorded run below took about 1h04m.
runner_scores = eval_score(multitarget50k_data, multitarget50k_runner, 10)

100%|██████████| 10/10 [1:03:36<00:00, 381.63s/it]


In [11]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.7557 += 0.0118


In [12]:
# HypeRunner variant: each repetition calls optimize_run(n_trials=40).
# NOTE(review): main_metric is 'accuracy' here but 'roc_auc' for multitarget50k_runner, so
# the two reported scores are presumably different metrics and not directly comparable.
# Confirm whether 'roc_auc' was intended (changing it requires re-running the cells below).
multitarget50k_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=50'],
    verbose = False,
    n_trials=40
)

In [13]:
# 5 repetitions (seeds 42-46); the recorded run below took about 18h06m.
hyperunner_scores = eval_score(multitarget50k_data, multitarget50k_hyperunner, 5)

100%|██████████| 5/5 [18:06:05<00:00, 13033.19s/it]  


In [14]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test scores.
# These come from a runner configured with main_metric='accuracy', unlike the
# ROC AUC figures printed for multitarget50k_runner.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.8413 += 0.0094


## 7. Penn94 <a name="penn_94"></a>

In [7]:
def preproc_penn94_data(data):
    """Split the Penn94 feature matrix into ``x_cat`` and a one-column ``x``.

    Both slices are taken from the original ``data.x``: ``x_cat`` receives
    columns 0-5 and ``x`` is replaced by column 0 (kept two-dimensional).
    ``data`` is modified in place and returned.

    NOTE(review): column 0 ends up in both ``x_cat`` and ``x`` - confirm that
    this overlap is intended.
    """
    features = data.x
    # The right-hand side is evaluated before either attribute is rebound.
    data.x_cat, data.x = features[:, 0:6], features[:, 0:1]
    return data

In [8]:
# Penn94 graph from cool_graph's NonHomophilous dataset; preproc_penn94_data splits the
# features into x_cat and x. Default data_split_default split.
penn94_data = DataInfo(
    name = 'Penn94',
    data = NonHomophilous(root='./data', name='Penn94').data,
    preproc_data = preproc_penn94_data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/penn94/Penn94_data.pt


In [9]:
# Plain Runner scored on accuracy: 50 epochs and fixed neighbour counts [100, 40]
# (hard-coded here rather than derived from degree quantiles as in the Amazon sections).
penn94_runner = RunnerInfo(
    model_type='Runner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=50', 'training.num_neighbors=[100, 40]'],
    verbose = False,
)

In [10]:
# 10 repetitions (seeds 42-51); the recorded run below took about 5h05m.
runner_scores = eval_score(penn94_data, penn94_runner, 10)

100%|██████████| 10/10 [5:04:34<00:00, 1827.41s/it] 


In [11]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(runner_scores):.4f} += {np.std(runner_scores):.4f}')

0.7912 += 0.0340


In [12]:
# Same metric and overrides as penn94_runner, but using HypeRunner:
# each repetition calls optimize_run(n_trials=40).
penn94_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='accuracy',
    overrides = ['training.n_epochs=50', 'training.num_neighbors=[100, 40]'],
    verbose = False,
    n_trials=40
)

In [13]:
# 5 repetitions (seeds 42-46); the recorded run below took about 42 hours.
hyperunner_scores = eval_score(penn94_data, penn94_hyperunner, 5)

100%|██████████| 5/5 [42:00:19<00:00, 30243.92s/it]   


In [14]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test accuracy.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.8293 += 0.0023


## 8. Genius <a name="genius"></a>

In [8]:
# Genius graph from cool_graph's NonHomophilous dataset. Default data_split_default split.
# NOTE(review): preproc_genius_data is not defined in any visible cell of this notebook,
# so this cell raises NameError on a fresh kernel. TODO: restore the cell that defines it.
genius_data = DataInfo(
    name = 'Genius',
    data = NonHomophilous(root='./data', name='Genius').data,
    preproc_data = preproc_genius_data,
    split_data = data_split_default,
    preproc_results = lambda data: list(data['y'].values())[0],
)

Using existing file ./data/genius/Genius_data.pt


In [32]:
# HypeRunner scored on ROC AUC, 40 epochs; each repetition calls optimize_run(n_trials=40).
# Unlike the other sections, no plain Runner baseline is configured for Genius.
genius_hyperunner = RunnerInfo(
    model_type='HypeRunner',
    main_metric='roc_auc',
    overrides = ['training.n_epochs=40'],
    verbose = False,
    n_trials=40
)

In [33]:
# 5 repetitions (seeds 42-46); the recorded run below took about 34 hours.
hyperunner_scores = eval_score(genius_data, genius_hyperunner, 5)

100%|██████████| 5/5 [33:56:04<00:00, 24432.94s/it]   


In [34]:
# Mean and np.std (population std, ddof=0 by default) of the per-seed test ROC AUC.
print(f'{np.mean(hyperunner_scores):.4f} += {np.std(hyperunner_scores):.4f}')

0.9026 += 0.0010
