### Creating Loader for Dataset Amazon Computers

##### For the runner to work correctly with custom loaders, the data must be divided into batches, each having an `index` field. This notebook shows how to create such loaders properly.

In [2]:
from torch_geometric import datasets
import torch
import pandas as pd
from torch_geometric.data import Data

In [3]:
# Use the simple Amazon dataset, "Computers" subset.
dataset = datasets.Amazon(root='./data/Amazon', name='Computers')
# Index the dataset rather than reading `dataset.data`: `.data` exposes the
# dataset's internal storage (recent PyG versions warn about it), and the cells
# below attach extra fields to this object. `dataset[0]` returns the first graph
# as a standalone `Data` object.
data = dataset[0]
data

Data(x=[13752, 767], edge_index=[2, 491722], y=[13752])

In [4]:
# Attach the extra per-node fields before the graph is handed to a loader.
num_nodes = data.x.size(0)
# index: running node number 0 .. num_nodes - 1
data.index = torch.arange(num_nodes)
# label_mask: boolean mask, True where the label is non-empty (-100 marks an empty label)
data.label_mask = data.y.ne(-100)
# group_mask: all zeros for a homogeneous graph
data.group_mask = torch.zeros(num_nodes, dtype=torch.int8)
data

Data(x=[13752, 767], edge_index=[2, 491722], y=[13752], index=[13752], label_mask=[13752], group_mask=[13752])

In [5]:
# Positions of the True entries of label_mask, i.e. nodes with non-empty labels
non_empty_indices = data.label_mask.nonzero(as_tuple=True)[0]
non_empty_indices

tensor([    0,     1,     2,  ..., 13749, 13750, 13751])

In [6]:
# Making the train/test split for non_empty_indices.
# Fractions are used instead of absolute sample counts so the cell keeps working
# when the number of labelled nodes changes; for the 13752 labelled nodes of this
# dataset, 0.75 / 0.25 yields exactly 10314 train and 3438 test indices.
from sklearn.model_selection import train_test_split
train_idx, test_idx = train_test_split(
    non_empty_indices,
    train_size=0.75,
    test_size=0.25,
    random_state=42,  # fixed seed so the split is reproducible
    shuffle=True,
)
print(train_idx)
print(test_idx)

tensor([ 9240,  9553, 11218,  ...,  5390,   860,  7270])
tensor([ 4045, 13231,  4417,  ...,  6101,  6127, 11061])


In [7]:
from torch_geometric.loader import NeighborLoader
from tqdm import tqdm

# Sampling settings shared by the train and test loaders.
NUM_NEIGHBORS = [2, 1]  # neighbours sampled per node at each of the two hops
BATCH_SIZE = 250        # number of seed nodes per batch


def make_neighbor_loader(input_nodes):
    """Build a shuffled NeighborLoader over `data` seeded with `input_nodes`.

    Args:
        input_nodes: tensor of node indices used as seed nodes of the batches.

    Returns:
        A NeighborLoader using NUM_NEIGHBORS and BATCH_SIZE.
    """
    return NeighborLoader(
        data,
        num_neighbors=NUM_NEIGHBORS,
        batch_size=BATCH_SIZE,
        shuffle=True,
        input_nodes=input_nodes,
    )


def sample_batches(loader, desc="Sample data"):
    """Iterate over `loader` once and return the sampled batches as a list.

    In every batch, `label_mask` is switched off for all nodes after the first
    `batch_size` entries. Per the NeighborLoader documentation those first
    entries are the seed nodes, so only seed nodes stay marked as labelled and
    the sampled neighbours are excluded.

    Args:
        loader: loader to draw batches from.
        desc: caption for the tqdm progress bar.

    Returns:
        List of sampled batches with the adjusted `label_mask`.
    """
    batches = []
    for sampled_data in tqdm(loader, desc=desc):
        sampled_data.label_mask[sampled_data.batch_size :] = False
        batches.append(sampled_data)
    return batches


# Making the train loader and the list of batches sampled with it
train_loader = make_neighbor_loader(train_idx)
list_train_loader = sample_batches(train_loader)

# Making the test loader and the list of batches sampled with it
test_loader = make_neighbor_loader(test_idx)
list_test_loader = sample_batches(test_loader)

Sample data: 100%|██████████| 42/42 [00:00<00:00, 194.61it/s]
Sample data: 100%|██████████| 14/14 [00:00<00:00, 197.74it/s]


In [8]:
# Now run the Runner, passing the pre-sampled batch lists as the train and test loaders
from cool_graph.runners import Runner

runner = Runner(data)
result = runner.run(
    train_loader=list_train_loader,
    test_loader=list_test_loader,
)

2024-07-26 03:39:36.899 | INFO     | cool_graph.train.helpers:eval_epoch:218 - test:
 {'accuracy': 0.617, 'cross_entropy': 1.071, 'f1_weighted': 0.565, 'calc_time': 0.004, 'main_metric': 0.617}
2024-07-26 03:39:37.283 | INFO     | cool_graph.train.helpers:eval_epoch:218 - train:
 {'accuracy': 0.608, 'cross_entropy': 1.079, 'f1_weighted': 0.556, 'calc_time': 0.006, 'main_metric': 0.608}
2024-07-26 03:39:41.805 | INFO     | cool_graph.train.helpers:eval_epoch:218 - test:
 {'accuracy': 0.874, 'cross_entropy': 0.401, 'f1_weighted': 0.869, 'calc_time': 0.002, 'main_metric': 0.874}
2024-07-26 03:39:42.108 | INFO     | cool_graph.train.helpers:eval_epoch:218 - train:
 {'accuracy': 0.91, 'cross_entropy': 0.26, 'f1_weighted': 0.905, 'calc_time': 0.005, 'main_metric': 0.91}
2024-07-26 03:39:46.144 | INFO     | cool_graph.train.helpers:eval_epoch:218 - test:
 {'accuracy': 0.891, 'cross_entropy': 0.377, 'f1_weighted': 0.89, 'calc_time': 0.003, 'main_metric': 0.891}
2024-07-26 03:39:46.484 | INFO  

In [9]:
# Show the "best_loss" entry of the run result
# NOTE(review): presumably the metrics of the epoch with the lowest loss — confirm against cool_graph docs
result["best_loss"]

{'accuracy': 0.891,
 'cross_entropy': 0.377,
 'f1_weighted': 0.89,
 'calc_time': 0.003,
 'main_metric': 0.891,
 'tasks': {'y': {'accuracy': 0.8906340895869692,
   'cross_entropy': 0.3774034082889557,
   'f1_weighted': 0.8904671303679038}},
 'epoch': 10}