# Graph-level label-only membership inference attack (GLO-MIA).
As described here: https://arxiv.org/pdf/2503.19070. This attack method is suitable for multi-graph datasets and assumes the strictest black-box scenario, in which the attacker has no access to the model architecture and queries return only labels instead of logits/probabilities.

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to the local helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from ogb.graphproppred import PygGraphPropPredDataset

In [3]:
import torch
import numpy as np

from tqdm import tqdm
from multiprocessing import Pool
from torch import nn, optim
from torch_geometric import nn as gnn, transforms as T
from torch_geometric.datasets import TUDataset, PPI
from torch_geometric.loader import DataLoader as GDataLoader
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from ml_util import CustomGATModel, GATMolhivModel, train_model_multi_graph, load_model
from util import onehot_transform, graph_train_test_split, calculate_robustness_scores
from train_models import get_dataset, shadow_target_split, train_gat

In [4]:
# Select the compute device: the first CUDA GPU when PyTorch reports one as
# available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

In [60]:
# Load the dataset by name, then split it into train/test subsets for the
# target model (t_*) and the shadow model (s_*). A test size of 0.25 is passed
# for both the target and the shadow split.
dataset_name = 'MUTAG'
dataset = get_dataset(dataset_name)

(
    t_dataset_train,
    t_dataset_test,
    s_dataset_train,
    s_dataset_test,
) = shadow_target_split(
    dataset,
    target_test_size=0.25,
    shadow_test_size=0.25,
)

In [63]:
# Train target model
# Model hyper-parameters forwarded to train_gat via `model_params`
# (presumably attention heads and number of layers -- confirm in train_models).
model_params = {'heads': 4, 'layers': 4}

# 't' tags this run as the target model; the target test split is passed as
# `dataset_test` (the training log reports it as validation data).
t_model, t_save_path = train_gat(
    dataset_name,
    't',
    t_dataset_train,
    dataset_test=t_dataset_test,
    model_params=model_params,
    verbose=2,
)

Learning rate: 0.001
No learning rate scheduling!
Training for 125 epochs, with batch size=16
Using validation data (24 samples)
Using device: cpu

-----Epoch 1/125-----
Batch 5/5 | loss: 0.66609 (0.040s) | train acc: 0.414 | train AUC: 0.544
Validation: val loss: 0.685 | val acc: 0.375 | val F1: 0.000 | val AUC: 0.704

-----Epoch 2/125-----
Batch 5/5 | loss: 0.59481 (0.038s) | train acc: 0.771 | train AUC: 0.803
Validation: val loss: 0.682 | val acc: 0.458 | val F1: 0.235 | val AUC: 0.674

-----Epoch 3/125-----
Batch 5/5 | loss: 0.55634 (0.038s) | train acc: 0.757 | train AUC: 0.803
Validation: val loss: 0.677 | val acc: 0.667 | val F1: 0.692 | val AUC: 0.630

-----Epoch 4/125-----
Batch 5/5 | loss: 0.55557 (0.038s) | train acc: 0.743 | train AUC: 0.815
Validation: val loss: 0.674 | val acc: 0.583 | val F1: 0.583 | val AUC: 0.733

-----Epoch 5/125-----
Batch 5/5 | loss: 0.44470 (0.039s) | train acc: 0.814 | train AUC: 0.897
Validation: val loss: 0.673 | val acc: 0.583 | val F1: 0.643 

In [64]:
# Train shadow model
# Same call shape and `model_params` as the target run, but tagged 's' and fed
# the shadow train/test splits.
s_model, s_save_path = train_gat(
    dataset_name,
    's',
    s_dataset_train,
    dataset_test=s_dataset_test,
    model_params=model_params,
    verbose=2,
)

Learning rate: 0.001
No learning rate scheduling!
Training for 125 epochs, with batch size=16
Using validation data (24 samples)
Using device: cpu

-----Epoch 1/125-----
Batch 5/5 | loss: 0.63902 (0.039s) | train acc: 0.629 | train AUC: 0.604
Validation: val loss: 0.689 | val acc: 0.583 | val F1: 0.737 | val AUC: 0.771

-----Epoch 2/125-----
Batch 5/5 | loss: 0.61348 (0.038s) | train acc: 0.729 | train AUC: 0.729
Validation: val loss: 0.682 | val acc: 0.583 | val F1: 0.737 | val AUC: 0.814

-----Epoch 3/125-----
Batch 5/5 | loss: 0.47147 (0.038s) | train acc: 0.814 | train AUC: 0.860
Validation: val loss: 0.677 | val acc: 0.583 | val F1: 0.737 | val AUC: 0.736

-----Epoch 4/125-----
Batch 5/5 | loss: 0.43746 (0.038s) | train acc: 0.829 | train AUC: 0.873
Validation: val loss: 0.670 | val acc: 0.583 | val F1: 0.737 | val AUC: 0.807

-----Epoch 5/125-----
Batch 5/5 | loss: 0.44070 (0.039s) | train acc: 0.786 | train AUC: 0.869
Validation: val loss: 0.659 | val acc: 0.583 | val F1: 0.737 