# Graph-level label-only membership inference attack (GLO-MIA).
As described here: https://arxiv.org/pdf/2503.19070. This attack method is suitable for multi-graph datasets and assumes the strictest black-box scenario, in which the attacker has no access to the model architecture and queries return only labels instead of logits/probabilities.

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
from ogb.graphproppred import PygGraphPropPredDataset

In [5]:
import torch
import numpy as np

from tqdm import tqdm
from multiprocessing import Pool
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset
from torch_geometric import nn as gnn, transforms as T
from torch_geometric.datasets import TUDataset, PPI
from torch_geometric.loader import DataLoader as GDataLoader
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import auc as AUC, roc_curve, roc_auc_score, accuracy_score, f1_score, average_precision_score, precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from ml_util import (
    CustomGATModel, 
    GenericAttackModel, 
    LogitsDefenseModel,
    predict_multi_graph, 
    predict, 
    train_model_multi_graph,
    test_model_multi_graph,
    load_model, get_accuracy, 
    get_auroc_score, 
    train_model
)
from util import onehot_transform, graph_train_test_split, calculate_robustness_scores
from train_models import get_dataset, shadow_target_split, train_gat, _TRAINING_PARAMS
from glo_mia import search_scaler

In [6]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
DEVICE: str = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [7]:
# Dataset key handed to get_dataset / train_gat and used in checkpoint file names.
dataset_name: str = 'MUTAG'
# Value forwarded as `metric=` to calculate_robustness_scores.
METRIC: str = 'robustness'

In [8]:
# How many target/shadow model pairs are trained on the same data split.
NUM_RUNS: int = 7
# Value forwarded as `n_perturb_per_graph=` to calculate_robustness_scores.
NUM_PERTURB: int = 500

In [9]:
# Load the dataset and read the feature / label widths off its first graph.
dataset = get_dataset(dataset_name)
num_feat, num_categories = dataset[0].x.shape[1], dataset[0].y.shape[1]

In [9]:
# Per-run robustness scores: each list entry is the array built for one run,
# with one row per entry of `scalers`.
scores_t_train, scores_t_test = [], []  # target model, train/test splits
scores_s_train, scores_s_test = [], []  # shadow model, train/test splits
t_models, s_models = [], []  # trained target and shadow models, one per run

dataset = get_dataset(dataset_name)
t_dataset_train, t_dataset_test, s_dataset_train, s_dataset_test = shadow_target_split(
    dataset, target_test_size=0.25, shadow_test_size=0.25
)
util_loss_fn = nn.CrossEntropyLoss()

# Loop-invariant settings, hoisted so a missing config entry fails before any training.
scalers = np.arange(0.1, 5., 0.25)
params = _TRAINING_PARAMS['GAT'][dataset_name]
lr = params['lr']
epochs = params['epochs']
batch_size = params['batch_size']
weight_decay = params['weight_decay']


def _report_utility(tag, model, split):
    """Evaluate `model` on `split` and print its accuracy, F1 and AUROC under `tag`."""
    _, acc, f1, auc = test_model_multi_graph(model, util_loss_fn, split, device=DEVICE)
    print(f'{tag}: acc: {acc:.4f}, F1: {f1:.4f}, AUROC: {auc:.4f}')


def _scores_per_scaler(model, split):
    """Return an array holding one row of robustness scores per entry of `scalers`."""
    return np.array([
        calculate_robustness_scores(model, split, n_perturb_per_graph=NUM_PERTURB,
                                    scaler=scaler, device=DEVICE, metric=METRIC)
        for scaler in tqdm(scalers)
    ])


def _membership_data(member_scores, nonmember_scores, n_members, n_nonmembers):
    """Build attack features, one-hot membership labels and a TensorDataset of both.

    Members are labelled 1 and non-members 0; rows are ordered members first.
    """
    features = np.concatenate([member_scores.T, nonmember_scores.T])
    membership = np.array(([1] * n_members) + ([0] * n_nonmembers)).reshape(-1, 1)
    onehot = OneHotEncoder(categories=[[0, 1]], sparse_output=False).fit_transform(membership)
    return features, onehot, TensorDataset(torch.Tensor(features), torch.Tensor(onehot))


for i in range(NUM_RUNS):
    print(f'Run #{i+1}')
    # Train target model
    t_save_path = f'mia-models/t_model_gat_{dataset_name}_{i}.pth'
    t_model, _ = train_gat(dataset_name, 't', t_dataset_train, dataset_test=t_dataset_test,
                           save_path=t_save_path, device=DEVICE, verbose=1)
    # Train shadow model
    s_save_path = f'mia-models/s_model_gat_{dataset_name}_{i}.pth'
    s_model, _ = train_gat(dataset_name, 's', s_dataset_train, dataset_test=s_dataset_test,
                           save_path=s_save_path, device=DEVICE, verbose=1)

    # (Optional defense: wrap t_model / s_model in LogitsDefenseModel here.)
    t_model.eval()
    s_model.eval()

    # Utility of the target and shadow models on their own train/test splits.
    # The first line was previously mislabelled "(test)" although it reports the train split.
    _report_utility('Target model (train)', t_model, t_dataset_train)
    _report_utility('Target model (test)', t_model, t_dataset_test)
    _report_utility('Shadow model (train)', s_model, s_dataset_train)
    _report_utility('Shadow model (test)', s_model, s_dataset_test)

    # Robustness scores for the shadow model, then the target model (train split first).
    s_scores_train = _scores_per_scaler(s_model, s_dataset_train)
    s_scores_test = _scores_per_scaler(s_model, s_dataset_test)
    t_scores_train = _scores_per_scaler(t_model, t_dataset_train)
    t_scores_test = _scores_per_scaler(t_model, t_dataset_test)

    # Shadow data labelled member/non-member: attack model train set
    X, y, att_dataset = _membership_data(
        s_scores_train, s_scores_test, len(s_dataset_train), len(s_dataset_test)
    )
    # Target data labelled member/non-member: attack model test set
    X_test, y_test, att_dataset_test = _membership_data(
        t_scores_train, t_scores_test, len(t_dataset_train), len(t_dataset_test)
    )

    # Train attack model with class weights balancing members vs non-members
    att_model = GenericAttackModel(num_feat=len(scalers), dropout=0.4).to(DEVICE)
    optimizer = optim.Adam(att_model.parameters(), lr=lr, weight_decay=weight_decay)
    weight = compute_class_weight('balanced', classes=np.unique(y.argmax(axis=1)), y=y.argmax(axis=1))
    loss_fn = nn.CrossEntropyLoss(weight=torch.tensor(weight).to(DEVICE))
    train_model(att_model, optimizer, att_dataset, loss_fn, epochs, batch_size, val_dataset=None, device=DEVICE, verbose=1)
    att_model.eval()

    # Evaluate attack model on the shadow (attack-train) and target (attack-test) datasets
    att_pred = predict(att_model, att_dataset, device=DEVICE, logits=True, return_type='pt')
    att_pred_test = predict(att_model, att_dataset_test, device=DEVICE, logits=True, return_type='pt')

    # Attack AUROC, using column 1 (the "member" class) as the score
    print(f'Shadow (train) AUROC: {roc_auc_score(y.argmax(axis=1), att_pred[:,1].cpu())}')
    print(f'Target (test) AUROC: {roc_auc_score(y_test.argmax(axis=1), att_pred_test[:,1].cpu())}')

    scores_t_train.append(t_scores_train)
    scores_t_test.append(t_scores_test)
    scores_s_train.append(s_scores_train)
    scores_s_test.append(s_scores_test)
    # Keep the trained models on the CPU
    t_models.append(t_model.to('cpu'))
    s_models.append(s_model.to('cpu'))



Run #1


100%|██████████| 125/125 [00:06<00:00, 19.45it/s]
100%|██████████| 125/125 [00:06<00:00, 19.56it/s]


Target model (test): acc: 0.8714, F1: 0.9143, AUROC: 0.9489
Target model (test): acc: 0.5833, F1: 0.7222, AUROC: 0.7413
Shadow model (train): acc: 0.9000, F1: 0.9213, AUROC: 0.9733
Shadow model (test): acc: 0.9167, F1: 0.9474, AUROC: 0.9789


100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:24<00:00,  1.24s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 125/125 [00:01<00:00, 88.92it/s]


Shadow (train) AUROC: 0.6833333333333333
Target (test) AUROC: 0.46547619047619054
Run #2


100%|██████████| 125/125 [00:06<00:00, 19.57it/s]
100%|██████████| 125/125 [00:06<00:00, 19.92it/s]


Target model (test): acc: 0.8429, F1: 0.8972, AUROC: 0.9091
Target model (test): acc: 0.6667, F1: 0.7647, AUROC: 0.8531
Shadow model (train): acc: 0.8857, F1: 0.9184, AUROC: 0.9698
Shadow model (test): acc: 0.9167, F1: 0.9500, AUROC: 0.9684


100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 125/125 [00:01<00:00, 89.69it/s]


Shadow (train) AUROC: 0.8071428571428572
Target (test) AUROC: 0.4023809523809524
Run #3


100%|██████████| 125/125 [00:06<00:00, 19.96it/s]
100%|██████████| 125/125 [00:06<00:00, 20.01it/s]


Target model (test): acc: 0.7857, F1: 0.8352, AUROC: 0.8769
Target model (test): acc: 0.7500, F1: 0.7692, AUROC: 0.7552
Shadow model (train): acc: 0.8571, F1: 0.9000, AUROC: 0.9298
Shadow model (test): acc: 0.8750, F1: 0.9231, AUROC: 0.8211


100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.20s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.20s/it]
100%|██████████| 125/125 [00:01<00:00, 90.22it/s]


Shadow (train) AUROC: 0.6244047619047619
Target (test) AUROC: 0.6005952380952381
Run #4


100%|██████████| 125/125 [00:06<00:00, 19.98it/s]
100%|██████████| 125/125 [00:06<00:00, 19.98it/s]


Target model (test): acc: 0.8000, F1: 0.8727, AUROC: 0.8778
Target model (test): acc: 0.6667, F1: 0.7647, AUROC: 0.6364
Shadow model (train): acc: 0.9143, F1: 0.9375, AUROC: 0.9724
Shadow model (test): acc: 0.9167, F1: 0.9500, AUROC: 0.9895


100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.20s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:24<00:00,  1.20s/it]
100%|██████████| 125/125 [00:01<00:00, 89.87it/s]


Shadow (train) AUROC: 0.8351190476190476
Target (test) AUROC: 0.5142857142857142
Run #5


100%|██████████| 125/125 [00:06<00:00, 19.85it/s]
100%|██████████| 125/125 [00:06<00:00, 19.79it/s]


Target model (test): acc: 0.7857, F1: 0.8421, AUROC: 0.8759
Target model (test): acc: 0.5833, F1: 0.6875, AUROC: 0.5175
Shadow model (train): acc: 0.8571, F1: 0.8958, AUROC: 0.9022
Shadow model (test): acc: 0.9167, F1: 0.9500, AUROC: 0.8737


100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 125/125 [00:01<00:00, 90.42it/s]


Shadow (train) AUROC: 0.6958333333333333
Target (test) AUROC: 0.42321428571428577
Run #6


100%|██████████| 125/125 [00:06<00:00, 19.66it/s]
100%|██████████| 125/125 [00:06<00:00, 19.94it/s]


Target model (test): acc: 0.8714, F1: 0.9143, AUROC: 0.9290
Target model (test): acc: 0.6250, F1: 0.7429, AUROC: 0.4755
Shadow model (train): acc: 0.8571, F1: 0.8958, AUROC: 0.8498
Shadow model (test): acc: 0.8333, F1: 0.9000, AUROC: 0.8947


100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.21s/it]
100%|██████████| 125/125 [00:01<00:00, 89.82it/s]


Shadow (train) AUROC: 0.6761904761904762
Target (test) AUROC: 0.4342261904761905
Run #7


100%|██████████| 125/125 [00:06<00:00, 19.95it/s]
100%|██████████| 125/125 [00:06<00:00, 20.01it/s]


Target model (test): acc: 0.4286, F1: 0.3939, AUROC: 0.7396
Target model (test): acc: 0.4167, F1: 0.3636, AUROC: 0.4965
Shadow model (train): acc: 0.8857, F1: 0.9167, AUROC: 0.9618
Shadow model (test): acc: 0.9583, F1: 0.9744, AUROC: 0.9684


100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 20/20 [00:24<00:00,  1.23s/it]
100%|██████████| 20/20 [00:24<00:00,  1.22s/it]
100%|██████████| 125/125 [00:01<00:00, 89.59it/s]

Shadow (train) AUROC: 0.8452380952380952
Target (test) AUROC: 0.513095238095238





In [None]:
# t_dataset_train, t_dataset_test, s_dataset_train, s_dataset_test = shadow_target_split(dataset, target_test_size=0.4, shadow_test_size=0.4)

In [None]:

# Average the robustness scores over all runs (axis 0 of the stacked per-run arrays).
# To use a single run instead, take e.g. scores_t_train[0] / scores_t_test[0].
s_scores_train, s_scores_test, t_scores_train, t_scores_test = (
    np.stack(per_run).mean(axis=0)
    for per_run in (scores_s_train, scores_s_test, scores_t_train, scores_t_test)
)

# Attack model train set: shadow data, members (train split) = 1, non-members (test split) = 0
X = np.concatenate([s_scores_train.T, s_scores_test.T])
labels = np.repeat([1, 0], [len(s_dataset_train), len(s_dataset_test)]).reshape(-1, 1)
y = OneHotEncoder(categories=[[0, 1]], sparse_output=False).fit_transform(labels)
att_dataset = TensorDataset(torch.Tensor(X), torch.Tensor(y))

# Attack model test set: target data, labelled the same way
X_test = np.concatenate([t_scores_train.T, t_scores_test.T])
labels_test = np.repeat([1, 0], [len(t_dataset_train), len(t_dataset_test)]).reshape(-1, 1)
y_test = OneHotEncoder(categories=[[0, 1]], sparse_output=False).fit_transform(labels_test)
att_dataset_test = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))

In [None]:
# Hyperparameters for the attack model trained on the run-averaged scores.
lr, epochs, batch_size, weight_decay = 0.001, 250, 8, 1e-4

att_model = GenericAttackModel(num_feat=len(scalers), dropout=0.3).to(DEVICE)
optimizer = optim.Adam(att_model.parameters(), lr=lr, weight_decay=weight_decay)

# 'balanced' class weights computed from the member/non-member labels of the attack train set.
membership_ids = y.argmax(axis=1)
weight = compute_class_weight('balanced', classes=np.unique(membership_ids), y=membership_ids)
loss_fn = nn.CrossEntropyLoss(weight=torch.tensor(weight).to(DEVICE))

# The target-derived attack dataset is passed as the validation set.
train_model(
    att_model, optimizer, att_dataset, loss_fn, epochs, batch_size,
    val_dataset=att_dataset_test, device=DEVICE, verbose=2,
)
att_model.eval()

In [None]:
# # Create and train target model
# t_model = CustomGATModel(num_feat=num_feat, num_classes=num_categories).to(DEVICE)

# t_save_path = f'mia-models/t_model_gat_{dataset_name}.pth'
# t_model, t_dataset_train, t_dataset_test = load_model(t_model, t_save_path)
# t_model.eval()

In [None]:
# # Create and train shadow model
# s_model = CustomGATModel(num_feat=num_feat, num_classes=num_categories).to(DEVICE)
# s_save_path = f'mia-models/s_model_gat_{dataset_name}.pth'

# s_model, s_dataset_train, s_dataset_test = load_model(s_model, s_save_path)
# s_model.eval()

In [None]:
# logits = predict_multi_graph(s_model, s_dataset_train, device=DEVICE, logits=True, return_type='pt')
# targets = torch.cat([g.y for g in s_dataset_train])
# # get_accuracy(logits, targets)
# get_auroc_score(logits, targets, multiclass=(num_categories>2))

In [None]:
# scalers, thresholds, all_acc, all_auroc, s_scores_train, s_scores_test = search_scaler(s_model, s_dataset_train, s_dataset_test,
#                                                         n_perturb_per_graph=1000, scaler_max=4.1, metric=METRIC)

In [None]:
# t_scores_train = np.array([calculate_robustness_scores(t_model, t_dataset_train, scaler=scaler, device=DEVICE, metric=METRIC) for scaler in tqdm(scalers)])
# t_scores_test = np.array([calculate_robustness_scores(t_model, t_dataset_test, scaler=scaler, device=DEVICE, metric=METRIC) for scaler in tqdm(scalers)])

In [None]:
# X_test = np.concatenate([t_scores_train.T, t_scores_test.T])
# labels_test = np.array(([0] * len(t_dataset_train)) + ([1] * len(t_dataset_test))).reshape(-1,1)
# y_test = OneHotEncoder(categories=[[0,1]], sparse_output=False).fit_transform(labels_test)
# att_dataset_test = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))

In [None]:
# X = np.concatenate([s_scores_train.T, s_scores_test.T])
# labels = np.array(([0] * len(s_dataset_train)) + ([1] * len(s_dataset_test))).reshape(-1,1)
# y = OneHotEncoder(categories=[[0,1]], sparse_output=False).fit_transform(labels)

# att_dataset = TensorDataset(torch.Tensor(X), torch.Tensor(y))


In [None]:
# lr = 0.001
# epochs = 250
# batch_size = 16
# weight_decay = 1e-4

# att_model = GenericAttackModel(num_feat=len(scalers), dropout=0.3).to(DEVICE)
# optimizer = optim.Adam(att_model.parameters(), lr=lr, weight_decay=weight_decay)
# weight = compute_class_weight('balanced', classes=np.unique(y.argmax(axis=1)), y=y.argmax(axis=1))

# loss_fn = nn.CrossEntropyLoss(weight=torch.tensor(weight).to(DEVICE))

# train_model(att_model, optimizer, att_dataset, loss_fn, epochs, batch_size, val_dataset=att_dataset_test, device=DEVICE)
# att_model.eval()

In [None]:
# Attack model outputs for the shadow (attack-train) and target (attack-test) datasets.
att_pred, att_pred_test = (
    predict(att_model, attack_split, device=DEVICE, logits=True, return_type='pt')
    for attack_split in (att_dataset, att_dataset_test)
)

In [None]:
# s_scores_train.T.mean(axis=0)

In [None]:
# s_scores_test.T.mean(axis=0)

In [None]:
# t_scores_train.T.mean(axis=0)

In [None]:
# t_scores_test.T.mean(axis=0)

In [None]:
# Final attack metrics on the target data, printed as a single comma-separated row.
target_truth = y_test.argmax(axis=1)
target_member_score = att_pred_test[:, 1].cpu()  # column 1 = "member" class

auroc = roc_auc_score(target_truth, target_member_score)
precision, recall, thresholds = precision_recall_curve(target_truth, target_member_score)
auprc = AUC(recall, precision)
f1 = f1_score(target_truth, att_pred_test.cpu().argmax(dim=1))
print(f'glo_mia,none,{dataset_name.lower()},{auroc},{auprc},{f1}')

In [None]:
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay

# ROC curve and AUROC of the attack model on the shadow data it was trained on.
shadow_truth = y.argmax(axis=1)
shadow_member_score = att_pred[:, 1].cpu()
fpr, tpr, thresholds = roc_curve(shadow_truth, shadow_member_score)

print(roc_auc_score(shadow_truth, shadow_member_score))
RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

In [None]:
# ROC curve and AUROC of the attack model on the target data.
target_truth = y_test.argmax(axis=1)
target_member_score = att_pred_test[:, 1].cpu()
fpr, tpr, thresholds = roc_curve(target_truth, target_member_score)
print(roc_auc_score(target_truth, target_member_score))

RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

In [None]:
# Precision-recall curve and its area for the attack model on the target data.
target_truth = y_test.argmax(axis=1)
target_member_score = att_pred_test[:, 1].cpu()
precision, recall, thresholds = precision_recall_curve(target_truth, target_member_score)
print(AUC(recall, precision))
PrecisionRecallDisplay.from_predictions(target_truth, target_member_score)

In [None]:
# F1 of the attack model's hard (argmax) membership predictions on the target data.
predicted_membership = att_pred_test.cpu().argmax(dim=1)
f1_score(y_test.argmax(axis=1), predicted_membership)

In [None]:
from matplotlib import pyplot as plt

# Row-wise mean of the shadow scores (rows correspond to `scalers`), train split vs test split.
shadow_train_mean = np.stack(s_scores_train).mean(axis=1)
shadow_test_mean = np.stack(s_scores_test).mean(axis=1)
plt.bar(scalers, shadow_train_mean, alpha=0.5, width=0.1)
plt.bar(scalers, shadow_test_mean, alpha=0.2, width=0.1)

In [None]:
# Row-wise mean of the target scores (rows correspond to `scalers`), train split vs test split.
target_train_mean = np.stack(t_scores_train).mean(axis=1)
target_test_mean = np.stack(t_scores_test).mean(axis=1)
plt.bar(scalers, target_train_mean, alpha=0.5, width=0.1)
plt.bar(scalers, target_test_mean, alpha=0.2, width=0.1)

In [None]:
# s = scalers[np.argmax(all_auroc)]
# threshold = thresholds[np.argmax(all_auroc)]
# print(f'Best scaler: {s}')
# print(f'Threshold: {threshold}')
# pred_scores_train = calculate_robustness_scores(t_model, t_dataset_train, scaler=s, device=DEVICE)
# pred_scores_test = calculate_robustness_scores(t_model, t_dataset_test, scaler=s, device=DEVICE)

In [None]:
# pred_scores = np.concatenate([pred_scores_train, pred_scores_test])
# true_scores = np.concatenate([np.ones_like(pred_scores_train), np.zeros_like(pred_scores_test)])

# print(f1_score(true_scores, pred_scores>threshold))

In [None]:
# print(accuracy_score(true_scores, pred_scores>threshold))

In [None]:
# print(roc_auc_score(true_scores, pred_scores))

In [None]:
# from sklearn.metrics import RocCurveDisplay
# fpr, tpr, thresholds = roc_curve(true_scores, pred_scores)

# RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

In [None]:

# Number of target/shadow model pairs trained for every noise level.
NUM_NOISE_RUNS = 10

# Maps each sigma handed to LogitsDefenseModel to the test-split AUROCs collected for it.
# NOTE(review): target and shadow test AUROCs are pooled into the same list, so the
# per-sigma mean computed from it mixes both models -- confirm this is intended.
noise_auroc = {
    0: [],
    0.5: [],
    1.: [],
    2.: []
}

util_loss_fn = nn.CrossEntropyLoss()
dataset = get_dataset(dataset_name)
t_dataset_train, t_dataset_test, s_dataset_train, s_dataset_test = shadow_target_split(
    dataset, target_test_size=0.25, shadow_test_size=0.25
)


def _report_defended(tag, model, split):
    """Print accuracy, F1 and AUROC of `model` on `split` under `tag`; return the AUROC."""
    _, acc, f1, auc = test_model_multi_graph(model, util_loss_fn, split, device=DEVICE)
    print(f'{tag}: acc: {acc:.4f}, F1: {f1:.4f}, AUROC: {auc:.4f}')
    return auc


# NOTE(review): models are retrained for every sigma although train_gat does not take
# sigma; the checkpoints also reuse the mia-models/*_{i}.pth paths and are overwritten.
for sigma in noise_auroc:
    print(f'-------------------------BEGINNING sigma={sigma}-------------------------')
    for i in range(NUM_NOISE_RUNS):
        # Train target model
        t_save_path = f'mia-models/t_model_gat_{dataset_name}_{i}.pth'
        t_model, _ = train_gat(dataset_name, 't', t_dataset_train, dataset_test=t_dataset_test,
                               save_path=t_save_path, device=DEVICE, verbose=1)
        # Train shadow model
        s_save_path = f'mia-models/s_model_gat_{dataset_name}_{i}.pth'
        s_model, _ = train_gat(dataset_name, 's', s_dataset_train, dataset_test=s_dataset_test,
                               save_path=s_save_path, device=DEVICE, verbose=1)

        # Wrap both models in the logits defense configured with the current sigma.
        t_model = LogitsDefenseModel(t_model, sigma=sigma)
        s_model = LogitsDefenseModel(s_model, sigma=sigma)
        t_model.eval()
        s_model.eval()

        # Target model performance on train/test data; only the test AUROC is recorded.
        _report_defended('Target model (train)', t_model, t_dataset_train)
        noise_auroc[sigma].append(_report_defended('Target model (test)', t_model, t_dataset_test))

        # Shadow model performance on train/test data; only the test AUROC is recorded.
        _report_defended('Shadow model (train)', s_model, s_dataset_train)
        noise_auroc[sigma].append(_report_defended('Shadow model (test)', s_model, s_dataset_test))
        
        

-------------------------BEGINNING sigma=0-------------------------


100%|██████████| 125/125 [00:05<00:00, 21.43it/s]
100%|██████████| 125/125 [00:05<00:00, 21.72it/s]


Target model (train): acc: 0.8857, F1: 0.9167, AUROC: 0.9527
Target model (test): acc: 0.7500, F1: 0.8235, AUROC: 0.7731
Shadow model (train): acc: 0.8857, F1: 0.9111, AUROC: 0.9380
Shadow model (test): acc: 0.9167, F1: 0.9444, AUROC: 0.8235


100%|██████████| 125/125 [00:05<00:00, 21.22it/s]
100%|██████████| 125/125 [00:05<00:00, 21.71it/s]


Target model (train): acc: 0.7714, F1: 0.8571, AUROC: 0.6572
Target model (test): acc: 0.6667, F1: 0.7895, AUROC: 0.4958
Shadow model (train): acc: 0.8571, F1: 0.8936, AUROC: 0.9139
Shadow model (test): acc: 0.8750, F1: 0.9143, AUROC: 0.8992


100%|██████████| 125/125 [00:05<00:00, 21.70it/s]
100%|██████████| 125/125 [00:05<00:00, 21.62it/s]


Target model (train): acc: 0.7857, F1: 0.8624, AUROC: 0.8589
Target model (test): acc: 0.8333, F1: 0.8824, AUROC: 0.7983
Shadow model (train): acc: 0.7571, F1: 0.7733, AUROC: 0.8630
Shadow model (test): acc: 0.5000, F1: 0.5714, AUROC: 0.6050


100%|██████████| 125/125 [00:05<00:00, 21.70it/s]
100%|██████████| 125/125 [00:05<00:00, 21.50it/s]


Target model (train): acc: 0.8714, F1: 0.9126, AUROC: 0.8977
Target model (test): acc: 0.7917, F1: 0.8571, AUROC: 0.8067
Shadow model (train): acc: 0.7429, F1: 0.8269, AUROC: 0.9001
Shadow model (test): acc: 0.8333, F1: 0.8947, AUROC: 0.7899


100%|██████████| 125/125 [00:05<00:00, 21.76it/s]
100%|██████████| 125/125 [00:05<00:00, 21.77it/s]


Target model (train): acc: 0.9857, F1: 0.9897, AUROC: 0.9991
Target model (test): acc: 0.6250, F1: 0.7097, AUROC: 0.6387
Shadow model (train): acc: 0.7714, F1: 0.8431, AUROC: 0.8122
Shadow model (test): acc: 0.8333, F1: 0.8947, AUROC: 0.8739


100%|██████████| 125/125 [00:05<00:00, 21.71it/s]
100%|██████████| 125/125 [00:05<00:00, 21.70it/s]


Target model (train): acc: 0.8429, F1: 0.8764, AUROC: 0.9347
Target model (test): acc: 0.4583, F1: 0.5517, AUROC: 0.5462
Shadow model (train): acc: 0.9000, F1: 0.9176, AUROC: 0.9595
Shadow model (test): acc: 0.7917, F1: 0.8485, AUROC: 0.8235


100%|██████████| 125/125 [00:05<00:00, 21.60it/s]
100%|██████████| 125/125 [00:05<00:00, 21.72it/s]


Target model (train): acc: 0.4143, F1: 0.3051, AUROC: 0.8343
Target model (test): acc: 0.4583, F1: 0.3810, AUROC: 0.5882
Shadow model (train): acc: 0.9000, F1: 0.9231, AUROC: 0.9888
Shadow model (test): acc: 0.9167, F1: 0.9444, AUROC: 0.8992


100%|██████████| 125/125 [00:05<00:00, 21.49it/s]
100%|██████████| 125/125 [00:05<00:00, 21.70it/s]


Target model (train): acc: 0.7714, F1: 0.8182, AUROC: 0.8883
Target model (test): acc: 0.5417, F1: 0.6207, AUROC: 0.6303
Shadow model (train): acc: 0.8714, F1: 0.9032, AUROC: 0.9302
Shadow model (test): acc: 0.8750, F1: 0.9143, AUROC: 0.8235


100%|██████████| 125/125 [00:05<00:00, 21.78it/s]
100%|██████████| 125/125 [00:05<00:00, 21.26it/s]


Target model (train): acc: 0.5429, F1: 0.5676, AUROC: 0.7386
Target model (test): acc: 0.5000, F1: 0.5714, AUROC: 0.5966
Shadow model (train): acc: 0.8000, F1: 0.8600, AUROC: 0.8941
Shadow model (test): acc: 0.8750, F1: 0.9189, AUROC: 0.8655


100%|██████████| 125/125 [00:05<00:00, 21.62it/s]
100%|██████████| 125/125 [00:05<00:00, 21.76it/s]


Target model (train): acc: 0.7143, F1: 0.7959, AUROC: 0.7775
Target model (test): acc: 0.5833, F1: 0.7059, AUROC: 0.4790
Shadow model (train): acc: 0.8857, F1: 0.9130, AUROC: 0.9216
Shadow model (test): acc: 0.9167, F1: 0.9444, AUROC: 0.8739
-------------------------BEGINNING sigma=0.5-------------------------


100%|██████████| 125/125 [00:05<00:00, 21.71it/s]
100%|██████████| 125/125 [00:05<00:00, 21.74it/s]


Target model (train): acc: 0.7143, F1: 0.7436, AUROC: 0.9214
Target model (test): acc: 0.5833, F1: 0.6429, AUROC: 0.5714
Shadow model (train): acc: 0.7429, F1: 0.7429, AUROC: 0.9018
Shadow model (test): acc: 0.6250, F1: 0.6400, AUROC: 0.8739


100%|██████████| 125/125 [00:05<00:00, 21.73it/s]
100%|██████████| 125/125 [00:05<00:00, 21.74it/s]


Target model (train): acc: 0.4143, F1: 0.2545, AUROC: 0.7936
Target model (test): acc: 0.2500, F1: 0.1000, AUROC: 0.3445
Shadow model (train): acc: 0.7143, F1: 0.8077, AUROC: 0.8002
Shadow model (test): acc: 0.7917, F1: 0.8718, AUROC: 0.8235


100%|██████████| 125/125 [00:05<00:00, 21.77it/s]
100%|██████████| 125/125 [00:05<00:00, 21.54it/s]


Target model (train): acc: 0.8143, F1: 0.8762, AUROC: 0.9205
Target model (test): acc: 0.8750, F1: 0.9091, AUROC: 0.8571
Shadow model (train): acc: 0.8429, F1: 0.8791, AUROC: 0.9569
Shadow model (test): acc: 0.7917, F1: 0.8649, AUROC: 0.7563


100%|██████████| 125/125 [00:05<00:00, 21.72it/s]
100%|██████████| 125/125 [00:05<00:00, 21.73it/s]


Target model (train): acc: 0.9143, F1: 0.9375, AUROC: 0.9631
Target model (test): acc: 0.5833, F1: 0.6875, AUROC: 0.5714
Shadow model (train): acc: 0.7857, F1: 0.8387, AUROC: 0.8467
Shadow model (test): acc: 0.7917, F1: 0.8485, AUROC: 0.8487


100%|██████████| 125/125 [00:05<00:00, 21.63it/s]
100%|██████████| 125/125 [00:05<00:00, 21.71it/s]


Target model (train): acc: 0.4714, F1: 0.4789, AUROC: 0.6449
Target model (test): acc: 0.3750, F1: 0.4000, AUROC: 0.4622
Shadow model (train): acc: 0.7429, F1: 0.8125, AUROC: 0.8605
Shadow model (test): acc: 0.7917, F1: 0.8649, AUROC: 0.7563


100%|██████████| 125/125 [00:05<00:00, 21.68it/s]
  2%|▏         | 3/125 [00:00<00:05, 22.83it/s]

In [None]:
# Mean recorded AUROC per sigma. A sigma with no results yet (e.g. after an interrupted
# sweep) yields NaN explicitly rather than via np.mean([]) and its RuntimeWarning.
[(sigma, np.mean(aurocs) if aurocs else np.nan) for sigma, aurocs in noise_auroc.items()]