In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import DataLoader, Dataset
from torch_geometric.nn import GCNConv, GraphSAGE, GATConv, GINConv
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import networkx as nx
import numpy as np
from torch_geometric.datasets import Planetoid


In [2]:
import torch
from torch_geometric.datasets import Planetoid
from sklearn.model_selection import train_test_split
import numpy as np
import random

# Seed PyTorch, NumPy and the stdlib RNG with one shared value so the split
# below and later weight initialisations are repeatable.
seed = 42  # any integer works, as long as it stays the same across runs
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(seed)

# Fetch PubMed from the Planetoid collection and take its first graph object.
dataset = Planetoid(root='/tmp/PubMed', name='PubMed')
data = dataset[0]

# Halve the node ids: one half trains the target model, the other the shadow model.
nodes = np.arange(data.num_nodes)
target_nodes, shadow_nodes = train_test_split(nodes, test_size=0.5, random_state=seed)

# Sanity check: report whether any node id ended up in both halves.
overlap = np.intersect1d(target_nodes, shadow_nodes)
if overlap.size > 0:
    print(f"The target and shadow datasets are not disjoint. Overlapping nodes: {len(overlap)}")
else:
    print("The target and shadow datasets are disjoint.")


def _node_mask(num_nodes, node_ids):
    """Return a boolean tensor of length ``num_nodes`` that is True at ``node_ids``."""
    mask = torch.zeros(num_nodes, dtype=torch.bool)
    mask[node_ids] = True
    return mask


# The target graph keeps the loaded object; its train_mask is replaced by the
# target half of the split.
data.train_mask = _node_mask(data.num_nodes, target_nodes)

# The shadow graph is a clone of the target graph whose train_mask marks the
# shadow half instead.
shadow_data = data.clone()
shadow_data.train_mask = _node_mask(data.num_nodes, shadow_nodes)

# Summary of the dataset and of the split sizes.
for _summary_line in (
    f"Dataset: {dataset}",
    f"Number of nodes: {data.num_nodes}",
    f"Number of edges: {data.num_edges}",
    f"Number of features: {data.num_features}",
    f"Number of classes: {dataset.num_classes}",
    f"Train Mask: {data.train_mask.sum()} nodes",
    f"Test Mask: {data.test_mask.sum()} nodes",
    f"Target Dataset Nodes: {len(target_nodes)}",
    f"Shadow Dataset Nodes: {len(shadow_nodes)}",
):
    print(_summary_line)

The target and shadow datasets are disjoint.
Dataset: PubMed()
Number of nodes: 19717
Number of edges: 88648
Number of features: 500
Number of classes: 3
Train Mask: 9858 nodes
Test Mask: 1000 nodes
Target Dataset Nodes: 9858
Shadow Dataset Nodes: 9859


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(nn.Module):
    """Two-layer graph convolutional network that outputs log-softmax scores.

    Args:
        in_channels: Input width passed to the first ``GCNConv`` layer.
        hidden_channels: Output width of the first layer / input width of the second.
        out_channels: Output width of the second ``GCNConv`` layer.
        dropout: Probability used for dropout on the hidden activations while
            the module is in training mode. Defaults to 0.5, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is not within ``[0, 1]``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, data):
        """Run both convolutions over ``data.x`` / ``data.edge_index``.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            ``log_softmax`` over dimension 1 of the second layer's output.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while self.training is True.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [5]:
# Training configuration shared by the target and shadow GCNs
hidden_channels = 128
learning_rate = 0.001
epochs = 200

# Both networks use the same layer sizes. The target model is built first so
# the order in which random initial weights are drawn stays the same.
_gcn_dims = (dataset.num_node_features, hidden_channels, dataset.num_classes)
target_model = GCN(*_gcn_dims)
shadow_model = GCN(*_gcn_dims)

# A single loss object is shared; each model gets its own Adam optimizer.
# NOTE(review): GCN.forward already returns log_softmax output, while
# CrossEntropyLoss is documented to take raw logits. Re-applying log-softmax
# to log-probabilities should leave the loss unchanged, so this looks
# redundant rather than wrong — confirm before switching to nn.NLLLoss.
criterion = nn.CrossEntropyLoss()
target_optimizer = torch.optim.Adam(target_model.parameters(), lr=learning_rate)
shadow_optimizer = torch.optim.Adam(shadow_model.parameters(), lr=learning_rate)

# Function to train a model
def train_model(model, optimizer, data, epochs):
    """Fit ``model`` full-batch on the nodes selected by ``data.train_mask``.

    Every epoch performs one forward pass on ``data``, scores only the masked
    rows with the module-level ``criterion``, backpropagates, and takes one
    optimizer step. The current loss is printed on every 50th epoch.

    Returns nothing; the model is left in training mode.
    """
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()

        predictions = model(data)
        mask = data.train_mask
        # Only nodes inside the training mask contribute to the loss.
        loss = criterion(predictions[mask], data.y[mask])

        loss.backward()
        optimizer.step()

        # Stay quiet except on epochs 50, 100, 150, ...
        if (epoch + 1) % 50 != 0:
            continue
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

# Fit each GCN on its own graph object, target first and shadow second,
# announcing every run before it starts.
_training_runs = (
    ("Training Target Model...", target_model, target_optimizer, data),
    ("Training Shadow Model...", shadow_model, shadow_optimizer, shadow_data),
)
for _banner, _net, _net_optimizer, _graph in _training_runs:
    print(_banner)
    train_model(_net, _net_optimizer, _graph, epochs)

print("Models trained successfully.")


Training Target Model...
Epoch 50/200, Loss: 0.8198
Epoch 100/200, Loss: 0.5715
Epoch 150/200, Loss: 0.4616
Epoch 200/200, Loss: 0.4118
Training Shadow Model...
Epoch 50/200, Loss: 0.8217
Epoch 100/200, Loss: 0.5711
Epoch 150/200, Loss: 0.4567
Epoch 200/200, Loss: 0.4043
Models trained successfully.


In [11]:

class Baseline0(nn.Module):
    """Three-layer MLP (in_features -> 128 -> 32 -> 2) returning log-softmax scores.

    Args:
        in_features: Width of each input row.
        dropout: Dropout probability applied after each hidden layer while the
            module is in training mode. Defaults to 0.5, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is not within ``[0, 1]``.
    """

    def __init__(self, in_features, dropout=0.5):
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        super().__init__()
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 2)  # Binary classification
        self.dropout = dropout

    def forward(self, x):
        """Return ``log_softmax`` over dimension 1 of the final layer's output.

        Args:
            x: Input batch; assumed to be 2-D ``(rows, in_features)``.
        """
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


In [12]:
class Baseline1(nn.Module):
    """Two-layer MLP (in_features -> 16 -> 2) returning log-softmax scores.

    Args:
        in_features: Width of each input row.
        dropout: Dropout probability applied after the hidden layer while the
            module is in training mode. Defaults to 0.5, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is not within ``[0, 1]``.
    """

    def __init__(self, in_features, dropout=0.5):
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        super().__init__()
        self.fc1 = nn.Linear(in_features, 16)
        self.fc2 = nn.Linear(16, 2)  # Binary classification
        self.dropout = dropout

    def forward(self, x):
        """Return ``log_softmax`` over dimension 1 of the final layer's output.

        Args:
            x: Input batch; assumed to be 2-D ``(rows, in_features)``.
        """
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


In [13]:
class Baseline2(nn.Module):
    """Two-branch classifier that fuses node attributes with graph features.

    The node branch is an MLP (node_features -> 256 -> 64 -> 8); the graph
    branch is a single linear layer (graph_features -> 1). Both branch outputs
    pass through ReLU, are concatenated along dimension 1, and a final linear
    layer maps the 9 fused values to 2 log-softmax scores.

    Args:
        node_features: Width of each row of ``node_x``.
        graph_features: Width of each row of ``graph_x``.
        dropout: Dropout probability applied after the first two node-branch
            layers while the module is in training mode. Defaults to 0.5, the
            value that was previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is not within ``[0, 1]``.
    """

    def __init__(self, node_features, graph_features, dropout=0.5):
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        super().__init__()
        # Sub-network for node attributes
        self.node_fc1 = nn.Linear(node_features, 256)
        self.node_fc2 = nn.Linear(256, 64)
        self.node_fc3 = nn.Linear(64, 8)

        # Sub-network for graph features
        self.graph_fc = nn.Linear(graph_features, 1)

        # Final layer over the concatenated branch outputs (8 + 1 values)
        self.final_fc = nn.Linear(8 + 1, 2)

        self.dropout = dropout

    def forward(self, node_x, graph_x):
        """Return ``log_softmax`` over dimension 1 of the fused prediction.

        Args:
            node_x: Node-attribute batch; assumed 2-D ``(rows, node_features)``.
            graph_x: Graph-feature batch; assumed 2-D ``(rows, graph_features)``
                with the same number of rows as ``node_x`` (required by the
                concatenation along dimension 1).
        """
        # Node attributes sub-network
        x1 = F.relu(self.node_fc1(node_x))
        x1 = F.dropout(x1, p=self.dropout, training=self.training)
        x1 = F.relu(self.node_fc2(x1))
        x1 = F.dropout(x1, p=self.dropout, training=self.training)
        x1 = F.relu(self.node_fc3(x1))

        # Graph features sub-network
        x2 = F.relu(self.graph_fc(graph_x))

        # Concatenate and pass through final layer
        x = torch.cat([x1, x2], dim=1)
        x = self.final_fc(x)
        return F.log_softmax(x, dim=1)


In [17]:
# Shared training configuration for the three baseline classifiers
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
epochs = 200

# Build the baselines in a fixed order (0, 1, 2) so random weight
# initialisation draws happen in the same sequence on every run.
_feature_dim = data.x.size(1)
baseline0 = Baseline0(in_features=_feature_dim)
baseline1 = Baseline1(in_features=_feature_dim)
baseline2 = Baseline2(node_features=_feature_dim, graph_features=1)


def _adam_with_cosine_schedule(model):
    """Return an (Adam optimizer, CosineAnnealingLR scheduler) pair for ``model``."""
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    return optimizer, scheduler


# One optimizer and one cosine-annealing scheduler per baseline
optimizer_baseline0, scheduler_baseline0 = _adam_with_cosine_schedule(baseline0)
optimizer_baseline1, scheduler_baseline1 = _adam_with_cosine_schedule(baseline1)
optimizer_baseline2, scheduler_baseline2 = _adam_with_cosine_schedule(baseline2)

print("Baseline models, optimizers, and schedulers initialized.")


Baseline models, optimizers, and schedulers initialized.


In [23]:
class PosteriorAttackModel(nn.Module):
    """Three-layer MLP (in_features -> 128 -> 32 -> 2) returning log-softmax scores.

    Args:
        in_features: Width of each input row.
        dropout: Dropout probability applied after each hidden layer while the
            module is in training mode. Defaults to 0.5, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is not within ``[0, 1]``.
    """

    def __init__(self, in_features, dropout=0.5):
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        super().__init__()
        # Three linear layers with sizes 128, 32, and 2 neurons
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 2)  # Output size is 2 for binary classification
        self.dropout = dropout

    def forward(self, x):
        """Return ``log_softmax`` over dimension 1 of the final layer's output.

        Args:
            x: Input batch; assumed to be 2-D ``(rows, in_features)``.
        """
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)  # regularization
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.dropout, training=self.training)  # regularization
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


In [24]:
# Training configuration for the posterior-only attack
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
epochs = 200

# NOTE(review): the input width is taken from the node-feature matrix
# (data.x). A posterior-only attack presumably consumes the target model's
# class posteriors instead, in which case this should be the number of
# classes — confirm against the code that feeds this model.
_attack_input_dim = data.x.size(1)
posterior_attack_model = PosteriorAttackModel(in_features=_attack_input_dim)

# Adam optimizer with a cosine-annealed learning rate over `epochs` steps
optimizer_posterior = torch.optim.Adam(posterior_attack_model.parameters(), lr=learning_rate)
scheduler_posterior = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_posterior, T_max=epochs)

print("Posterior-only Attack model, optimizer, and scheduler initialized.")


Posterior-only Attack model, optimizer, and scheduler initialized.


In [26]:
def train_model(model, optimizer, scheduler, data, epochs, loss_fn=None):
    """Train a two-class model full-batch on ``data.x`` with LR scheduling.

    NOTE: this definition replaces the earlier four-argument
    ``train_model(model, optimizer, data, epochs)`` in the same kernel; cells
    that expect the old signature must be re-run before this one.

    Each epoch runs ``model(data.x)``, scores the rows selected by
    ``data.train_mask`` against the labels, backpropagates, and steps the
    optimizer followed by the scheduler. The loss is printed on every 10th
    epoch, starting with epoch 0.

    Labels outside ``{0, 1}`` are still clamped into that range so existing
    callers keep working, but a ``RuntimeWarning`` now reports how many labels
    were rewritten; clamping silently merges distinct classes (e.g. 2 -> 1).

    Args:
        model: Module called as ``model(data.x)``.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: Learning-rate scheduler stepped once per epoch.
        data: Object exposing ``x``, ``y`` and a boolean ``train_mask``.
        epochs: Number of full-batch epochs to run.
        loss_fn: Optional loss callable ``loss_fn(outputs, targets)``. When
            omitted, the module-level ``criterion`` is used, as before.

    Returns:
        None. The model is left in training mode.
    """
    import warnings  # local import keeps this cell runnable on its own

    if loss_fn is None:
        loss_fn = criterion  # module-level loss defined in an earlier cell

    # The mask and labels do not change between epochs, so prepare them once.
    mask = data.train_mask
    raw_target = data.y[mask]
    target = torch.clamp(raw_target, min=0, max=1)
    n_rewritten = int((target != raw_target).sum())
    if n_rewritten:
        warnings.warn(
            f"train_model: {n_rewritten} label(s) were outside [0, 1] and have "
            "been clamped; distinct classes may have been merged.",
            RuntimeWarning,
            stacklevel=2,
        )

    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()

        # Forward pass over every row; only masked rows enter the loss.
        out = model(data.x)
        loss = loss_fn(out[mask], target)

        loss.backward()
        optimizer.step()

        # Step the scheduler after the optimizer (learning-rate annealing).
        scheduler.step()

        # Print training progress every 10 epochs
        if epoch % 10 == 0:
            print(f"Epoch {epoch}/{epochs}, Loss: {loss.item()}")
