In [33]:
import torch
from torch_geometric.datasets import Planetoid
from sklearn.model_selection import train_test_split
import numpy as np
import random

# Set random seed for reproducibility
seed = 42  # You can change this number, but it should be the same across all runs
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Load the PubMed dataset
dataset = Planetoid(root='/tmp/PubMed', name='PubMed')
data = dataset[0]

# Split nodes into target and shadow sets
nodes = np.arange(data.num_nodes)
target_nodes, shadow_nodes = train_test_split(nodes, test_size=0.5, random_state=seed)

# Create training masks
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[target_nodes] = True

shadow_data = data.clone()
shadow_data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
shadow_data.train_mask[shadow_nodes] = True

# Inspect dataset
print(f"Dataset: {dataset}")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Number of features: {data.num_features}")
print(f"Number of classes: {dataset.num_classes}")
print(f"Train Mask: {data.train_mask.sum()} nodes")
print(f"Test Mask: {data.test_mask.sum()} nodes")
print(f"Target Dataset Nodes: {len(target_nodes)}")
print(f"Shadow Dataset Nodes: {len(shadow_nodes)}")


Dataset: PubMed()
Number of nodes: 19717
Number of edges: 88648
Number of features: 500
Number of classes: 3
Train Mask: 9858 nodes
Test Mask: 1000 nodes
Target Dataset Nodes: 9858
Shadow Dataset Nodes: 9859


In [34]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 128)
        self.conv2 = GCNConv(128, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Initialize models
target_model = GCN(dataset.num_node_features, dataset.num_classes)
shadow_model = GCN(dataset.num_node_features, dataset.num_classes)

print("Models initialized.")


Models initialized.


In [35]:
import torch.optim as optim
# Hyperparameters
learning_rate = 0.001
epochs = 200

def train_model(model, data, optimizer, num_epochs=200):
    model.train()
    for epoch in range(1, num_epochs + 1):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

# Train models
target_optimizer = optim.Adam(target_model.parameters(), lr=learning_rate, weight_decay=5e-4)
shadow_optimizer = optim.Adam(shadow_model.parameters(), lr=learning_rate, weight_decay=5e-4)

# Define accuracy calculation
def accuracy(output, labels, mask):
    # Get the predicted class for each node
    _, pred = output.max(dim=1)
    
    # Apply the mask to get the relevant labels
    correct = (pred[mask] == labels[mask]).sum().item()
    total = mask.sum().item()
    
    return correct / total if total > 0 else 0

print("Training Target Model...")
train_model(target_model, data, target_optimizer)

print("Training Shadow Model...")
train_model(shadow_model, shadow_data, shadow_optimizer)

def test_model(model, data):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation for efficiency
        output = model(data)
        test_acc = accuracy(output, data.y, data.test_mask)
    print(f"Test Accuracy: {test_acc:.4f}")

print("Testing Target Model...")
test_model(target_model, data)

print("Testing Shadow Model...")
test_model(shadow_model, shadow_data)

Training Target Model...
Epoch 50/200, Loss: 0.8608
Epoch 100/200, Loss: 0.6319
Epoch 150/200, Loss: 0.5199
Epoch 200/200, Loss: 0.4680
Training Shadow Model...
Epoch 50/200, Loss: 0.8688
Epoch 100/200, Loss: 0.6268
Epoch 150/200, Loss: 0.5103
Epoch 200/200, Loss: 0.4593
Testing Target Model...
Test Accuracy: 0.8630
Testing Shadow Model...
Test Accuracy: 0.8650


In [36]:
def posterior_attack(model, data, node_u, node_v, hops=0):
    model.eval()
    with torch.no_grad():
        embedding_u = model.conv1(data.x, data.edge_index)[node_u]
        embedding_v = model.conv1(data.x, data.edge_index)[node_v]

        if hops >= 1:
            neighbors_u = data.edge_index[1][data.edge_index[0] == node_u]
            neighbors_v = data.edge_index[1][data.edge_index[0] == node_v]
            embedding_u += torch.mean(model.conv1(data.x, data.edge_index)[neighbors_u], dim=0)
            embedding_v += torch.mean(model.conv1(data.x, data.edge_index)[neighbors_v], dim=0)
#working fine here

        if hops == 2:
            two_hop_neighbors_u = data.edge_index[1][torch.isin(data.edge_index[0], neighbors_u)]
            two_hop_neighbors_v = data.edge_index[1][torch.isin(data.edge_index[0], neighbors_v)]
            embedding_u += torch.mean(model.conv1(data.x, data.edge_index)[two_hop_neighbors_u], dim=0)
            embedding_v += torch.mean(model.conv1(data.x, data.edge_index)[two_hop_neighbors_v], dim=0)

        similarity = torch.dot(embedding_u, embedding_v)
    return similarity.item()

print("Posterior-only attack implemented.")


Posterior-only attack implemented.


In [37]:

def generate_node_pairs(data, num_pairs=50):
    edge_index = data.edge_index.cpu().numpy()
    num_nodes = data.num_nodes

    # Positive pairs (linked nodes)
    positive_pairs = list(zip(edge_index[0], edge_index[1]))
    positive_pairs = random.sample(positive_pairs, min(len(positive_pairs), num_pairs // 2))

    # Negative pairs (unlinked nodes)
    negative_pairs = []
    existing_pairs = set(positive_pairs)
    while len(negative_pairs) < num_pairs // 2:
        u, v = random.randint(0, num_nodes - 1), random.randint(0, num_nodes - 1)
        if u != v and (u, v) not in existing_pairs and (v, u) not in existing_pairs:
            negative_pairs.append((u, v))

    node_pairs = positive_pairs + negative_pairs
    labels = [1] * len(positive_pairs) + [0] * len(negative_pairs)
    return node_pairs, labels

node_pairs, true_labels = generate_node_pairs(data, num_pairs=50)
print(f"Generated {len(node_pairs)} node pairs.")
print("Generated node pairs as:", node_pairs)


Generated 50 node pairs.
Generated node pairs as: [(14651, 18670), (8131, 3136), (5839, 735), (11637, 7932), (9723, 7089), (14195, 6412), (529, 3957), (8763, 2862), (12811, 15824), (18908, 2408), (16003, 17111), (8296, 12066), (18661, 905), (6818, 869), (530, 2591), (13358, 6273), (2385, 6697), (8449, 14618), (18580, 17471), (5270, 767), (19125, 16261), (4632, 5661), (6475, 18786), (5223, 15813), (1581, 12015), (7223, 14719), (19309, 9115), (212, 5231), (13848, 11149), (9105, 5094), (7055, 11029), (3349, 3039), (12449, 3169), (11763, 11270), (8667, 1423), (15054, 17571), (4090, 12403), (2582, 18089), (9606, 11850), (18918, 6300), (2279, 1501), (7467, 9482), (2614, 7628), (3309, 12455), (9108, 14857), (11954, 5329), (12130, 11641), (6865, 8748), (2339, 5607), (17502, 8021)]


In [38]:
from sklearn.metrics import roc_auc_score

for hops in range(3):  # 0-hop, 1-hop, 2-hop
    posterior_scores = [posterior_attack(target_model, data, u, v, hops=hops) for u, v in node_pairs]
    auc = roc_auc_score(true_labels, posterior_scores)
    print(f"{hops}-hop Posterior-Only Attack AUC: {auc:.4f}")


0-hop Posterior-Only Attack AUC: 0.9520
1-hop Posterior-Only Attack AUC: 0.8208
2-hop Posterior-Only Attack AUC: 0.8736


In [39]:
def pairwise_operations(features_u, features_v):
    """Construct symmetric features using pairwise operations."""
    hadamard = features_u * features_v
    average = (features_u + features_v) / 2
    weighted_l1 = torch.abs(features_u - features_v)
    weighted_l2 = (features_u - features_v) ** 2
    return torch.cat([hadamard, average, weighted_l1, weighted_l2], dim=0)


In [40]:
def compute_graph_features(data, node_u, node_v):
    neighbors_u = set(data.edge_index[1][data.edge_index[0] == node_u].cpu().numpy())
    neighbors_v = set(data.edge_index[1][data.edge_index[0] == node_v].cpu().numpy())
    common_neighbors = len(neighbors_u.intersection(neighbors_v))
    union = len(neighbors_u.union(neighbors_v))
    jaccard = common_neighbors / union if union > 0 else 0.0
    return torch.tensor([common_neighbors, jaccard], dtype=torch.float32)


In [41]:
def combined_attack(model, data, node_u, node_v, use_attributes=True, use_graph_features=True, hops=0):
    model.eval()
    with torch.no_grad():
        # Posterior embeddings
        embedding_u = model.conv1(data.x, data.edge_index)[node_u]
        embedding_v = model.conv1(data.x, data.edge_index)[node_v]
        
        if hops >= 1:
            neighbors_u = data.edge_index[1][data.edge_index[0] == node_u]
            neighbors_v = data.edge_index[1][data.edge_index[0] == node_v]
            embedding_u += torch.mean(model.conv1(data.x, data.edge_index)[neighbors_u], dim=0)
            embedding_v += torch.mean(model.conv1(data.x, data.edge_index)[neighbors_v], dim=0)

        # Node attributes
        attribute_features = torch.zeros(0)
        if use_attributes:
            attribute_features = pairwise_operations(data.x[node_u], data.x[node_v])

        # Graph features
        graph_features = torch.zeros(0)
        if use_graph_features:
            graph_features = compute_graph_features(data, node_u, node_v)

        # Combine features
        combined_features = torch.cat([embedding_u, embedding_v, attribute_features, graph_features])
    return combined_features


In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Prepare features and labels
def prepare_features_and_labels(node_pairs, true_labels, model, data, use_attributes, use_graph_features, hops):
    features = [combined_attack(model, data, u, v, use_attributes, use_graph_features, hops).numpy() for u, v in node_pairs]
    return np.array(features), np.array(true_labels)

# Train and evaluate combined attacks
def evaluate_combined_attack(use_attributes, use_graph_features, hops):
    X, y = prepare_features_and_labels(node_pairs, true_labels, target_model, data, use_attributes, use_graph_features, hops)
    clf = LogisticRegression(max_iter=1000).fit(X, y)
    y_pred = clf.predict_proba(X)[:, 1]
    auc = roc_auc_score(y, y_pred)
    return auc

print("Evaluating Combined Attacks...")
for hops in range(3):
    for use_attributes, use_graph_features in [(True, False), (False, True), (True, True)]:
        auc = evaluate_combined_attack(use_attributes, use_graph_features, hops)
        attributes = "Attributes" if use_attributes else ""
        graph = "Graph Features" if use_graph_features else ""
        print(f"{hops}-hop Combined Attack ({attributes} {graph}): AUC = {auc:.4f}")


Evaluating Combined Attacks...
0-hop Combined Attack (Attributes ): AUC = 0.9440
0-hop Combined Attack ( Graph Features): AUC = 0.9536
0-hop Combined Attack (Attributes Graph Features): AUC = 0.9952
1-hop Combined Attack (Attributes ): AUC = 0.8544
1-hop Combined Attack ( Graph Features): AUC = 0.8448
1-hop Combined Attack (Attributes Graph Features): AUC = 0.9152
2-hop Combined Attack (Attributes ): AUC = 0.8544
2-hop Combined Attack ( Graph Features): AUC = 0.8448
2-hop Combined Attack (Attributes Graph Features): AUC = 0.9152


In [43]:
baseline_common = [compute_graph_features(data, u, v)[0].item() for u, v in node_pairs]
auc_common = roc_auc_score(true_labels, baseline_common)

baseline_jaccard = [compute_graph_features(data, u, v)[1].item() for u, v in node_pairs]
auc_jaccard = roc_auc_score(true_labels, baseline_jaccard)

print(f"Common Neighbors Baseline AUC: {auc_common:.4f}")
print(f"Jaccard Coefficient Baseline AUC: {auc_jaccard:.4f}")


Common Neighbors Baseline AUC: 0.6200
Jaccard Coefficient Baseline AUC: 0.6200


In [44]:
def label_only_defense(model, data, node_pairs):
    model.eval()
    with torch.no_grad():
        predictions = torch.argmax(model(data), dim=1)
        scores = [1 if predictions[u] == predictions[v] else 0 for u, v in node_pairs]
    return scores

label_defense_scores = label_only_defense(target_model, data, node_pairs)
auc_label_defense = roc_auc_score(true_labels, label_defense_scores)
print(f"Label-Only Defense AUC: {auc_label_defense:.4f}")


Label-Only Defense AUC: 0.7800


In [45]:
def dp_gcn_defense(model, data, epsilon=1.0):
    noise = torch.randn_like(data.x) * epsilon
    perturbed_data = data.clone()
    perturbed_data.x += noise
    return model(perturbed_data)

perturbed_output = dp_gcn_defense(target_model, data, epsilon=1.0)
print("DP-GCN defense applied.")


DP-GCN defense applied.
