<a href="https://colab.research.google.com/github/Nilufayeasmin299/Reproduce-GNN_Ownership_Verification/blob/main/GNN_ownership_using_Amazon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install Required Libraries
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install torch_geometric
!pip install numpy scikit-learn tqdm pyyaml argparse

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1
Collecting argparse
  Downloading argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Downloading argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


In [2]:
import torch
import numpy as np
from torch_geometric.datasets import Amazon
from torch_geometric.nn import GCNConv, GATConv
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Configuration
config = {
    'dataset': 'Amazon',  # Dataset name
    'dataset_type': 'Computers',  # Subset of the Amazon dataset
    'model_type': 'gat',  # Model type: gcn, gat, sage, GIN, SGC
    'hidden_dims': [352, 256],  # Hidden layer dimensions
    'embedding_dim': 128,  # Embedding dimension
    'epochs': 100,  # Number of training epochs
    'learning_rate': 0.01,  # Learning rate for optimization
    'mask_ratio': 0.2,  # Ratio of features to mask
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'n_runs': 3,  # Number of runs for robust evaluation
    'seed': 42,  # Random seed for reproducibility
}

In [3]:
# Step 3: Set Random Seeds
def set_random_seeds(seed):
    """Set random seeds for reproducibility."""
    torch.manual_seed(seed)
    np.random.seed(seed)
    if config['device'] == 'cuda':
        torch.cuda.manual_seed_all(seed)

set_random_seeds(config['seed'])

In [4]:
# Step 4: Feature Masking
def mask_features(data, mask_ratio):
    """Mask a subset of node features to enrich model fingerprinting."""
    num_features = data.x.shape[1]
    mask = np.random.choice([0, 1], size=num_features, p=[mask_ratio, 1 - mask_ratio])
    data.x = data.x * torch.tensor(mask, dtype=torch.float32, device=config['device'])
    return data

In [5]:
# Step 5: GNN Model
class GNNModel(torch.nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, model_type='gat'):
        super(GNNModel, self).__init__()
        if model_type == 'gcn':
            self.conv1 = GCNConv(input_dim, hidden_dims[0])
            self.conv2 = GCNConv(hidden_dims[0], hidden_dims[1])
        elif model_type == 'gat':
            self.conv1 = GATConv(input_dim, hidden_dims[0])
            self.conv2 = GATConv(hidden_dims[0], hidden_dims[1])
        else:
            raise ValueError(f"Unsupported model type: {model_type}")
        self.fc = torch.nn.Linear(hidden_dims[1], output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        x = torch.relu(self.conv2(x, edge_index))
        x = self.fc(x)
        return torch.log_softmax(x, dim=1)

In [6]:
# Step 6: Train and Evaluate Functions
def train_model(model, data, optimizer, criterion, epochs):
    """Train the GNN model with progress tracking."""
    model.train()
    for epoch in tqdm(range(epochs), desc="Training"):
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

def evaluate_model(model, data):
    """Evaluate model accuracy on the test set."""
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)
    return accuracy_score(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu())

In [7]:
# Step 7: Ownership Verification
def evaluate_ownership(target_model, shadow_model, data):
    """Train and evaluate the ownership verification classifier."""
    target_model.eval()
    shadow_model.eval()

    # Extract embeddings
    target_embeddings = target_model(data).detach().cpu().numpy()
    shadow_embeddings = shadow_model(data).detach().cpu().numpy()

    # Prepare data for classifier
    X = np.vstack([target_embeddings, shadow_embeddings])
    y = np.array([1] * len(target_embeddings) + [0] * len(shadow_embeddings))

    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=config['seed'])

    # Train RandomForest Classifier
    clf = RandomForestClassifier(random_state=config['seed'])
    clf.fit(X_train, y_train)

    # Evaluate
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0

    return acc, fpr, fnr

In [8]:
# Step 8: Load Dataset and Create Masks
# Load the dataset
dataset = Amazon(root='./data', name=config['dataset_type'])
data = dataset[0].to(config['device'])

# Generate train/val/test masks
num_nodes = data.num_nodes
indices = np.arange(num_nodes)
np.random.shuffle(indices)

train_size = int(0.6 * num_nodes)
val_size = int(0.2 * num_nodes)

train_indices = indices[:train_size]
val_indices = indices[train_size:train_size + val_size]
test_indices = indices[train_size + val_size:]

# Create masks
data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

data.train_mask[train_indices] = True
data.val_mask[val_indices] = True
data.test_mask[test_indices] = True

# Apply feature masking
data = mask_features(data, config['mask_ratio'])

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!


In [9]:
# Step 9: Initialize Models
target_model = GNNModel(dataset.num_features, config['hidden_dims'], dataset.num_classes, config['model_type']).to(config['device'])
shadow_model = GNNModel(dataset.num_features, config['hidden_dims'], dataset.num_classes, config['model_type']).to(config['device'])

In [10]:
# Step 10: Train Models
optimizer = torch.optim.Adam(target_model.parameters(), lr=config['learning_rate'])
criterion = torch.nn.NLLLoss()

train_model(target_model, data, optimizer, criterion, config['epochs'])
train_model(shadow_model, data, optimizer, criterion, config['epochs'])

Training: 100%|██████████| 100/100 [00:12<00:00,  8.00it/s]
Training: 100%|██████████| 100/100 [00:09<00:00, 10.48it/s]


In [11]:
# Step 11: Evaluate Models
target_acc = evaluate_model(target_model, data)
shadow_acc = evaluate_model(shadow_model, data)
print(f"Target Model Test Accuracy: {target_acc:.4f}")
print(f"Shadow Model Test Accuracy: {shadow_acc:.4f}")

Target Model Test Accuracy: 0.8870
Shadow Model Test Accuracy: 0.0116


In [12]:
# Step 12: Ownership Verification
ownership_acc, fpr, fnr = evaluate_ownership(target_model, shadow_model, data)
print(f"Ownership Classifier Accuracy: {ownership_acc:.4f}, FPR: {fpr:.4f}, FNR: {fnr:.4f}")

Ownership Classifier Accuracy: 1.0000, FPR: 0.0000, FNR: 0.0000
