<a href="https://colab.research.google.com/github/Nilufayeasmin299/Reproduce-GNN_Ownership_Verification/blob/main/Final_Preproduction_of_GNN_Ownership_verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision torch-geometric numpy scikit-learn matplotlib tqdm



In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid, Amazon
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, GINConv
from torch_geometric.transforms import NormalizeFeatures
from sklearn.metrics import accuracy_score, confusion_matrix
import random
import numpy as np

In [None]:
# Configuration
CONFIG = {
    'learning_rate': 0.005,
    'weight_decay': 5e-4,
    'hidden_dim': 256,  # Increased hidden dimension for deeper representations
    'dropout': 0.3,
    'epochs': 400,
    'edge_dropout_rate': 0.1,
    'mask_ratios': [0.0, 0.05, 0.1, 0.2],
    'architectures': ['GCN', 'GAT', 'SAGE', 'GIN'],
    'settings': ['transductive', 'inductive'],
    'early_stopping_patience': 30  # Early stopping patience
}

In [None]:
# Set seeds for reproducibility
random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

In [None]:
def create_masks(data, train_ratio=0.6, val_ratio=0.2):
    num_nodes = data.num_nodes
    indices = torch.randperm(num_nodes)
    train_size = int(train_ratio * num_nodes)
    val_size = int(val_ratio * num_nodes)

    data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    data.train_mask[indices[:train_size]] = True
    data.val_mask[indices[train_size:train_size + val_size]] = True
    data.test_mask[indices[train_size + val_size:]] = True
    return data

def augment_data(data, edge_dropout_rate):
    num_edges = data.edge_index.shape[1]
    keep_edges = int(num_edges * (1 - edge_dropout_rate))
    perm = torch.randperm(num_edges)[:keep_edges]
    data.edge_index = data.edge_index[:, perm]
    return data

def mask_features(data, mask_ratio):
    num_features = data.x.shape[1]
    num_masked = int(mask_ratio * num_features)
    mask_indices = torch.randperm(num_features)[:num_masked]
    data.x[:, mask_indices] = 0
    return data

def split_inductive(data):
    """Simulate inductive setting by splitting the graph into disjoint subgraphs."""
    num_nodes = data.num_nodes
    indices = torch.randperm(num_nodes)
    split_idx = num_nodes // 2
    train_idx = indices[:split_idx]
    test_idx = indices[split_idx:]

    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    train_mask[train_idx] = True
    test_mask[test_idx] = True

    data.train_mask = train_mask
    data.test_mask = test_mask
    return data


In [None]:
class EnhancedGNN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=3, dropout=0.3, arch='GCN'):
        super().__init__()
        self.num_layers = num_layers
        self.dropout = dropout
        self.arch = arch

        self.convs = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        if arch == 'GCN':
            self.convs.append(GCNConv(in_channels, hidden_channels))
        elif arch == 'GAT':
            self.convs.append(GATConv(in_channels, hidden_channels))
        elif arch == 'SAGE':
            self.convs.append(SAGEConv(in_channels, hidden_channels))
        elif arch == 'GIN':
            gin_nn = nn.Sequential(
                nn.Linear(in_channels, hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, hidden_channels)
            )
            self.convs.append(GINConv(gin_nn))

        self.batch_norms.append(nn.BatchNorm1d(hidden_channels))

        for _ in range(num_layers - 2):
            if arch == 'GCN':
                self.convs.append(GCNConv(hidden_channels, hidden_channels))
            elif arch == 'GAT':
                self.convs.append(GATConv(hidden_channels, hidden_channels))
            elif arch == 'SAGE':
                self.convs.append(SAGEConv(hidden_channels, hidden_channels))
            elif arch == 'GIN':
                gin_nn = nn.Sequential(
                    nn.Linear(hidden_channels, hidden_channels),
                    nn.ReLU(),
                    nn.Linear(hidden_channels, hidden_channels)
                )
                self.convs.append(GINConv(gin_nn))
            self.batch_norms.append(nn.BatchNorm1d(hidden_channels))

        if arch == 'GCN':
            self.convs.append(GCNConv(hidden_channels, out_channels))
        elif arch == 'GAT':
            self.convs.append(GATConv(hidden_channels, out_channels))
        elif arch == 'SAGE':
            self.convs.append(SAGEConv(hidden_channels, out_channels))
        elif arch == 'GIN':
            gin_nn = nn.Sequential(
                nn.Linear(hidden_channels, hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, out_channels)
            )
            self.convs.append(GINConv(gin_nn))

    def forward(self, x, edge_index):
        for i in range(self.num_layers - 1):
            x = self.convs[i](x, edge_index)
            x = self.batch_norms[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.convs[-1](x, edge_index)
        return x

def train_and_evaluate(model, data, config):
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
    criterion = nn.CrossEntropyLoss()

    best_loss = float('inf')
    patience_counter = 0

    for epoch in range(config['epochs']):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        scheduler.step(loss)

        model.eval()
        pred = out.argmax(dim=1)
        acc = accuracy_score(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu())

        cm = confusion_matrix(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu())
        print(f"Confusion matrix shape: {cm.shape}")
        #Calculate FPR and FNR for multi-class classification
        FP = cm.sum(axis=0) - np.diag(cm)
        FN = cm.sum(axis=1) - np.diag(cm)
        TP = np.diag(cm)
        TN = cm.sum() - (FP + FN + TP)

        # Then calculate the rates using these arrays:
        FPR = FP / (FP + TN)
        FNR = FN / (TP + FN)

        # To get the average rates across all classes:
        avg_fpr = np.mean(FPR)
        avg_fnr = np.mean(FNR)

        print(f"Epoch {epoch} | Loss: {loss:.4f} | Accuracy: {acc:.4f} | FPR: {avg_fpr:.4f} | FNR: {avg_fnr:.4f}")

        # Early stopping based on validation loss
        if loss < best_loss:
            best_loss = loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= config['early_stopping_patience']:
            print("Early stopping triggered.")
            break

    return acc, avg_fpr, avg_fnr # Return the average rates


In [None]:
# Example usage with different datasets, settings, and masking ratios:
datasets = [
    ('Cora', Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures())),
    ('CiteSeer', Planetoid(root='/tmp/CiteSeer', name='CiteSeer', transform=NormalizeFeatures())),
    ('PubMed', Planetoid(root='/tmp/PubMed', name='PubMed', transform=NormalizeFeatures())),
    ('Amazon', Amazon(root='/tmp/Amazon', name='Computers'))
]

results = {}

In [None]:
for dataset_name, dataset in datasets:
    print(f"Running experiments on dataset: {dataset_name}")
    data = dataset[0]
    # Move the initial data to the appropriate device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = data.to(device)
    dataset_results = []

    for setting in CONFIG['settings']:
        print(f"\nRunning in {setting} setting")
        if setting == 'inductive':
            current_data = split_inductive(data.clone())  # Use current_data
        else:
            current_data = create_masks(data.clone())  # Use current_data

        for mask_ratio in CONFIG['mask_ratios']:
            print(f"\nRunning experiment with mask ratio: {mask_ratio}")
            masked_data = mask_features(current_data.clone(), mask_ratio)  # Clone from current_data

            for arch in CONFIG['architectures']:
                print(f"\nTesting architecture: {arch}")
                model = EnhancedGNN(
                    in_channels=data.num_features,  # Access from original data
                    hidden_channels=CONFIG['hidden_dim'],
                    out_channels=dataset.num_classes,  # Access from original dataset
                    dropout=CONFIG['dropout'],
                    arch=arch
                ).to(device) # Ensure the model is on the correct device
                masked_data = augment_data(masked_data, CONFIG['edge_dropout_rate'])
                # Move masked_data to the same device as the model
                masked_data = masked_data.to(device)
                acc, fpr, fnr = train_and_evaluate(model, masked_data, CONFIG)

                dataset_results.append({
                    'setting': setting,
                    'mask_ratio': mask_ratio,
                    'architecture': arch,
                    'accuracy': round(acc, 4),
                    'false_positive_rate': round(fpr, 4),
                    'false_negative_rate': round(fnr, 4)
                })

    results[dataset_name] = dataset_results

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Confusion matrix shape: (10, 10)
Epoch 231 | Loss: 0.0574 | Accuracy: 0.8921 | FPR: 0.0138 | FNR: 0.1273
Confusion matrix shape: (10, 10)
Epoch 232 | Loss: 0.0517 | Accuracy: 0.8938 | FPR: 0.0135 | FNR: 0.1144
Confusion matrix shape: (10, 10)
Epoch 233 | Loss: 0.0511 | Accuracy: 0.8953 | FPR: 0.0132 | FNR: 0.1127
Confusion matrix shape: (10, 10)
Epoch 234 | Loss: 0.0574 | Accuracy: 0.8967 | FPR: 0.0131 | FNR: 0.1049
Confusion matrix shape: (10, 10)
Epoch 235 | Loss: 0.0519 | Accuracy: 0.8915 | FPR: 0.0137 | FNR: 0.1126
Confusion matrix shape: (10, 10)
Epoch 236 | Loss: 0.0517 | Accuracy: 0.8899 | FPR: 0.0140 | FNR: 0.1156
Confusion matrix shape: (10, 10)
Epoch 237 | Loss: 0.0500 | Accuracy: 0.8941 | FPR: 0.0134 | FNR: 0.1154
Confusion matrix shape: (10, 10)
Epoch 238 | Loss: 0.0530 | Accuracy: 0.8937 | FPR: 0.0135 | FNR: 0.1122
Confusion matrix shape: (10, 10)
Epoch 239 | Loss: 0.0510 | Accuracy: 0.8978 | FPR: 0.0129 | FN

In [19]:
# Summarize results in a readable format
for dataset_name, dataset_results in results.items():
    print(f"\nSummary for {dataset_name}:")
    print(f"{'Setting':<15}{'Mask Ratio':<15}{'Architecture':<15}{'Accuracy':<10}{'FPR':<10}{'FNR':<10}")
    print("-" * 65)
    for result in dataset_results:
        print(f"{result['setting']:<15}{result['mask_ratio']:<15}{result['architecture']:<15}{result['accuracy']:<10}{result['false_positive_rate']:<10}{result['false_negative_rate']:<10}")



Summary for Cora:
Setting        Mask Ratio     Architecture   Accuracy  FPR       FNR       
-----------------------------------------------------------------
transductive   0.0            GCN            0.8029    0.0348    0.2001    
transductive   0.0            GAT            0.8048    0.0342    0.2074    
transductive   0.0            SAGE           0.8232    0.0314    0.184     
transductive   0.0            GIN            0.7845    0.038     0.2321    
transductive   0.05           GCN            0.8158    0.0321    0.1839    
transductive   0.05           GAT            0.8177    0.0317    0.1932    
transductive   0.05           SAGE           0.8379    0.0284    0.1825    
transductive   0.05           GIN            0.7864    0.0374    0.2332    
transductive   0.1            GCN            0.8195    0.0319    0.18      
transductive   0.1            GAT            0.8379    0.0286    0.1741    
transductive   0.1            SAGE           0.8435    0.0277    0.1646    
tra