In [1]:
# Download the corresponding PyTorch Geometric module
%%capture
"""
Assign to TORCH with what you get from the cell above, E.g., export TORCH=1.13.1+cu113
"""
%env TORCH=2.1.0+cu118
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install torch-geometric

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader
import torch.nn.functional as F



In [3]:
# # Load the Cora dataset
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]


# Check if CUDA is available and use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the data to the device (GPU if available)
data = data.to(device)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [None]:
import pdb
import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool, global_max_pool
import random
from torch_geometric.utils import to_dense_adj

from sklearn.metrics import f1_score
import numpy as np

class SUM_GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SUM_GCN, self).__init__()
        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)
        self.layer2 = torch.nn.Linear(hidden_dim, hidden_dim)
        self.lin = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        adj = to_dense_adj(edge_index, max_num_nodes=x.size(0))[0]
        # Move torch.eye to the same device as adj
        adj_i = adj + torch.eye(adj.size(0), device=adj.device)

        aggregated_neighbors0 =  torch.matmul(adj_i, x)
        x = self.layer1(aggregated_neighbors0)
        x = F.relu(x)

        aggregated_neighbors1 = torch.matmul(adj_i, x)
        x = self.layer2(aggregated_neighbors1)
        x = F.relu(x)

        x = self.lin(x)
        return x

    def get_ebd(self, x, edge_index):
        adj = to_dense_adj(edge_index, max_num_nodes=x.size(0))[0]
        # Move torch.eye to the same device as adj
        adj_i = adj + torch.eye(adj.size(0), device=adj.device)

        aggregated_neighbors0 =  torch.matmul(adj_i, x)
        x = self.layer1(aggregated_neighbors0)
        x = F.relu(x)

        aggregated_neighbors1 = torch.matmul(adj_i, x)
        x = self.layer2(aggregated_neighbors1)
        x = F.relu(x)
        return x


In [None]:
# Training the model
def train(model, data, optimizer, criterion, epochs=200):
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        # Get the node features (data.x) and edge indices (data.edge_index)
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

# Evaluate the model
def evaluate(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        pred = out.argmax(dim=1)
        correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
        acc = correct / data.test_mask.sum()
        return acc.item()
    

def reset_weights(m):
    if isinstance(m, (torch.nn.Linear)):
        m.reset_parameters()

In [31]:
# Model hyperparameters
in_channels = dataset.num_node_features  # 1433 (Cora input feature size)
hidden_channels = 64
out_channels = dataset.num_classes  # 7 (Cora has 7 classes)
# Initialize the model
model = SUM_GCN(in_channels, hidden_channels, out_channels).to(device)
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)


In [None]:
# Train the model
model.apply(reset_weights)
train(model, data, optimizer, criterion, epochs=50)

# Evaluate the model
acc = evaluate(model, data)
print(f'Test accuracy: {acc:.4f}')

Epoch 1/50, Loss: 1.9985
Epoch 11/50, Loss: 0.1533
Epoch 21/50, Loss: 0.0059
Epoch 31/50, Loss: 0.0007
Epoch 41/50, Loss: 0.0002
Test accuracy: 0.7620
Test dice: 0.7616


In [33]:
from torch_geometric.utils import to_dense_adj
# Node features (X)
x = data.x  # Shape: [num_nodes, num_features]
# Adjacency matrix (A)
A = to_dense_adj(data.edge_index)[0]  # Shape: [num_nodes, num_nodes]
K = (data.x) @ (data.x).T
m = 1000

In [None]:
from Perturbe_Algs import Attacks
from copy import deepcopy
attack_instance = Attacks(A,K,m, alpha = 50, filter = 'adj', max_iter=250,).to(device)

In [None]:
# The two algorithms roughtly take 10 minutes each to run on a single GPU(A100).
pgdavg_dataset = deepcopy(data)
pgdwst_dataset = deepcopy(data)

A_pgd_avg = attack_instance.Prob_PGD().clone().detach()
A_pgd_wst = attack_instance.Wst_PGD().clone().detach()

pgdavg_dataset.edge_index = torch.tensor(A_pgd_avg).nonzero(as_tuple=False).t().contiguous()
pgdwst_dataset.edge_index = torch.tensor(A_pgd_wst).nonzero(as_tuple=False).t().contiguous()

  S_iter = S_iter + (a/torch.linalg.norm(S_iter.grad, ord=2)) * S_iter.grad


Dataset and adjacency matrix saved.


  pgdavg_dataset.edge_index = torch.tensor(A_pgd_avg).nonzero(as_tuple=False).t().contiguous()
  pgdwst_dataset.edge_index = torch.tensor(A_pgd_wst).nonzero(as_tuple=False).t().contiguous()


In [None]:
# Save the dataset object
# Save the adjacency matrix (A)
torch.save(A_pgd_avg, 'avg_adj.pt')
torch.save(A_pgd_wst, 'wst_adj.pt')

# Save the modified datasets
torch.save(pgdavg_dataset, 'cora_pgdavg_data.pt')
torch.save(pgdwst_dataset, 'cora_pgdwst_data.pt')

print("Dataset and adjacency matrix saved.")

In [None]:
# Load the attacked graph adjacency matrix from folder'cora_adj'
# pgdavg_dataset = torch.load('cora_pgdavg_data.pt', weights_only=False)
# pgdwst_dataset = torch.load('cora_pgdwst_data.pt', weights_only=False)

In [41]:
num_rep = 10

list_ebd_random = []
list_ebd_pgdavg = []
list_ebd_pgdwst = []
list_acc = []
list_acc_random = []
list_acc_pgdavg = []
list_acc_pgdwst = []

for i in range(num_rep):

  A_random = attack_instance.randomAttack().clone().detach()
  random_dataset = deepcopy(data)
  random_dataset.edge_index = torch.tensor(A_random).nonzero(as_tuple=False).t().contiguous()

  model.apply(reset_weights)
  train(model, data, optimizer, criterion, epochs=50)
  test_acc = evaluate(model, data)
  acc_random = evaluate(model, random_dataset)
  acc_pgdavg = evaluate(model, pgdavg_dataset)
  acc_pgdwst = evaluate(model, pgdwst_dataset)

  list_acc.append(test_acc)
  list_acc_random.append(acc_random)
  list_acc_pgdavg.append(acc_pgdavg)
  list_acc_pgdwst.append(acc_pgdwst)


  ebd = model.get_ebd(data.x, data.edge_index)
  ebd_random = model.get_ebd(random_dataset.x, random_dataset.edge_index)
  ebd_pgdavg = model.get_ebd(pgdavg_dataset.x, pgdavg_dataset.edge_index)
  ebd_pgdwst = model.get_ebd(pgdwst_dataset.x, pgdwst_dataset.edge_index)


  p_ebd_random = torch.norm(ebd - ebd_random, p='fro')
  p_ebd_pgdavg = torch.norm(ebd - ebd_pgdavg, p='fro')
  p_ebd_pgdwst = torch.norm(ebd - ebd_pgdwst, p='fro')

  list_ebd_random.append(p_ebd_random.item())
  list_ebd_pgdavg.append(p_ebd_pgdavg.item())
  list_ebd_pgdwst.append(p_ebd_pgdwst.item())

  print(f"-----Iteration {i:d}-----")
  print(f"Test Acc: {test_acc:.4f}")
  print(f"PGD-AVG Test Acc: {acc_pgdavg:.4f}")
  print(f"PGD-WST Test Acc: {acc_pgdwst:.4f}")
  print(f"Random Test Acc: {acc_random:.4f}")



  random_dataset.edge_index = torch.tensor(A_random).nonzero(as_tuple=False).t().contiguous()


Epoch 1/50, Loss: 2.0845
Epoch 11/50, Loss: 0.1563
Epoch 21/50, Loss: 0.1032
Epoch 31/50, Loss: 0.0025
Epoch 41/50, Loss: 0.0023
-----Iteration 0-----
Test Acc: 0.7330
PGD-AVG Test Acc: 0.5850
PGD-WST Test Acc: 0.7260
Random Test Acc: 0.7050
Epoch 1/50, Loss: 2.0349
Epoch 11/50, Loss: 0.2107
Epoch 21/50, Loss: 0.2655
Epoch 31/50, Loss: 0.0999
Epoch 41/50, Loss: 0.0147
-----Iteration 1-----
Test Acc: 0.7430
PGD-AVG Test Acc: 0.5240
PGD-WST Test Acc: 0.7280
Random Test Acc: 0.7100
Epoch 1/50, Loss: 1.9921
Epoch 11/50, Loss: 0.1050
Epoch 21/50, Loss: 0.0054
Epoch 31/50, Loss: 0.0008
Epoch 41/50, Loss: 0.0004
-----Iteration 2-----
Test Acc: 0.7590
PGD-AVG Test Acc: 0.6030
PGD-WST Test Acc: 0.7520
Random Test Acc: 0.7230
Epoch 1/50, Loss: 1.9766
Epoch 11/50, Loss: 0.1384
Epoch 21/50, Loss: 0.0083
Epoch 31/50, Loss: 0.0016
Epoch 41/50, Loss: 0.0008
-----Iteration 3-----
Test Acc: 0.7510
PGD-AVG Test Acc: 0.5830
PGD-WST Test Acc: 0.7310
Random Test Acc: 0.7120
Epoch 1/50, Loss: 1.9418
Epoch 1

In [None]:

print(f"Test: {np.mean(list_acc)*100:.2f}, std:{np.std(list_acc)*100:.2f}")
print(f"Mean of random: {np.mean(list_acc_random)*100:.2f}, std:{np.std(list_acc_random)*100:.2f}")
print(f"Mean of Wst_PGD: {np.mean(list_acc_pgdwst)*100:.2f}, std: {np.std(list_acc_pgdwst)*100:.2f}")
print(f"Mean of Prob_PGD:{np.mean(list_acc_pgdavg)*100:.2f}, std: {np.std(list_acc_pgdavg):.2f}")


Test: 74.24, std:1.96
Mean of random: 70.67, std:2.43
Mean of Wst_PGD: 73.15, std: 2.03
Mean of Prob_PGD:57.02, std: 0.04


In [43]:
print(f"Mean of random embedding norm: {np.mean(list_ebd_random):.2f}, std:{np.std(list_ebd_random):.2f}")
print(f"Mean of Wst_PGD embedding norm: {np.mean(list_ebd_pgdwst):.2f}, std: {np.std(list_ebd_pgdwst):.2f}")
print(f"Mean of Prob_PGD embedding norm:{np.mean(list_ebd_pgdavg):.2f}, std: {np.std(list_ebd_pgdavg):.2f}")

Mean of random embedding norm: 1361.20, std:554.58
Mean of Wst_PGD embedding norm: 26285.39, std: 13684.87
Mean of Prob_PGD embedding norm:69498.89, std: 40595.28


In [44]:
print(f"Test: {np.mean(list_acc)*100:.2f} \pm {np.std(list_acc)*100:.2f}")
print(f"Mean of random: {np.mean(list_acc_random)*100:.2f} \pm {np.std(list_acc_random)*100:.2f}")
print(f"Mean of Wst_PGD: {np.mean(list_acc_pgdwst)*100:.2f} \pm {np.std(list_acc_pgdwst)*100:.2f}")
print(f"Mean of Prob_PGD:{np.mean(list_acc_pgdavg)*100:.2f} \pm {np.std(list_acc_pgdavg):.2f}")
print(f"Mean of random embedding norm: {np.mean(list_ebd_random):.2f} \pm {np.std(list_ebd_random):.2f}")
print(f"Mean of Wst_PGD embedding norm: {np.mean(list_ebd_pgdwst):.2f} \pm {np.std(list_ebd_pgdwst):.2f}")
print(f"Mean of Prob_PGD embedding norm:{np.mean(list_ebd_pgdavg):.2f} \pm {np.std(list_ebd_pgdavg):.2f}")

Test: 74.24 \pm 1.96
Mean of random: 70.67 \pm 2.43
Mean of Wst_PGD: 73.15 \pm 2.03
Mean of Prob_PGD:57.02 \pm 0.04
Mean of random embedding norm: 1361.20 \pm 554.58
Mean of Wst_PGD embedding norm: 26285.39 \pm 13684.87
Mean of Prob_PGD embedding norm:69498.89 \pm 40595.28
