In [1]:
# Global attack, evasion setting. Dataset: heterophilous graph datasets; surrogate model: GCN; attack algorithm: AtkSE
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch_geometric.transforms as T
import torch.optim as optim
import scipy.sparse as sp
from copy import deepcopy
from torch_geometric.utils import to_undirected
from torch_sparse import SparseTensor
from tqdm import tqdm
from torch_geometric.data import Data

from utils import *
from gcn import GCN
from AtkSE import AtkSE
from gcn_AtkSE import GCN_AtkSE

Using backend: pytorch


In [2]:
# 模拟命令行参数
class Args:
    """Stand-in for parsed command-line arguments.

    Only ``dataset``, ``weight_decay`` and ``model`` can be chosen through the
    constructor; every other setting is a fixed value stored as an instance
    attribute of the same name.
    """

    def __init__(self, dataset='cora', weight_decay=5e-4, model='Meta-Self'):
        # Ordered table of settings; insertion order is the attribute order
        # seen through vars(args).
        settings = {
            'seed': 15,                    # RNG seed
            'dataset': dataset,
            'input_size': 0,
            'output_size': 0,
            'hidden_size': 64,
            'hidden': 16,                  # hidden width handed to the attack model
            'epochs': 2000,
            'lr': 0.001,
            'drop_prob': 0.5,
            'round': 2,
            'train_ratio': 0.6,            # forwarded to the dataset loader
            'patience': 200,
            'weight_decay': weight_decay,
            'ptb_rate': 0.1,               # scales the perturbation budget
            'model': model,
            'momentum': 0.9,
            # The remaining entries are presumably read by the attack through
            # `args` -- TODO confirm against AtkSE.
            'dropnode': 0.05,
            'gauss_noise': 2e-4,
            'smooth_loop': 40,
            'wait_list': 4,
            'intervals': 0.25,
            'candidates': 32,
        }
        for name, value in settings.items():
            setattr(self, name, value)
        
# Override selected defaults for this run.
# dataset: defaults to 'cora'; accepted values are 'cora', 'citeseer', 'pubmed',
#          'film', 'squirrel', 'chameleon', 'texas', 'cornell', 'wisconsin'.
# model:   defaults to 'Meta-Self'; alternatives are 'A-Meta-Self',
#          'Meta-Train', 'A-Meta-Train'.
args = Args(
    dataset='cora',
    weight_decay=1e-4,
    model='Meta-Self',
)

In [3]:
# Detect CUDA once; the same flag drives the device choice and the CUDA seeding.
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if args.cuda else "cpu")

# Seed the NumPy and PyTorch generators so repeated runs draw the same numbers.
torch.manual_seed(args.seed)
np.random.seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

cuda: True


In [4]:
# Load the graph, features, labels and the train/val/test index splits.
g, nclass, features, labels, train, val, test = preprocess_data(args.dataset, args.train_ratio)
features = features.to(device)

# NumPy views of the split indices; .numpy() needs host tensors, so take them
# before the splits are moved to `device`.
idx_train, idx_val, idx_test = train.numpy(), val.numpy(), test.numpy()

# Device-resident versions of the same splits.
train, val, test = train.to(device), val.to(device), test.to(device)

cora 7


In [5]:
def trans1(features):
    """Convert a dense 2-D feature tensor into a SciPy CSR matrix.

    Args:
        features: 2-D ``torch.Tensor`` (any device; it is copied to the host).

    Returns:
        ``scipy.sparse.csr_matrix`` with the same shape as ``features`` holding
        its non-zero entries.
    """
    # Coordinates of the non-zero entries; torch.nonzero returns them in
    # row-major order, which is the ordering CSR expects.
    indices = torch.nonzero(features).t()
    values = features[indices[0], indices[1]]

    # Per-row non-zero counts. minlength guarantees one bucket per row even
    # when trailing rows (or the whole matrix) are all zero; without it the
    # row pointer comes out too short and csr_matrix rejects it.
    row_counts = torch.bincount(indices[0], minlength=features.shape[0])
    row_ptr = torch.cat([row_counts.new_zeros(1), torch.cumsum(row_counts, dim=0)])
    col_indices = indices[1]

    # Assemble the CSR matrix from (data, column indices, row pointer).
    return sp.csr_matrix(
        (values.cpu().numpy(), col_indices.cpu().numpy(), row_ptr.cpu().numpy()),
        shape=features.shape,
    )

def trans2(features):
    """Convert a SciPy sparse matrix into a PyTorch sparse COO tensor.

    Args:
        features: SciPy sparse matrix (any format exposing ``tocoo()``).

    Returns:
        Sparse COO ``torch.Tensor`` of dtype float32 with the same shape; it is
        created without a device argument, i.e. on the default (host) device.
    """
    coo_matrix = features.tocoo()

    # Stack the coordinates into one (2, nnz) ndarray before handing them to
    # torch: building a tensor from a Python list of ndarrays is very slow and
    # makes PyTorch emit a warning.
    coords = np.vstack((coo_matrix.row, coo_matrix.col))
    indices = torch.as_tensor(coords, dtype=torch.long)
    # torch.tensor copies, so the result never aliases the caller's data.
    values = torch.tensor(coo_matrix.data, dtype=torch.float32)
    size = coo_matrix.shape

    # Build the PyTorch sparse tensor.
    sparse_tensor = torch.sparse_coo_tensor(indices, values, size=size)

    return sparse_tensor

In [6]:
# Clean adjacency in three forms: SciPy CSR float32 (adj_clean), PyTorch sparse
# tensor (adj_sparse) and dense tensor on `device` (adj).
adj_clean = g.adjacency_matrix(transpose=True, scipy_fmt="csr").astype(np.float32)
adj_sparse = trans2(adj_clean)
adj = adj_sparse.to_dense().to(device)

In [7]:
# Count how many distinct label values occur in the dataset.
node_classes = labels
unique_classes = node_classes.unique()
c = unique_classes.numel()
num_loops = 2
print("一共有 {} 个类别。".format(c))

一共有 7 个类别。


In [8]:
def test(adj):
    """Train a fresh GCN on ``adj`` and report its result on the test split.

    The trained model is stored in the module-level ``gcn`` so that later
    cells can reuse it. ``features``, ``labels``, ``idx_train``, ``idx_test``
    and ``device`` are read from the notebook's globals.

    NOTE(review): this function's name rebinds the ``test`` split tensor
    created when the dataset is loaded.

    Args:
        adj: adjacency structure handed to ``gcn.fit``.

    Returns:
        Test accuracy as a Python number.
    """
    global gcn
    # adj = normalize_adj_tensor(adj)
    n_classes = labels.max().item() + 1
    gcn = GCN(nfeat=features.shape[1],
              nhid=16,
              nclass=n_classes,
              dropout=0.5, device=device).to(device)

    gcn.fit(features, adj, labels, idx_train)  # train without model picking
    # gcn.fit(features, adj, labels, idx_train, idx_val) # train with validation model picking

    # Restrict the model output stored by fit() and the labels to the test split.
    test_output = gcn.output[idx_test]
    test_labels = labels[idx_test]
    loss_test = F.nll_loss(test_output, test_labels)
    acc_test = accuracy(test_output, test_labels)
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return acc_test.item()

In [9]:
# Keep a host-side copy of the labels and put the working copy on `device`.
# Going through .cpu() keeps this cell idempotent: if it is re-run after
# `labels` already lives on the GPU, `labels_cpu` is still a host tensor
# instead of silently becoming an alias of the device tensor.
labels_cpu = labels.cpu()
labels = labels_cpu.to(device)

In [10]:
# Baseline: train a GCN on the unperturbed graph. The call also sets the global
# `gcn` that the perturbed-graph evaluation reuses; the returned accuracy is
# displayed as this cell's output.
print('=== testing GCN on clean graph ===')
test(adj_sparse)

=== testing GCN on clean graph ===
Test set results: loss= 0.7718 accuracy= 0.8190


0.8190000000000001

In [11]:
# Build the AtkSE attacker for this graph.
# attack_features=False -- presumably only the structure is perturbed; confirm in AtkSE.
model = AtkSE(
    args,
    nfeat=features.shape[1],
    hidden_sizes=[args.hidden],
    nnodes=adj.shape[0],
    nclass=nclass,
    dropout=0.5,
    train_iters=100,
    attack_features=False,
    lambda_=0,
    device=device,
    momentum=args.momentum,
)

In [12]:
# Sorted union of the validation and test indices, passed to the attack as the
# unlabeled node set.
idx_unlabeled = np.union1d(idx_val, idx_test)
# Perturbation budget: ptb_rate times half the sum of the adjacency entries
# (presumably the undirected edge count -- assumes adj is symmetric and binary; TODO confirm).
perturbations = int(args.ptb_rate * (adj.sum()//2))

In [13]:
# Run the attack and keep the perturbed adjacency on `device`.
# This is the expensive step: the recorded run needed about 7h18m for 527 perturbations.
modified_adj = model(features, adj, labels, idx_train, idx_unlabeled, perturbations).to(device)

Perturbing graph: 100%|██████████| 527/527 [7:17:45<00:00, 49.84s/it]  


In [14]:
# Evasion attack: the GCN already trained on the clean graph (global `gcn`) is
# evaluated, without retraining, on the perturbed adjacency.
# NOTE(review): modified_adj is fed to the forward pass as-is; confirm in gcn.py
# whether forward() expects a normalised adjacency, as the training path may apply one.
print('=== testing GCN on perturbed graph ===')
gcn.eval()
with torch.no_grad():
    logits = gcn(features, modified_adj)
pred = torch.argmax(logits, dim=-1)
test_acc = pred[idx_test].eq(labels[idx_test]).float().mean()
print(test_acc)

=== testing GCN on perturbed graph ===
tensor(0.7160, device='cuda:0')


In [15]:
import pickle

# One file per dataset. NOTE(review): the name does not encode ptb_rate, seed or
# attack variant, so runs with different settings overwrite each other.
file_name = f'{args.dataset}_AtkSE_modified_adj.pkl'

# Persist the perturbed adjacency to disk.
with open(file_name, 'wb') as f:
    pickle.dump(modified_adj, f)

In [16]:
# (Removed a stray bare name `a` whose only effect was to raise NameError and
# halt "Run All" at this point; a finished notebook should run top to bottom
# without raising.)

In [None]:
import pickle

file_name = f'{args.dataset}_AtkSE_modified_adj.pkl'

# Read the perturbed adjacency back from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load files written by this notebook. The object was pickled after being
# moved to `device`, so loading presumably needs the same kind of device; confirm.
with open(file_name, 'rb') as f:
    modified_adj = pickle.load(f)