In [1]:
# 全局攻击 数据集：异配图数据集；代理模型：GCN；攻击算法：最新的代码
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch_geometric.transforms as T
import torch.optim as optim
import scipy.sparse as sp
from copy import deepcopy
from torch_geometric.utils import to_undirected
from torch_sparse import SparseTensor
import matplotlib.pyplot as plt

from utils import *
from gcn import GCN

Using backend: pytorch


In [2]:
# 模拟命令行参数
class Args:
    def __init__(self, dataset='cora'):
        self.seed = 15
        self.dataset = dataset
        self.input_size = 0
        self.output_size = 0
        self.hidden_size = 64
        self.epochs = 2000
        self.lr = 0.001
        self.drop_prob = 0.5
        self.round = 2
        self.train_ratio = 0.6
        self.patience = 200

# 传入特定的参数值
args = Args(dataset='texas') 
# 默认dataset为'cora'，可以传入的dataset参数有：choices=['cora', 'citeseer', 'pubmed', 'film', 'squirrel', 'chameleon', 'texas', 'cornell', 'wisconsin']

In [3]:
# 是否使用cuda
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

cuda: True


In [4]:
# 加载数据集
g, nclass, features, labels, train, val, test = preprocess_data(args.dataset, args.train_ratio)

features = features.to(device)
labels = labels.to(device)

idx_train = train.numpy()
idx_test = test.numpy()
idx_val = val.numpy()

train = train.to(device)
test = test.to(device)
val = val.to(device)

texas 5




In [5]:
node_classes=labels
# 使用torch.unique函数获取唯一值
unique_classes = torch.unique(node_classes)
# 获取唯一值的数量
c = len(unique_classes)
num_loops = 2
perturbed_rate = 0.1 # 扰动率
print("一共有 {} 个类别。".format(c))

一共有 5 个类别。


In [6]:
# 得到adj和adj_2hop
def toCSR(spt):
    rowptr, col, value = spt.csr()
    mat = sp.csr_matrix((value, col, rowptr)).tolil()
    mat.setdiag(0)
    return mat.tocsr()

edge_index = to_undirected(g.edges())
adj = SparseTensor(row=edge_index[0], col=edge_index[1],
                   sparse_sizes=(g.num_nodes(), g.num_nodes())).fill_value(1.0)
adj2 = adj.matmul(adj).fill_value(1.0)
adj_2hop = (toCSR(adj2) - toCSR(adj)) > 0
adj_2hop = SparseTensor.from_scipy(adj_2hop).fill_value(1.0)

adj = adj.to(device)
adj_clean = adj
adj_2hop = adj_2hop.to(device)

In [7]:
# 得到目标节点的一跳和两跳邻居节点
# target_nodes = np.random.randint(15, labels.numel(), size=20)
dense_adj = adj.to_dense().cpu().numpy()
dense_adj_2hop = adj_2hop.to_dense().cpu().numpy()

In [8]:
# 预算，修改的边的数量
budget = int(g.num_edges() * perturbed_rate)
#budget = 10

# 全部节点的度
d = torch.sum(adj[test].to_dense(), dim = 1)
d = d.int()

# 全部节点的二阶邻域的度
d2 = torch.sum(adj_2hop[test].to_dense(), dim = 1)
d2 = d2.int()

# 转换为 CSR matrix，得到被攻击的初始图（=原图）
adj_attacked = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
adj_attacked = adj_attacked.astype(np.float32)

# 假设你想要添加的边的权重是1
edge_weight = 1

In [9]:
def trans1(features):
    # 转features数据格式
    # 获取非零元素的索引和对应的值
    indices = torch.nonzero(features).t()
    values = features[indices[0], indices[1]]
    # 构建CSR格式的三个数组
    row_ptr = torch.tensor([0] + torch.cumsum(torch.bincount(indices[0]), dim=0).tolist(), dtype=torch.int64)
    col_indices = indices[1]
    data = values
    # 创建CSR格式的稀疏矩阵
    features = sp.csr_matrix((data.cpu().numpy(), col_indices.cpu().numpy(), row_ptr.cpu().numpy()), shape=features.shape)
   
    return features

def trans2(features):
    # 将 CSR 格式的稀疏矩阵转换成 PyTorch 稀疏张量
    coo_matrix = features.tocoo()
    indices = torch.tensor([coo_matrix.row, coo_matrix.col], dtype=torch.long)
    values = torch.tensor(coo_matrix.data, dtype=torch.float32)
    size = coo_matrix.shape

    # 创建 PyTorch 稀疏张量
    sparse_tensor = torch.sparse_coo_tensor(indices, values, size=size)

    return sparse_tensor

In [10]:
def test_model(adj_ori):
    ''' test on GCN '''
    global gcn
    # adj = normalize_adj_tensor(adj)
    gcn = GCN(nfeat=features.shape[1],
              nhid=16,
              nclass=labels.max().item() + 1,
              dropout=0.5, device=device)

    gcn = gcn.to(device)

    gcn.fit(features, adj_ori, labels, idx_train) # train without model picking
    # gcn.fit(features, adj, labels, idx_train, idx_val) # train with validation model picking
    output = gcn.output
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return acc_test.item()

In [11]:
# 转换为 CSR matrix，原图
adj_clean = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
adj_clean = adj_clean.astype(np.float32)

features_ori =features
adj_ori = adj_clean
features = trans1(features)
features = trans2(features)

In [12]:
adj_clean = trans2(adj_clean)
print('=== testing GCN on clean graph ===')
test_model(adj_clean)

=== testing GCN on clean graph ===


Test set results: loss= 1.0298 accuracy= 0.5405


0.5405405405405406

攻击后

In [13]:
def attack(dense_adj, dense_adj_2hop, d_dim): # d_dim表示几阶邻域度
    global budget
    # 筛选：得到modi_node_idx、modi_node_label、edges_to_connect，分别表示修改的节点ID、修改的节点的标签、修改的节点需要添加的连接数
    modi_node_idx1 = []
    modi_node_label1 = []
    edges_to_connect1 = []
    modi_node_idx2 = []
    modi_node_label2 = []
    edges_to_connect2 = []
    sam1hop_sum = 0
    sam2hop_sum = 0
    src = []
    des = []
    for i in range(len(test)):
        target_node = test[i]
        target_label = labels[target_node]
        indices_adj = np.where(dense_adj[target_node] == 1)
        indices_adj_2hop = np.where(dense_adj_2hop[target_node] == 1)
        neighbor_labels = labels[indices_adj]
        neighbor_2hop__labels = labels[indices_adj_2hop]
        count1 = torch.sum(torch.eq(neighbor_labels, target_label))
        total_elements1 = neighbor_labels.numel()
        count2 = torch.sum(torch.eq(neighbor_2hop__labels, target_label))
        total_elements2 = neighbor_2hop__labels.numel()
        sam1hop = count1/total_elements1
        sam2hop = count2/total_elements2
        sam1hop_sum += abs(sam1hop - 1/c)
        sam2hop_sum += abs(sam2hop - 1/c)
        if d_dim==1: # 如果是一阶邻域的度
            if count1!=0 and d[i]>2: # 度大于2并且节点周围有与该节点标签一样的一阶邻居
                modi_node_idx1.append(target_node.item())
                modi_node_label1.append(labels[target_node].item())
                edges_to_connect1.append(int(c * count1 -d[i].item()))
            if count1==0 and d[i]>2: # 度大于2并且节点周围没有与该节点标签一样的一阶邻居
                modi_node_idx2.append(target_node.item())
                modi_node_label2.append(labels[target_node].item())
                edges_to_connect2.append(d[i].item())
        elif d_dim==2: # 如果是二阶邻域的度
            if count2!=0 and d2[i]>2: # 度大于2并且节点周围有与该节点标签一样的二阶邻居
                #print("邻居：", indices_adj[0][0])
                neighbor_id = indices_adj[0][0]
                #modi_node_idx1.append(target_node.item())
                #modi_node_label1.append(labels[target_node].item())
                modi_node_idx1.append(neighbor_id)
                modi_node_label1.append(labels[neighbor_id].item())
                edges_to_connect1.append(int(c * count2 -d2[i].item()))
            if count2==0 and d2[i]>2: # 度大于2并且节点周围没有与该节点标签一样的二阶邻居
                #print("邻居：", indices_adj[0][0])
                neighbor_id = indices_adj[0][0]
                #modi_node_idx2.append(target_node.item())
                #modi_node_label2.append(labels[target_node].item())
                modi_node_idx2.append(neighbor_id)
                modi_node_label2.append(labels[neighbor_id].item())
                edges_to_connect2.append(d2[i].item())

    # 排序：先攻击connect_num大的节点（实验结果）
    connect_num = [[] for _ in range(c)]
    sorted_num = [[] for _ in range(c)]
    idxs = [[] for _ in range(c)]
    for i in range(c):
        for j in range(len(modi_node_idx1)):
            if(modi_node_label1[j] == i):
                connect_num[i].append(edges_to_connect1[j])
                idxs[i].append(modi_node_idx1[j])
                sorted_num[i].append(edges_to_connect1[j]) # 先攻击connect_num大的节点
        # 建立索引列表并根据 a[j] 的值排序（从大到小）
        sorted_indices = sorted(range(len(sorted_num[i])), key=lambda x: sorted_num[i][x], reverse=True)
        # 根据排序后的索引重新排列 b[j]
        sorted_idxs = [idxs[i][k] for k in sorted_indices]
        # 根据排序后的索引重新排列 a[j]
        sorted_edges_to_connect = [connect_num[i][k] for k in sorted_indices]
        # 更新 a[j] 和 b[j]
        connect_num[i] = sorted_edges_to_connect
        idxs[i] = sorted_idxs

    num = 0
    # 攻击不完全异配的节点
    for i in range(c):
        for k in range(len(connect_num[i])):
            for j in range(c):
                for q in range(len(connect_num[j])):
                    if(budget>0 and i!=j and connect_num[i][k]>0 and connect_num[j][q]>0):
                        a = idxs[i][k]
                        b = idxs[j][q]
                        if adj_attacked[a,b] == 0: # 如果节点i和节点j不同类且未连接在一起
                            adj_attacked[a,b] = edge_weight # 连接节点i和节点j
                            src.append(a)
                            des.append(b)
                            connect_num[i][k] -= 1 # 节点i要连接的边数减去1
                            connect_num[j][q] -= 1 # 节点j要连接的边数减去1
                            budget -= 1 # 预算减去1
                            num = num + 1 #添加的边数
    # 攻击完全异配的节点
    for i in range(len(modi_node_idx2)):  # 42个目标节点，下标0-41
        for j in range(len(modi_node_idx2)):
            if edges_to_connect2[i] > 0 and budget>0: # 如果节点i要连接的边数大于0
                if modi_node_label2[i] == modi_node_label2[j] and adj_attacked[i,j] == 0: # 如果节点i和节点j同类且未连接在一起
                    adj_attacked[i,j] = edge_weight # 连接节点i和节点j
                    src.append(i)
                    des.append(j)
                    edges_to_connect2[i] -= 1 # 节点i要连接的边数减去1
                    edges_to_connect2[j] -= 1 # 节点j要连接的边数减去1
                    budget -= 1 # 预算减去1
                    num = num + 1 #添加的边数
    print(src)
    print(des)
    return num

In [14]:
num1 = 0
num2 = 0
num3 = 0
num4 = 0 
for i in range(num_loops):
    # 攻击，可以交换顺序
    
    num1 = attack(dense_adj, dense_adj_2hop, d_dim=1)
    num2 = attack(dense_adj, dense_adj_2hop, d_dim=2)
    
    # 将邻接矩阵转换为 COO 格式
    coo_adjacency_matrix = sp.coo_matrix(adj_attacked)
    # 创建一个 DGL 图对象
    g_attacked = dgl.graph((coo_adjacency_matrix.row, coo_adjacency_matrix.col))
    edge_index = to_undirected(g_attacked.edges())
    adj = SparseTensor(row=edge_index[0], col=edge_index[1],
                        sparse_sizes=(g_attacked.num_nodes(), g_attacked.num_nodes())).fill_value(1.0)
    adj2 = adj.matmul(adj).fill_value(1.0)
    adj_2hop = (toCSR(adj2) - toCSR(adj)) > 0
    adj_2hop = SparseTensor.from_scipy(adj_2hop).fill_value(1.0)

    adj = adj.to(device)
    mod_adj = adj
    adj_2hop = adj_2hop.to(device)
    
    # 得到目标节点的一跳和两跳邻居节点
    dense_adj = adj.to_dense().cpu().numpy()
    dense_adj2 = dense_adj
    dense_adj_2hop = adj_2hop.to_dense().cpu().numpy()

  self._set_intXint(row, col, x.flat[0])


[147, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8]
[146, 0, 2, 1, 6, 0, 2, 3, 5, 0, 2, 4, 3, 5, 1, 6, 7, 8, 1, 6, 1, 6]
[56, 56, 56, 56, 56, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 66, 66, 66, 66, 66, 66, 66, 66, 29, 29, 29, 29, 29, 29, 29, 29]
[58, 173, 15, 4, 95, 29, 47, 58, 173, 84, 116, 146, 15, 4, 80, 131, 95, 29, 47, 58, 173, 84, 116, 146, 15, 50, 66, 116, 146, 15, 4, 80, 131]
[]
[]
[]
[]


In [15]:
# 全部节点的度
d_clean = torch.sum(adj_clean.to_dense(), dim = 1)
d_clean = d_clean.int()
# 全部节点的度
d_mod = torch.sum(mod_adj.to_dense(), dim = 1)
d_mod = d_mod.int()

In [16]:
tensor_adj = torch.tensor(dense_adj)
tensor_adj = trans1(tensor_adj)
tensor_adj = trans2(tensor_adj)
modified_adj = tensor_adj.to(device)

In [17]:
# 逃逸攻击
print('=== testing GCN on perturbed graph ===')
gcn.eval()
with torch.no_grad():
    logits = gcn(features_ori, modified_adj)
pred = logits.argmax(dim=-1)
test_acc = (pred[idx_test] == labels[idx_test]).float().mean()
print(test_acc)

=== testing GCN on perturbed graph ===
tensor(0.5946, device='cuda:0')
