In [1]:
# 全局攻击 数据集：异配图数据集；代理模型：GCN；攻击算法：TopoAttack：Min-Max、PGD
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch_geometric.transforms as T
import torch.optim as optim
import scipy.sparse as sp
from copy import deepcopy
from torch_geometric.utils import to_undirected
from torch_sparse import SparseTensor
from tqdm import tqdm
import time

from utils import *
from gcn import GCN
from H2GCN import H2GCN
from topology_attack import MinMax

Using backend: pytorch


In [2]:
# 模拟命令行参数
class Args:
    def __init__(self, dataset='cora', model='min-max'):
        self.seed = 15
        self.dataset = dataset
        self.input_size = 0
        self.output_size = 0
        self.hidden_size = 64
        self.epochs = 2000
        self.lr = 0.001
        self.drop_prob = 0.5
        self.round = 2
        self.train_ratio = 0.6
        self.ptb_rate = 0.05
        self.patience = 200
        self.model = model

# 传入特定的参数值
args = Args(dataset='cora', model='min-max') 
# 默认dataset为'cora'，可以传入的dataset参数有：choices=['cora', 'citeseer', 'pubmed', 'film', 'squirrel', 'chameleon', 'texas', 'cornell', 'wisconsin']
# model默认'min-max'，可选['PGD', 'min-max']

In [3]:
# 是否使用cuda
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

cuda: True


In [4]:
# 加载数据集
g, nclass, features, labels, train, val, test = preprocess_data(args.dataset, args.train_ratio)

idx_train = train.numpy()
idx_test = test.numpy()
idx_val = val.numpy()

train = train.to(device)
test = test.to(device)
val = val.to(device)

cora 7


In [5]:
# 得到adj和adj_2hop
def toCSR(spt):
    rowptr, col, value = spt.csr()
    mat = sp.csr_matrix((value, col, rowptr)).tolil()
    mat.setdiag(0)
    return mat.tocsr()

edge_index = to_undirected(g.edges())
adj = SparseTensor(row=edge_index[0], col=edge_index[1],
                   sparse_sizes=(g.num_nodes(), g.num_nodes())).fill_value(1.0)
adj2 = adj.matmul(adj).fill_value(1.0)
adj_2hop = (toCSR(adj2) - toCSR(adj)) > 0
adj_2hop = SparseTensor.from_scipy(adj_2hop).fill_value(1.0)

adj = adj.to(device)
adj_clean0 = adj
adj_2hop = adj_2hop.to(device)

In [6]:
# 得到目标节点的一跳和两跳邻居节点
# target_nodes = np.random.randint(15, labels.numel(), size=20)
dense_adj = adj.to_dense().cpu().numpy()
dense_adj_2hop = adj_2hop.to_dense().cpu().numpy()

In [7]:
def train_model(model, adj, adj_2hop, optimizer, loss_fn):
    model.train()
    logits = model(features, adj, adj_2hop)
    pred = logits.argmax(dim=-1)
    train_loss = loss_fn(logits[train], labels[train])
    train_acc = (pred[train] == labels[train]).float().mean()
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    return train_loss, train_acc

def evaluate_val(model, adj, adj_2hop):
    model.eval()
    with torch.no_grad():
        logits = model(features, adj, adj_2hop)
    pred = logits.argmax(dim=-1)
    val_acc = (pred[val] == labels[val]).float().mean()
    return val_acc

def evaluate_test(model, adj, adj_2hop):
    model.eval()
    with torch.no_grad():
        logits = model(features, adj, adj_2hop)
    pred = logits.argmax(dim=-1)
    test_acc = (pred[test] == labels[test]).float().mean()
    return test_acc

In [8]:
features_cpu = features
labels_cpu = labels
features = features.to(device)
labels = labels.to(device)

In [9]:
# H2GCN模型初始化
# 使用torch.unique()函数找到张量中的唯一值
unique_classes = torch.unique(labels)
# 统计唯一值的数量，即类别数
num_classes = len(unique_classes)
# 模型参数设置
args.input_size = len(features[0])
args.output_size = num_classes

model = H2GCN(in_channels=args.input_size,
                hidden_channels=args.hidden_size,
                out_channels=args.output_size,
                drop_prob=args.drop_prob,
                round=args.round)

model = model.to(device)

In [10]:
optimizer = optim.Adam(params=model.parameters(), lr=args.lr)
loss_fn = nn.NLLLoss()

best_val_acc, best_model = 0., None
for i in range(args.epochs):
    train_loss, train_acc = train_model(model, adj, adj_2hop, optimizer, loss_fn)
    val_acc = evaluate_val(model, adj, adj_2hop)
    if (i + 1) % 200 == 0:
        print("ep{}: train loss: {:.4f} train acc: {:.4f} val acc: {:.4f}".format(i + 1, train_loss, train_acc, val_acc))

    if best_val_acc < val_acc:
        best_val_acc = val_acc
        best_model = deepcopy(model)

test_acc = evaluate_test(best_model, adj, adj_2hop)
print("test acc: {:.4f}".format(test_acc))

ep200: train loss: 0.5659 train acc: 0.8714 val acc: 0.7060
ep400: train loss: 0.2643 train acc: 0.9500 val acc: 0.7060
ep600: train loss: 0.1075 train acc: 1.0000 val acc: 0.7100
ep800: train loss: 0.0877 train acc: 0.9929 val acc: 0.7100
ep1000: train loss: 0.0362 train acc: 1.0000 val acc: 0.7140
ep1200: train loss: 0.0220 train acc: 1.0000 val acc: 0.7260
ep1400: train loss: 0.0296 train acc: 0.9929 val acc: 0.7160
ep1600: train loss: 0.0192 train acc: 0.9929 val acc: 0.7200
ep1800: train loss: 0.0777 train acc: 0.9929 val acc: 0.7260
ep2000: train loss: 0.0068 train acc: 1.0000 val acc: 0.7160
test acc: 0.7420


In [11]:
def trans1(features):
    # 转features数据格式
    # 获取非零元素的索引和对应的值
    indices = torch.nonzero(features).t()
    values = features[indices[0], indices[1]]
    # 构建CSR格式的三个数组
    row_ptr = torch.tensor([0] + torch.cumsum(torch.bincount(indices[0]), dim=0).tolist(), dtype=torch.int64)
    col_indices = indices[1]
    data = values
    # 创建CSR格式的稀疏矩阵
    features = sp.csr_matrix((data.cpu().numpy(), col_indices.cpu().numpy(), row_ptr.cpu().numpy()), shape=features.shape)
   
    return features

def trans2(features):
    # 将 CSR 格式的稀疏矩阵转换成 PyTorch 稀疏张量
    coo_matrix = features.tocoo()
    indices = torch.tensor([coo_matrix.row, coo_matrix.col], dtype=torch.long)
    values = torch.tensor(coo_matrix.data, dtype=torch.float32)
    size = coo_matrix.shape

    # 创建 PyTorch 稀疏张量
    sparse_tensor = torch.sparse_coo_tensor(indices, values, size=size)

    return sparse_tensor

In [12]:
# 转换为adj_clean（CSR matrix），原图adj为Tensor
adj_clean = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
adj_clean = adj_clean.astype(np.float32)
adj_sparse = trans2(adj_clean)
adj = adj_sparse.to_dense()
adj_cpu = adj

In [13]:
# 设置代理模型GCN
victim_model = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1, nhid=16,
        dropout=0.5, weight_decay=5e-4, device=device)

victim_model = victim_model.to(device)
victim_model.fit(features, adj, labels, idx_train)

In [14]:
# 设置攻击模型
model = MinMax(model=victim_model, nnodes=adj.shape[0], loss_type='CE', device=device)

model = model.to(device)

In [15]:
start_time = time.time()  # 获取开始时间

In [16]:
# 扰动的边数
perturbations = int(g.num_edges() * args.ptb_rate)

# 进行攻击
model.attack(features_cpu, adj, labels_cpu, idx_train, perturbations)
modified_adj = model.modified_adj
modified_adj = modified_adj.to('cpu')

  0%|          | 0/200 [00:00<?, ?it/s]

100%|██████████| 200/200 [00:29<00:00,  6.84it/s]


In [17]:
# 将PyTorch张量转换为CSR格式
sparse_matrix = sp.csr_matrix(modified_adj.numpy())
modified_g = dgl.from_scipy(sparse_matrix)
modified_edge_index = to_undirected(modified_g.edges())
adj_attacked = SparseTensor(row=modified_edge_index[0], col=modified_edge_index[1],
                   sparse_sizes=(modified_g.num_nodes(), modified_g.num_nodes())).fill_value(1.0)
adj2_attacked = adj_attacked.matmul(adj_attacked).fill_value(1.0)
adj_attacked_2hop = (toCSR(adj2_attacked) - toCSR(adj_attacked)) > 0
adj_attacked_2hop = SparseTensor.from_scipy(adj_attacked_2hop).fill_value(1.0)

adj_attacked = adj_attacked.to(device)
adj_attacked_2hop = adj_attacked_2hop.to(device)

In [18]:
# 逃逸攻击
test_acc = evaluate_test(best_model, adj_attacked, adj_attacked)
print("test acc: {:.4f}".format(test_acc))

test acc: 0.6940


In [19]:
end_time = time.time()  # 获取结束时间
elapsed_time = end_time - start_time  # 计算经过的时间
print(f"Elapsed time: {elapsed_time} seconds")

Elapsed time: 31.74736738204956 seconds
