In [1]:
# 全局攻击 数据集：异配图数据集；代理模型：H2GCN；网格搜索，找到最佳lr和epochs
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch_geometric.transforms as T
import torch.optim as optim
import scipy.sparse as sp
from copy import deepcopy
from torch_geometric.utils import to_undirected
from torch_sparse import SparseTensor
from tqdm import tqdm

from utils import *
from H2GCN import H2GCN
from gcn import GCN
from DICE import DICE

Using backend: pytorch


In [2]:
# Stand-in for parsed command-line arguments.
class Args:
    """Plain container for the experiment's hyper-parameters.

    Only ``dataset``, ``lr`` and ``epochs`` are configurable through the
    constructor; every other field is a fixed default.

    Args:
        dataset: Name of the dataset to load (default ``'cora'``).
        lr: Learning rate handed to the optimizer.
        epochs: Number of training epochs.
    """

    def __init__(self, dataset='cora', lr=0.001, epochs=2000):
        # Random seed and dataset selection.
        self.seed, self.dataset = 15, dataset
        # Placeholders: init_model() overwrites both once the data is loaded.
        self.input_size = self.output_size = 0
        self.hidden_size = 64
        # Optimisation settings supplied by the caller.
        self.lr, self.epochs = lr, epochs
        self.drop_prob = 0.5
        # Forwarded to H2GCN as its ``round`` argument.
        self.round = 2
        # Forwarded to preprocess_data() together with the dataset name.
        self.train_ratio = 0.6
        # NOTE(review): not read anywhere in the visible part of the notebook.
        self.patience = 200


In [3]:
import itertools

# Candidate values explored by the grid search.
lr_values = [1e-4, 1e-3, 1e-2]
epochs_values = [2000]

# Running record of the best configuration found so far.
best_lr = best_epochs = None
best_accuracy = 0.0  # accuracy is the metric being maximised

In [4]:
def toCSR(spt):
    """Convert a sparse tensor to a SciPy CSR matrix with a zeroed diagonal.

    Args:
        spt: Sparse tensor exposing ``csr()`` -> ``(rowptr, col, value)`` and
            ``sparse_sizes()`` -> ``(n_rows, n_cols)`` (the torch_sparse
            ``SparseTensor`` interface).

    Returns:
        A ``scipy.sparse.csr_matrix`` of the tensor's declared size whose
        diagonal entries have been set to 0.
    """
    rowptr, col, value = spt.csr()
    # Pass the declared size explicitly. Without ``shape`` SciPy infers the
    # column count as max(col) + 1, so trailing nodes without any edge would
    # silently shrink the matrix below the real node count.
    mat = sp.csr_matrix((value, col, rowptr), shape=spt.sparse_sizes()).tolil()
    # LIL format makes the diagonal assignment cheap; convert back afterwards.
    mat.setdiag(0)
    return mat.tocsr()

def get_adj_2hop():
    """Build the 1-hop and strict 2-hop adjacency tensors of the global graph.

    Reads the module-level graph ``g`` and target ``device``.

    Returns:
        Tuple ``(adj, adj_2hop)`` of ``SparseTensor`` objects on ``device``
        with every stored value set to 1.0. ``adj`` is the symmetrised edge
        set; ``adj_2hop`` keeps the off-diagonal entries of ``adj @ adj``
        that are not already present in ``adj``.
    """
    sym_edges = to_undirected(g.edges())
    n = g.num_nodes()

    one_hop = SparseTensor(row=sym_edges[0], col=sym_edges[1], sparse_sizes=(n, n))
    one_hop = one_hop.fill_value(1.0)

    # adj @ adj marks every pair joined by a walk of length two.
    two_step = one_hop.matmul(one_hop).fill_value(1.0)

    # toCSR() clears the diagonals; subtracting the direct edges and keeping
    # the positive entries leaves only pairs that are not direct neighbours.
    strict_two_hop = (toCSR(two_step) - toCSR(one_hop)) > 0
    two_hop = SparseTensor.from_scipy(strict_two_hop).fill_value(1.0)

    return one_hop.to(device), two_hop.to(device)

In [5]:
def init_model(args):
    """Create an H2GCN model sized for the currently loaded dataset.

    Reads the module-level ``features``, ``labels`` and ``device``.

    Args:
        args: Hyper-parameter object. ``input_size`` and ``output_size`` are
            overwritten in place; ``hidden_size``, ``drop_prob`` and ``round``
            are read.

    Returns:
        The H2GCN instance, moved to ``device``.
    """
    # Input width is the length of one feature row; output width is the
    # number of distinct label values.
    args.input_size = len(features[0])
    args.output_size = len(torch.unique(labels))

    net = H2GCN(
        in_channels=args.input_size,
        hidden_channels=args.hidden_size,
        out_channels=args.output_size,
        drop_prob=args.drop_prob,
        round=args.round,
    )
    return net.to(device)

In [6]:
def train_model(model, adj, adj_2hop, optimizer, loss_fn):
    """Run a single optimisation step on the training split.

    Reads the module-level ``features``, ``labels`` and ``train`` index.

    Args:
        model: Network called as ``model(features, adj, adj_2hop)``.
        adj: 1-hop adjacency passed through to the model.
        adj_2hop: 2-hop adjacency passed through to the model.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_fn: Callable ``loss_fn(logits, targets)``.

    Returns:
        Tuple ``(train_loss, train_acc)`` measured on the forward pass made
        before the parameter update.
    """
    model.train()
    out = model(features, adj, adj_2hop)

    target = labels[train]
    loss = loss_fn(out[train], target)
    hits = out.argmax(dim=-1)[train] == target
    acc = hits.float().mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss, acc

def evaluate_val(model, adj, adj_2hop):
    """Return the model's accuracy on the validation split.

    Reads the module-level ``features``, ``labels`` and ``val`` index. The
    model is switched to eval mode and the forward pass runs without
    gradient tracking.
    """
    model.eval()
    with torch.no_grad():
        scores = model(features, adj, adj_2hop)
    predicted = scores.argmax(dim=-1)
    correct = predicted[val] == labels[val]
    return correct.float().mean()

def evaluate_test(model, adj, adj_2hop):
    """Return the model's accuracy on the test split.

    Reads the module-level ``features``, ``labels`` and ``test`` index. The
    model is switched to eval mode and the forward pass runs without
    gradient tracking.
    """
    model.eval()
    with torch.no_grad():
        output = model(features, adj, adj_2hop)
    guesses = output.argmax(dim=-1)
    matches = (guesses[test] == labels[test]).float()
    return matches.mean()

In [7]:
# Grid search: train one H2GCN for every (lr, epochs) combination.
#
# The names assigned inside the loop (device, g, features, labels, train, val,
# test) are deliberately module-level: get_adj_2hop, init_model, train_model
# and the evaluate_* helpers read them as globals.
for lr, epochs in itertools.product(lr_values, epochs_values):
    # Build the argument object. Args defaults to 'cora'; the dataset names
    # listed as accepted are: 'cora', 'citeseer', 'pubmed', 'film',
    # 'squirrel', 'chameleon', 'texas', 'cornell', 'wisconsin'.
    args = Args(dataset='citeseer', lr=lr, epochs=epochs)

    # Use CUDA when available.
    args.cuda = torch.cuda.is_available()
    print('cuda: %s' % args.cuda)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Re-seed for every configuration so each run starts from the same RNG state.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load the dataset and its train/val/test split.
    g, nclass, features, labels, train, val, test = preprocess_data(args.dataset, args.train_ratio)

    features = features.to(device)
    labels = labels.to(device)

    # NumPy versions of the split indices, taken before the move to `device`.
    # Not used in the visible part of the notebook -- presumably consumed by
    # later cells; verify before removing.
    idx_train = train.numpy()
    idx_test = test.numpy()
    idx_val = val.numpy()

    train = train.to(device)
    test = test.to(device)
    val = val.to(device)

    adj, adj_2hop = get_adj_2hop()
    model = init_model(args)
    optimizer = optim.Adam(params=model.parameters(), lr=args.lr)
    # NLLLoss expects log-probabilities; assumes H2GCN ends in log_softmax -- TODO confirm.
    loss_fn = nn.NLLLoss()

    # NOTE(review): args.patience is never consulted, so training always runs
    # for the full args.epochs; only the best-on-validation snapshot is kept.
    best_val_acc, best_model = 0., None
    for i in range(args.epochs):
        train_loss, train_acc = train_model(model, adj, adj_2hop, optimizer, loss_fn)
        # Plain float keeps the bookkeeping off the device.
        val_acc = float(evaluate_val(model, adj, adj_2hop))
        if (i + 1) % 200 == 0:
            print("ep{}: train loss: {:.4f} train acc: {:.4f} val acc: {:.4f}".format(i + 1, train_loss, train_acc, val_acc))

        # Always keep a snapshot from the first epoch: with a strict
        # `best_val_acc < val_acc` alone, a run whose validation accuracy never
        # rises above 0 would leave best_model as None and crash below.
        if best_model is None or val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = deepcopy(model)

    # Convert to float so best_accuracy stays a Python number instead of
    # turning into a (possibly CUDA) tensor after the first update.
    test_acc = float(evaluate_test(best_model, adj, adj_2hop))
    print("test acc: {:.4f}".format(test_acc))

    # Keep this configuration if it beats the best one so far.
    # NOTE(review): the comparison uses *test* accuracy, so the reported best
    # accuracy is optimistically biased; consider selecting on best_val_acc.
    if test_acc > best_accuracy:
        best_lr = lr
        best_epochs = epochs
        best_accuracy = test_acc

# Report the best configuration.
print("Best lr:", best_lr)
print("Best epochs:", best_epochs)
print("Best accuracy:", best_accuracy)

cuda: True
citeseer 6


  r_inv = np.power(rowsum, -1).flatten()


ep200: train loss: 1.5828 train acc: 0.5583 val acc: 0.5420
ep400: train loss: 1.3538 train acc: 0.6500 val acc: 0.5580
ep600: train loss: 1.1582 train acc: 0.7250 val acc: 0.5620
ep800: train loss: 1.0190 train acc: 0.8083 val acc: 0.5800
ep1000: train loss: 0.9144 train acc: 0.8417 val acc: 0.5780
ep1200: train loss: 0.7536 train acc: 0.9167 val acc: 0.5820
ep1400: train loss: 0.6831 train acc: 0.9417 val acc: 0.5880
ep1600: train loss: 0.6194 train acc: 0.9250 val acc: 0.5820
ep1800: train loss: 0.5498 train acc: 0.9583 val acc: 0.5860
ep2000: train loss: 0.4881 train acc: 0.9500 val acc: 0.5960
test acc: 0.5770
cuda: True
citeseer 6
ep200: train loss: 0.6566 train acc: 0.8750 val acc: 0.5660
ep400: train loss: 0.1858 train acc: 0.9833 val acc: 0.6020
ep600: train loss: 0.1239 train acc: 0.9667 val acc: 0.6040
ep800: train loss: 0.0323 train acc: 1.0000 val acc: 0.6020
ep1000: train loss: 0.0144 train acc: 1.0000 val acc: 0.6040
ep1200: train loss: 0.0097 train acc: 1.0000 val acc: 