1. 超参数设置

In [None]:
import argparse

import tensorlayerx as tlx

# Run configuration for the GCN example.
lr = 0.01                          # learning rate handed to the Adam optimizer
n_epoch = 200                      # number of training epochs
hidden_dim = 16                    # width of the hidden GCN layer
l2_coef = 5e-4                     # weight-decay coefficient handed to the optimizer
dataset = 'cora'                   # Planetoid dataset name
dataset_path = './examples/gcn/'   # root directory passed to the dataset loader
best_model_path = './'             # directory where the best checkpoint is written
self_loops = 1                     # number of self-loops added to every node
gpu = -1                           # GPU index; a negative value selects the CPU

# The following cells read every setting as an attribute of ``args``
# (``args.lr``, ``args.dataset`` ...). Collect the values in a namespace so
# those lookups resolve; this also keeps the dataset *name* available after
# the module-level ``dataset`` variable is rebound to the loaded dataset.
args = argparse.Namespace(
    lr=lr,
    n_epoch=n_epoch,
    hidden_dim=hidden_dim,
    l2_coef=l2_coef,
    dataset=dataset,
    dataset_path=dataset_path,
    best_model_path=best_model_path,
    self_loops=self_loops,
    gpu=gpu,
)

# Select the compute device before any tensors are created.
if gpu >= 0:
    tlx.set_device("GPU", gpu)
else:
    tlx.set_device("CPU")

2. 数据集处理与加载

In [None]:
from gammagl.datasets import Planetoid
from gammagl.utils import add_self_loops, mask_to_index

# Load the Planetoid citation dataset (e.g. Cora) and take its single graph.
dataset = Planetoid(args.dataset_path, args.dataset)
graph = dataset[0]

# Add self-loops to every node of the graph.
edge_index, _ = add_self_loops(
    graph.edge_index,
    num_nodes=graph.num_nodes,
    n_loops=args.self_loops,
)

# Convert the boolean split masks into index sequences.
train_idx, test_idx, val_idx = map(
    mask_to_index,
    (graph.train_mask, graph.test_mask, graph.val_mask),
)

# Bundle everything the model and the loss need into one dictionary.
data = dict(
    x=graph.x,
    y=graph.y,
    edge_index=edge_index,
    train_idx=train_idx,
    test_idx=test_idx,
    val_idx=val_idx,
    num_nodes=graph.num_nodes,
)

3. 构建卷积层

In [None]:
import tensorlayerx as tlx
from gammagl.utils import degree
from gammagl.layers.conv import MessagePassing
from gammagl.mpops import *
import tensorlayerx.nn as nn

class GCNConv(MessagePassing):
    """Simplified GCN-style graph convolution layer.

    The layer applies a bias-free linear projection to the node features,
    scales every edge by ``deg(src) ** -0.5`` and hands the result to the
    inherited ``propagate`` for message passing; a learnable bias is added
    to the aggregated output.

    NOTE(review): only the source-node degree is used for normalisation;
    no destination-side scaling is applied to the output.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Linear projection with Xavier-uniform weights and no built-in bias
        # (b_init=None); the bias is kept as a separate weight below.
        self.linear = tlx.layers.Linear(out_features=out_channels,
                                        in_features=in_channels,
                                        W_init='xavier_uniform',
                                        b_init=None)
        # Zero-initialised bias of shape (1, out_channels).
        initor = tlx.initializers.Zeros()
        self.bias = self._get_weights("bias", shape=(1, self.out_channels), init=initor)

    def forward(self, x, edge_index, edge_weight=None, num_nodes=None):
        """Run one graph convolution.

        Args:
            x: node feature tensor; its first dimension is taken as the
                number of nodes.
            edge_index: tensor whose row 0 holds source node ids and row 1
                destination node ids.
            edge_weight: optional per-edge weights; all-ones when ``None``.
            num_nodes: accepted for interface compatibility; the value is
                recomputed from ``x``.

        Returns:
            The aggregated node features with the bias added.
        """
        x = self.linear(x)  # linear transform of the input features
        src = edge_index[0]  # source node id of every edge
        # Default to unit weights only when the caller supplied none.
        if edge_weight is None:
            edge_weight = tlx.ones(shape=(tlx.get_tensor_shape(edge_index)[1], 1))
        edge_weight = tlx.reshape(edge_weight, (-1,))  # flatten to 1-D
        num_nodes = tlx.get_tensor_shape(x)[0]
        # Degree computed over the source ids, then deg ** -0.5 as the
        # normalisation coefficient.
        deg = degree(src, num_nodes=num_nodes, dtype=tlx.float32)
        norm = tlx.pow(deg, -0.5)
        weights = tlx.ops.gather(norm, src) * edge_weight
        out = self.propagate(x, edge_index,
                             edge_weight=weights,
                             num_nodes=num_nodes)
        out += self.bias
        return out

    def message_aggregate(self, x, edge_index, edge_weight=None, aggr="sum"):
        """Combine message computation and aggregation in one ``gspmm`` call.

        Args:
            x: node feature tensor.
            edge_index: edge endpoints, same layout as in ``forward``.
            edge_weight: optional per-edge weights; all-ones when ``None``.
            aggr: aggregation mode forwarded to ``gspmm``.

        Returns:
            The result of ``gspmm``.
        """
        # Keep the caller's (normalised) weights; fall back to ones only
        # when none were given.
        if edge_weight is None:
            edge_weight = tlx.ones(shape=(tlx.get_tensor_shape(edge_index)[1],),
                                   dtype=tlx.float32)
        out = gspmm(edge_index, edge_weight, x, aggr)
        return out

class GCNModel(tlx.nn.Module):
    """Two-layer GCN: ``GCNConv -> ReLU -> GCNConv``.

    Args:
        feature_dim: input node feature size.
        hidden_dim: output size of the first convolution.
        num_class: output size of the second convolution.
        name: optional module name forwarded to the base class.
    """

    def __init__(self, feature_dim, hidden_dim, num_class, name=None):
        super().__init__(name=name)
        self.conv1 = GCNConv(feature_dim, hidden_dim)  # input layer
        self.conv2 = GCNConv(hidden_dim, num_class)    # output layer
        self.relu = tlx.ReLU()                         # activation between the two

    def forward(self, x, edge_index, edge_weight, num_nodes):
        """Return the second convolution's output for every node."""
        hidden = self.relu(self.conv1(x, edge_index, edge_weight, num_nodes))
        return self.conv2(hidden, edge_index, edge_weight, num_nodes)
# Instantiate the model: input width comes from the dataset's node features,
# output width from its number of classes.
net = GCNModel(
    dataset.num_node_features,
    args.hidden_dim,
    dataset.num_classes,
    name="GCN",
)


4. 定义损失函数

In [None]:
from tensorlayerx.model import WithLoss, TrainOneStep 
class SemiSpvzLoss(WithLoss):
    """Loss wrapper that evaluates the loss on the training nodes only."""

    def __init__(self, net, loss_fn):
        super().__init__(backbone=net, loss_fn=loss_fn)

    def forward(self, data, y):
        """Compute the training loss.

        Args:
            data: dictionary with the keys ``x``, ``edge_index``,
                ``num_nodes``, ``y`` and ``train_idx``.
            y: not read by this method; the labels are taken from
                ``data['y']``.

        Returns:
            The loss function's value on the training nodes.
        """
        # Forward pass over the whole graph (no explicit edge weights).
        logits = self.backbone_network(
            data['x'], data['edge_index'], None, data['num_nodes']
        )
        # Restrict both predictions and labels to the training nodes.
        picked = data['train_idx']
        return self._loss_fn(
            tlx.gather(logits, picked),
            tlx.gather(data['y'], picked),
        )

# Weights that the optimizer will update.
train_weights = net.trainable_weights
# Wrap the network together with softmax cross-entropy on the logits.
loss_func = SemiSpvzLoss(
    net=net,
    loss_fn=tlx.losses.softmax_cross_entropy_with_logits,
)


5. 设置优化器

In [None]:
# Adam optimizer; learning rate and weight decay are read from the run settings.
optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.l2_coef)

6. 设置模型评测指标

In [None]:
def calculate_acc(logits, y, metrics):
    """Score one batch of predictions with ``metrics`` and return the result.

    The metric object is fed ``logits`` and ``y``, its result is read, and it
    is reset afterwards so the same instance can be reused for the next call.

    Args:
        logits: model outputs for the nodes being evaluated.
        y: ground-truth labels for the same nodes.
        metrics: object exposing ``update``, ``result`` and ``reset``.

    Returns:
        Whatever ``metrics.result()`` returns.
    """
    metrics.update(logits, y)
    score = metrics.result()
    metrics.reset()  # clear accumulated state before the next evaluation
    return score
# Accuracy metric instance passed to calculate_acc for validation and test evaluation.
metrics = tlx.metrics.Accuracy()


7. 定义模型训练、推理流程

In [None]:
import os

# Training-step wrapper built from the loss network, the optimizer and the
# trainable weights.
train_one_step = TrainOneStep(loss_func, optimizer, train_weights)

# Build the checkpoint path once. os.path.join yields the same file as plain
# concatenation when the directory ends with a separator, and still produces
# a valid path when it does not.
best_model_file = os.path.join(args.best_model_path, net.name + ".npz")

best_val_acc = 0
for epoch in range(args.n_epoch):
    # One optimisation step in training mode.
    net.set_train()
    train_loss = train_one_step(data, graph.y)

    # Evaluate on the validation nodes in evaluation mode.
    net.set_eval()
    logits = net(data['x'], data['edge_index'], None, data['num_nodes'])
    val_logits = tlx.gather(logits, data['val_idx'])
    val_y = tlx.gather(data['y'], data['val_idx'])
    val_acc = calculate_acc(val_logits, val_y, metrics)

    print("Epoch [{:0>3d}]  train loss: {:.4f} val acc: {:.4f}".format(
        epoch + 1, train_loss.item(), val_acc))

    # Keep the weights with the best validation accuracy; they are the ones
    # used for the final test evaluation.
    # NOTE(review): nothing is saved if val_acc never exceeds 0, in which
    # case the load below has no fresh checkpoint to read.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        net.save_weights(best_model_file, format='npz_dict')

# Restore the best checkpoint and report accuracy on the test nodes.
net.load_weights(best_model_file, format='npz_dict')
net.set_eval()
logits = net(data['x'], data['edge_index'], None, data['num_nodes'])
test_logits = tlx.gather(logits, data['test_idx'])
test_y = tlx.gather(data['y'], data['test_idx'])
test_acc = calculate_acc(test_logits, test_y, metrics)
print("Test acc: {:.4f}".format(test_acc))
