<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

* 代码大部分内容类似于 kipf_GCN_keras 实现， 可以进行参考

In [1]:
from __future__ import division
from __future__ import print_function

import time
import numpy as np
import scipy.sparse as sp
import math

import torch
import torch.nn as nn # 构建网络模块
import torch.nn.functional as F  # 网络中函数 例如 F.relu
from torch.nn.parameter import Parameter # 构建的网络的参数
from torch.nn.modules.module import Module # 自己构建的网络需要继承的模块
import torch.optim as optim # 优化器模块

In [2]:
def encode_onehot(labels):
    """Encode a sequence of class labels as one-hot row vectors.

    Classes are indexed in sorted order, so the column assigned to each class
    is the same on every run. Iterating a plain ``set`` of strings depends on
    hash randomisation, which made the class indices change between kernel
    restarts.

    Args:
        labels: 1-D sequence (list or array) of hashable, mutually orderable
            labels, e.g. class-name strings.

    Returns:
        ``np.int32`` array with one row per label and one column per distinct
        class; each row contains a single ``1`` in the column of its class.
    """
    classes = sorted(set(labels))
    # Build the identity matrix once; row i is the one-hot vector of class i.
    identity = np.identity(len(classes), dtype=np.int32)
    class_to_onehot = {c: identity[i] for i, c in enumerate(classes)}
    return np.array([class_to_onehot[label] for label in labels],
                    dtype=np.int32)


def load_data(path="data/cora/", dataset="cora"):
    """Load a citation-network dataset and convert it to torch tensors.

    Reads ``<path><dataset>.content`` (one row per node: id, feature columns,
    label) and ``<path><dataset>.cites`` (one row per edge: two node ids).
    ``path`` is concatenated directly with the file name, so it must end with
    a path separator.

    Args:
        path: Directory prefix of the dataset files.
        dataset: Base name of the ``.content`` / ``.cites`` files.

    Returns:
        Tuple ``(adj, features, labels, idx_train, idx_val, idx_test)``:
        the row-normalised adjacency matrix with self-loops as a torch sparse
        tensor, the row-normalised dense feature matrix (``FloatTensor``),
        the class index of every node (``LongTensor``), and ``LongTensor``
        index sets covering rows 0-139, 200-499 and 500-1499 respectively.
    """
    print('Loading {} dataset...'.format(dataset))

    # --- Node table: every row is <id> <feature columns...> <label>.
    content_file = "{}{}.content".format(path, dataset)
    node_table = np.genfromtxt(content_file, dtype=np.dtype(str))
    features = sp.csr_matrix(node_table[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(node_table[:, -1])
    node_ids = np.array(node_table[:, 0], dtype=np.int32)
    # Map each raw node id to its row position in the node table.
    row_of_id = {node_id: row for row, node_id in enumerate(node_ids)}

    # --- Edge list: translate raw node ids into row positions.
    cites_file = "{}{}.cites".format(path, dataset)
    raw_edges = np.genfromtxt(cites_file, dtype=np.int32)
    flat_rows = [row_of_id.get(node_id) for node_id in raw_edges.flatten()]
    edges = np.array(flat_rows, dtype=np.int32).reshape(raw_edges.shape)

    # --- Adjacency matrix built from the (directed) edge list.
    num_nodes = labels.shape[0]
    edge_weights = np.ones(edges.shape[0])
    adj = sp.coo_matrix((edge_weights, (edges[:, 0], edges[:, 1])),
                        shape=(num_nodes, num_nodes),
                        dtype=np.float32)
    # Symmetrise: wherever the transposed entry is larger, take it instead,
    # so the result is the element-wise maximum of adj and adj.T.
    adj_t = adj.T
    transpose_is_larger = adj_t > adj
    adj = (adj + adj_t.multiply(transpose_is_larger)
           - adj.multiply(transpose_is_larger))

    # Row-normalise the features and the adjacency matrix with self-loops.
    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    # --- numpy / scipy -> torch
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])  # one-hot -> class index
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    # Fixed train / validation / test splits, given as row indices.
    idx_train = torch.LongTensor(range(140))
    idx_val = torch.LongTensor(range(200, 500))
    idx_test = torch.LongTensor(range(500, 1500))

    return adj, features, labels, idx_train, idx_val, idx_test


def normalize(mx):
    """Row-normalise a matrix so that every non-empty row sums to 1.

    Each row is multiplied by the reciprocal of its sum. Rows whose sum is
    zero are left as all zeros instead of producing ``inf`` values.

    Args:
        mx: scipy sparse matrix. Integer matrices are accepted: the row sums
            are promoted to float before taking the reciprocal, because NumPy
            refuses to raise integers to a negative power.

    Returns:
        The row-normalised matrix, ``diag(1 / rowsum) @ mx``.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # 1 / 0 yields inf; silence the warning because those entries are
    # replaced by 0 on the next line.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    # Left-multiplying by the diagonal matrix scales row i by r_inv[i].
    return sp.diags(r_inv).dot(mx)


def accuracy(output, labels):
    """Return the fraction of rows whose highest-scoring column is the label.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor holding the true class index of every sample.

    Returns:
        0-dim double tensor: number of correct predictions / ``len(labels)``.
    """
    # max over dim 1 returns (values, indices); the indices are the predictions.
    _, predicted = output.max(1)
    predicted = predicted.type_as(labels)
    num_correct = predicted.eq(labels).double().sum()
    return num_correct / len(labels)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

    Args:
        sparse_mx: Any scipy sparse matrix; it is converted to COO format and
            cast to ``float32`` first.

    Returns:
        A torch sparse COO tensor with the same shape and non-zero entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # Coordinates of the stored entries, stacked as a (2, nnz) int64 array:
    # first row = row indices, second row = column indices.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)  # the nnz stored values
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse_coo_tensor is the supported replacement for the deprecated
    # legacy constructor torch.sparse.FloatTensor(indices, values, shape).
    return torch.sparse_coo_tensor(indices, values, shape)

In [12]:
%%html
<img src="https://camo.githubusercontent.com/05cc2a7f9417d663c85fa6816cd87b7dcb18cd8d/68747470733a2f2f747661312e73696e61696d672e636e2f6c617267652f30303753385a496c6c793167653834366e66666b686a333077743067796a76642e6a7067", width=320, heigth=240>

In [3]:
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    Computes ``adj @ (input @ weight)`` and adds ``bias`` when one is present.
    """
    def __init__(self, in_features, out_features, bias=True):
        """Create the layer parameters and initialise them.

        Args:
            in_features: Number of input feature columns.
            out_features: Number of output feature columns.
            bias: When False no bias parameter is created.
        """
        super(GraphConvolution, self).__init__()

        self.in_features = in_features
        self.out_features = out_features

        # Assigning a Parameter to a Module attribute registers it in the
        # module's parameter list automatically.
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Register the name explicitly so that self.bias exists as None.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))

        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight and bias uniformly in ``[-b, b)``, b = 1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight first, then bias, so the random draws happen in a fixed order.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Apply the layer.

        Args:
            input: Dense node-feature matrix.
            adj: Adjacency matrix, passed as the first argument of
                ``torch.spmm`` (a sparse tensor in this notebook).

        Returns:
            ``adj @ (input @ weight)``, plus ``bias`` if the layer has one.
        """
        # Project the features first, then aggregate over the graph.
        output = torch.spmm(adj, torch.mm(input, self.weight))
        if self.bias is None:
            return output
        return output + self.bias

    def __repr__(self):
        """Return ``'<ClassName> (<in_features> -> <out_features>)'``."""
        return '{} ({} -> {})'.format(self.__class__.__name__,
                                      self.in_features, self.out_features)

In [4]:
class GCN(nn.Module):
    """Two-layer graph convolutional network that outputs log-probabilities.

    Pipeline: graph convolution -> ReLU -> dropout -> graph convolution ->
    log-softmax over the class dimension.
    """
    def __init__(self, nfeat, nhid, nclass, dropout):
        """Build the two graph-convolution layers.

        Args:
            nfeat: Number of input features per node.
            nhid: Number of hidden units produced by the first layer.
            nclass: Number of output classes.
            dropout: Dropout probability applied after the first layer.
        """
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return per-node class log-probabilities for features ``x`` on graph ``adj``."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        logits = self.gc2(hidden, adj)
        return F.log_softmax(logits, dim=1)

In [7]:
# Hyperparameters

seed = 42  # Random seed, applied to PyTorch and NumPy below.
epochs = 200  # Number of epochs to train.
lr = 0.01  # Initial learning rate.
weight_decay = 5e-4  # Weight decay (L2 loss on parameters).
hidden = 16  # Number of hidden units.
dropout = 0.5  # Dropout rate (1 - keep probability).
fastmode = False  # If True, skip the separate eval-mode validation pass.

# Actually apply the seed: without this, parameter initialisation and dropout
# masks differ on every run and the reported metrics cannot be reproduced.
torch.manual_seed(seed)
np.random.seed(seed)

In [6]:
# Load the dataset (adjacency, features, labels and the three index splits)
adj, features, labels, idx_train, idx_val, idx_test = load_data()

# Build the model; labels hold class indices, so max index + 1 = number of classes
model = GCN(nfeat=features.shape[1],
            nhid=hidden,
            nclass=labels.max().item() + 1,
            dropout=dropout)
# Create the optimizer
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) # weight_decay supplies the L2 penalty on the parameters

Loading cora dataset...


In [8]:
def train(epoch):
    """Run one full-batch training step and print train/validation metrics.

    Uses the notebook-level globals ``model``, ``optimizer``, ``features``,
    ``adj``, ``labels``, ``idx_train``, ``idx_val`` and ``fastmode``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
    """
    t = time.time()
    model.train()  # training mode: dropout is active
    optimizer.zero_grad()
    output = model(features, adj)
    # The model already returns log-probabilities, so nll_loss is used here
    # (cross-entropy = log-softmax + NLL loss).
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # Validation metrics are only reported, never back-propagated, so they are
    # computed without recording an autograd graph.
    with torch.no_grad():
        if not fastmode:
            # Evaluate validation set performance separately,
            # deactivates dropout during validation run.
            model.eval()
            output = model(features, adj)
        # In fastmode the training-mode output from above is reused.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])

    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    """Evaluate the model on the test split and print loss and accuracy.

    Uses the notebook-level globals ``model``, ``features``, ``adj``,
    ``labels`` and ``idx_test``.
    """
    model.eval()  # evaluation mode: dropout is disabled
    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

In [9]:
# Train for `epochs` epochs and report the total wall-clock time
t_total = time.time()
for epoch in range(epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

Epoch: 0001 loss_train: 2.0122 acc_train: 0.1500 loss_val: 2.0183 acc_val: 0.1200 time: 0.0317s
Epoch: 0002 loss_train: 1.9934 acc_train: 0.1000 loss_val: 1.9994 acc_val: 0.1600 time: 0.0175s
Epoch: 0003 loss_train: 1.9656 acc_train: 0.2357 loss_val: 1.9822 acc_val: 0.1567 time: 0.0144s
Epoch: 0004 loss_train: 1.9460 acc_train: 0.2000 loss_val: 1.9661 acc_val: 0.1567 time: 0.0127s
Epoch: 0005 loss_train: 1.9270 acc_train: 0.2143 loss_val: 1.9505 acc_val: 0.1567 time: 0.0119s
Epoch: 0006 loss_train: 1.9155 acc_train: 0.2000 loss_val: 1.9352 acc_val: 0.1567 time: 0.0126s
Epoch: 0007 loss_train: 1.8913 acc_train: 0.2000 loss_val: 1.9200 acc_val: 0.1567 time: 0.0127s
Epoch: 0008 loss_train: 1.8845 acc_train: 0.2000 loss_val: 1.9052 acc_val: 0.1567 time: 0.0117s
Epoch: 0009 loss_train: 1.8729 acc_train: 0.2000 loss_val: 1.8902 acc_val: 0.1567 time: 0.0115s
Epoch: 0010 loss_train: 1.8567 acc_train: 0.2000 loss_val: 1.8755 acc_val: 0.1567 time: 0.0117s
Epoch: 0011 loss_train: 1.8375 acc_train

Epoch: 0089 loss_train: 0.7589 acc_train: 0.8786 loss_val: 1.0097 acc_val: 0.7733 time: 0.0138s
Epoch: 0090 loss_train: 0.7995 acc_train: 0.8357 loss_val: 1.0026 acc_val: 0.7833 time: 0.0165s
Epoch: 0091 loss_train: 0.8170 acc_train: 0.8429 loss_val: 0.9955 acc_val: 0.7833 time: 0.0168s
Epoch: 0092 loss_train: 0.7629 acc_train: 0.8643 loss_val: 0.9887 acc_val: 0.7900 time: 0.0190s
Epoch: 0093 loss_train: 0.7805 acc_train: 0.8571 loss_val: 0.9817 acc_val: 0.7900 time: 0.0206s
Epoch: 0094 loss_train: 0.7111 acc_train: 0.8929 loss_val: 0.9737 acc_val: 0.7900 time: 0.0153s
Epoch: 0095 loss_train: 0.7874 acc_train: 0.8357 loss_val: 0.9660 acc_val: 0.7900 time: 0.0122s
Epoch: 0096 loss_train: 0.7656 acc_train: 0.8857 loss_val: 0.9583 acc_val: 0.7933 time: 0.0124s
Epoch: 0097 loss_train: 0.7598 acc_train: 0.8429 loss_val: 0.9510 acc_val: 0.7967 time: 0.0138s
Epoch: 0098 loss_train: 0.7509 acc_train: 0.8500 loss_val: 0.9440 acc_val: 0.7967 time: 0.0139s
Epoch: 0099 loss_train: 0.7228 acc_train

Epoch: 0190 loss_train: 0.4102 acc_train: 0.9357 loss_val: 0.7092 acc_val: 0.8167 time: 0.0165s
Epoch: 0191 loss_train: 0.3966 acc_train: 0.9643 loss_val: 0.7081 acc_val: 0.8167 time: 0.0122s
Epoch: 0192 loss_train: 0.4595 acc_train: 0.9214 loss_val: 0.7066 acc_val: 0.8167 time: 0.0148s
Epoch: 0193 loss_train: 0.4161 acc_train: 0.9143 loss_val: 0.7061 acc_val: 0.8133 time: 0.0127s
Epoch: 0194 loss_train: 0.4422 acc_train: 0.9214 loss_val: 0.7065 acc_val: 0.8133 time: 0.0115s
Epoch: 0195 loss_train: 0.3905 acc_train: 0.9643 loss_val: 0.7069 acc_val: 0.8133 time: 0.0119s
Epoch: 0196 loss_train: 0.3947 acc_train: 0.9571 loss_val: 0.7075 acc_val: 0.8133 time: 0.0113s
Epoch: 0197 loss_train: 0.3923 acc_train: 0.9214 loss_val: 0.7090 acc_val: 0.8133 time: 0.0115s
Epoch: 0198 loss_train: 0.4342 acc_train: 0.9500 loss_val: 0.7116 acc_val: 0.8067 time: 0.0125s
Epoch: 0199 loss_train: 0.3986 acc_train: 0.9286 loss_val: 0.7125 acc_val: 0.8067 time: 0.0112s
Epoch: 0200 loss_train: 0.4183 acc_train

In [10]:
# Evaluate the trained model on the test split (prints loss and accuracy)
test()

Test set results: loss= 0.7367 accuracy= 0.8150
