In [1]:
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F

class Encoder(nn.Module):
    """
    Encodes a node's using 'convolutional' GraphSage approach
    """
    def __init__(self, features, feature_dim, 
            embed_dim, adj_lists, aggregator,
            num_sample=10,
            base_model=None, gcn=False, cuda=False, 
            feature_transform=False): 
        super(Encoder, self).__init__()

        self.features = features
        # 变换前的hidden_size/维度
        self.feat_dim = feature_dim
        self.adj_lists = adj_lists
        # 即邻居聚合后的mebedding: agg1 = MeanAggregator(features, cuda=True)
        self.aggregator = aggregator
        self.num_sample = num_sample
        if base_model != None:
            self.base_model = base_model

        # 默认False, model.py里面设置成True
        self.gcn = gcn
        # 变换后的hidden_size/维度
        self.embed_dim = embed_dim
        self.cuda = cuda
        self.aggregator.cuda = cuda
        # 矩阵W维度 = 变换后维度 * 变换前维度
        # 其中gcn表示是否拼接，如果拼接的话由于是"自身向量||邻居聚合向量", 所以维度为2倍
        self.weight = nn.Parameter(
                torch.FloatTensor(embed_dim, self.feat_dim if self.gcn else 2 * self.feat_dim))
        init.xavier_uniform(self.weight)

    def forward(self, nodes):
        """
        Generates embeddings for a batch of nodes.

        nodes     -- list of nodes
        """
        # 调用aggregator.py文件中的MeanAggregator class的forward函数，得到聚合邻居的信息
        neigh_feats = self.aggregator.forward(nodes, [self.adj_lists[int(node)] for node in nodes], 
                self.num_sample)
        if not self.gcn:
            if self.cuda:
                self_feats = self.features(torch.LongTensor(nodes).cuda())
            else:
                self_feats = self.features(torch.LongTensor(nodes))
            # 将自身和聚合邻居的向量拼接, algorithm 1 line 5的拼接部分
            combined = torch.cat([self_feats, neigh_feats], dim=1)
        else:
            # 只用聚合邻居的向量来表示，不用自身信息, algorithm 1 line 5的拼接部分
            combined = neigh_feats
        # 送入到神经网络，algorithm 1 line 5乘以矩阵W
        combined = F.relu(self.weight.mm(combined.t()))
        # 经过一层GNN layer后的点的embedding，维度为embed_dim * nodes
        return combined

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

import random

"""
Set of modules for aggregating embeddings of neighbors.
"""

# 实现聚合类，对邻居信息进行AGGREGATE
class MeanAggregator(nn.Module):
    """
    Aggregates a node's embeddings using mean of neighbors' embeddings
    """
    def __init__(self, features, cuda=False, gcn=False): 
        """
        Initializes the aggregator for a specific graph.

        features -- function mapping LongTensor of node ids to FloatTensor of feature values.
        cuda -- whether to use GPU
        gcn --- whether to perform concatenation GraphSAGE-style, or add self-loops GCN-style
        """

        super(MeanAggregator, self).__init__()

        self.features = features
        self.cuda = cuda
        self.gcn = gcn


    def forward(self, nodes, to_neighs, num_sample=10):
        """
        # batch中的点的列表
        nodes --- list of nodes in a batch
        # batch中每个点对应的邻居集合
        to_neighs --- list of sets, each set is the set of neighbors for node in batch
        num_sample --- number of neighbors to sample. No sampling if None.
        """
        # Local pointers to functions (speed hack)
        _set = set
        if not num_sample is None:
            _sample = random.sample
            # 首先对每一个节点的邻居集合neigh进行遍历，判断一下已有邻居数和采样数大小，多于采样数进行抽样
            # 对一个batch中的每一个节点的邻接点set进行sample
            samp_neighs = [_set(_sample(to_neigh, 
                            num_sample,
                            )) if len(to_neigh) >= num_sample else to_neigh for to_neigh in to_neighs]
        else:
            samp_neighs = to_neighs

        # 将自己也作为自己的邻居点 (类似于GCN里面的A + I的操作)
        if self.gcn:
            samp_neighs = [samp_neigh + set([nodes[i]]) for i, samp_neigh in enumerate(samp_neighs)]
        # *拆解列表后，转为为多个独立的元素作为参数给union，union函数进行去重合并
        unique_nodes_list = list(set.union(*samp_neighs))
        # 节点标号不一定都是从0开始的，创建一个字典，key为节点ID，value为节点序号 (old id到new id的转换，为下面列切片做准备)
        unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)}
        # print(len(nodes), len(unique_nodes), len(samp_neighs))

        # 构建缩小的邻接矩阵，即这个batch所用到的点所构成的小的邻接矩阵
        # nodes表示batch内的节点，unique_nodes表示batch内的节点用到的所有邻居节点，unique_nodes > nodes
        # len(samp_neighs)是这个batch的大小，即nodes数量，创建一个nodes * unique_nodes大小的邻接矩阵
        mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes)))
        # 列切片, 遍历每一个邻居集合的每一个元素，并且通过unique_nodes(old id)获取到节点对应的序号
        column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh]
        # 行切片, 比如samp_neighs = [{3,5,9}, {2,8}, {2}]，行切片为[0,0,0,1,1,2]
        row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))]
        # 利用切片创建图的邻接矩阵
        # 即(row_indices[i], column_indices[i])对应的位置为1
        mask[row_indices, column_indices] = 1
        if self.cuda:
            mask = mask.cuda()

        # 构造邻接矩阵
        # 统计每一个节点的邻居数量
        num_neigh = mask.sum(1, keepdim=True)
        # 归一化(除以邻居数量)
        mask = mask.div(num_neigh)
        # embed_matrix: [n, m]
        # n: unique_nodes
        # m: dim
        if self.cuda:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
        else:
            embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
        # mask是nodes * unique_nodes大小的邻接矩阵, embed_matrix是unique_nodes * hid_size的特征矩阵
        # 即A * X, 这里A是邻接矩阵， X是特征矩阵，这里一系列的操作是按batch训练需要采样出一个局部的A
        to_feats = mask.mm(embed_matrix)
        return to_feats

In [3]:
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable

import numpy as np
import time
import random
from sklearn.metrics import f1_score
from collections import defaultdict

In [4]:
class SupervisedGraphSage(nn.Module):

    def __init__(self, num_classes, enc):
        super(SupervisedGraphSage, self).__init__()
        # 这里面赋值为enc2(经过两层GNN)
        self.enc = enc
        self.xent = nn.CrossEntropyLoss()
        # 全连接参数矩阵，映射到labels num_classes维度做分类
        self.weight = nn.Parameter(torch.FloatTensor(num_classes, enc.embed_dim))
        init.xavier_uniform(self.weight)

    def forward(self, nodes):
        # embeds实际是我们两层GNN后的输出nodes embedding
        embeds = self.enc(nodes)
        # 最后将nodes * hidden size 映射到 nodes * num_classes(= 7)之后做softmax计算cross entropy
        scores = self.weight.mm(embeds)
        return scores.t()

    def loss(self, nodes, labels):
        # 钱箱传播
        scores = self.forward(nodes)
        # 定义的cross entropy
        return self.xent(scores, labels.squeeze())

In [11]:
def load_cora():
    # 点的数量
    num_nodes = 2708
    # 特征数量
    num_feats = 1433
    # 构建特征矩阵
    feat_data = np.zeros((num_nodes, num_feats))
    # 构建节点的ground truth标签
    labels = np.empty((num_nodes,1), dtype=np.int64)
    # 做一个点的id映射
    node_map = {}
    label_map = {}

    # 读节点特征
    # cora.content第一列是node id, 中间为点的特征，最后一列为label
    # with open("cora/cora.content") as fp:
    with open("./cora/cora.content") as fp:
        for i,line in enumerate(fp):
            info = line.strip().split()
            # 特征，全部转换成float类型
            # feat_data[i,:] = map(float, info[1:-1])
            tmp = []
            for ss in info[1:-1]:
                tmp.append(float(ss))
            feat_data[i,:] = tmp
            
            # 将点的id转换，映射到从0开始的。info[0]是node old id,
            node_map[info[0]] = i
            # info[-1]是label, 字符串, 比如'Neural_Networks'和'Rule_Learning', 转换成int来表示类
            if not info[-1] in label_map:
                label_map[info[-1]] = len(label_map)
            labels[i] = label_map[info[-1]]

    # 读图存储成邻接表
    adj_lists = defaultdict(set)
    with open("./cora/cora.cites") as fp:
        for i,line in enumerate(fp):
            # 每一行是一条边
            info = line.strip().split()
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
    # 举例：(a, b) (a, c) (a, d) (b, c) (b, d)
    # 存储后 {a: set(b, c, d), b: set(a, c, d), c: set(a, b), d: set(a, b)}
    return feat_data, labels, adj_lists

In [12]:
def run_cora():
    # 随机数设置seed(种子)
    np.random.seed(1)
    random.seed(1)
    # cora数据集点数
    num_nodes = 2708
    # 加载cora数据集, 分别是
    # feat_data: 特征
    # labels: 标签
    # adj_lists: 邻接表，dict (key: node, value: neighbors set)
    feat_data, labels, adj_lists = load_cora()
    # 设置输入的input features矩阵X的维度 = 点的数量 * 特征维度
    features = nn.Embedding(2708, 1433)
    # 为矩阵X赋值，参数不更新
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()

    # 一共两层GNN layer
    # 第一层GNN
    # 以mean的方式聚合邻居, algorithm 1 line 4
    agg1 = MeanAggregator(features, cuda=True)
    # 将自身和聚合邻居的向量拼接后送入到神经网络(可选是否只用聚合邻居的信息来表示), algorithm 1 line 5
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)

    # 第二层GNN
    # 将第一层的GNN输出作为输入传进去
    # 这里面.t()表示转置，是因为Encoder class的输出维度为embed_dim * nodes
    agg2 = MeanAggregator(lambda nodes : enc1(nodes).t(), cuda=False)
    # enc1.embed_dim = 128, 变换后的维度还是128
    enc2 = Encoder(lambda nodes : enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
            base_model=enc1, gcn=True, cuda=False)

    # 采样的邻居点的数量
    enc1.num_samples = 5
    enc2.num_samples = 5

    # 7分类问题
    # enc2是经过两层GNN layer时候得到的 node embedding/features
    graphsage = SupervisedGraphSage(7, enc2)
    # graphsage.cuda()

    # 目的是打乱节点顺序
    rand_indices = np.random.permutation(num_nodes)

    # 划分测试集、验证集、训练集
    test = rand_indices[:1000]
    val = rand_indices[1000:1500]
    train = list(rand_indices[1500:])

    # 用SGD的优化，设置学习率
    optimizer = torch.optim.SGD(filter(lambda p : p.requires_grad, graphsage.parameters()), lr=0.7)
    # 记录每个batch训练时间
    times = []
    # 共训练100个batch
    for batch in range(100):
        # 取256个nodes作为一个batch
        batch_nodes = train[:256]
        # 打乱训练集的顺序，使下次迭代batch随机
        random.shuffle(train)
        # 记录开始时间
        start_time = time.time()
        optimizer.zero_grad()
        # 这个是SupervisedGraphSage里面定义的cross entropy loss
        loss = graphsage.loss(batch_nodes, 
                Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        # 反向传播和更新参数
        loss.backward()
        optimizer.step()
        # 记录结束时间
        end_time = time.time()
        times.append(end_time-start_time)
        # print (batch, loss.data[0])
        print (batch, loss.data)

    # 做validation
    val_output = graphsage.forward(val)
    # 计算micro F1 score
    print ("Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    # 计算每个batch的平均训练时间
    print ("Average batch time:", np.mean(times))

In [13]:
run_cora()

  init.xavier_uniform(self.weight)
  init.xavier_uniform(self.weight)
since Python 3.9 and will be removed in a subsequent version.
  samp_neighs = [_set(_sample(to_neigh,


0 tensor(1.9582)
1 tensor(1.9414)
2 tensor(1.9168)
3 tensor(1.8915)
4 tensor(1.8607)
5 tensor(1.8367)
6 tensor(1.8065)
7 tensor(1.7766)
8 tensor(1.7207)
9 tensor(1.6772)
10 tensor(1.5826)
11 tensor(1.5639)
12 tensor(1.5026)
13 tensor(1.4706)
14 tensor(1.4020)
15 tensor(1.3103)
16 tensor(1.2550)
17 tensor(1.2003)
18 tensor(1.0233)
19 tensor(0.9690)
20 tensor(0.9384)
21 tensor(0.8957)
22 tensor(0.8020)
23 tensor(0.7801)
24 tensor(0.7624)
25 tensor(0.8598)
26 tensor(0.7577)
27 tensor(0.9079)
28 tensor(0.9362)
29 tensor(0.9938)
30 tensor(0.5717)
31 tensor(0.6399)
32 tensor(0.5354)
33 tensor(0.5127)
34 tensor(0.4802)
35 tensor(0.4303)
36 tensor(0.4532)
37 tensor(0.5168)
38 tensor(0.5495)
39 tensor(0.6705)
40 tensor(0.5561)
41 tensor(0.4420)
42 tensor(0.3657)
43 tensor(0.4114)
44 tensor(0.3975)
45 tensor(0.3443)
46 tensor(0.3273)
47 tensor(0.3100)
48 tensor(0.3277)
49 tensor(0.3752)
50 tensor(0.3119)
51 tensor(0.3798)
52 tensor(0.3248)
53 tensor(0.4031)
54 tensor(0.2793)
55 tensor(0.3573)
56