# 常规赛：论文引用节点分类-5月第三名笔记


**本项目参考自同学的思路，原项目地址如下：[https://aistudio.baidu.com/aistudio/projectdetail/2025629](https://aistudio.baidu.com/aistudio/projectdetail/2025629)。**

整体思路：小模型（残差结构）+ UniMP => 本地投票

运行环境 ：paddle 1.8.4 python3 GPU

根据往期大神们的经验，使用残差结构可以提点

训练次数达到300轮左右，趋于稳定

![](https://ai-studio-static-online.cdn.bcebos.com/81b3a9b117154b479ff76afd89f2db53edf7c5964dee4c8e846da62bfeea90c6)


# 残差结构模型设计 

In [1]:
class ResGAT(object):
    """Graph attention network whose hidden layers use additive skip connections.

    Hyper-parameters are read from ``config`` with these keys and defaults:
    ``num_layers`` (1), ``num_heads`` (8), ``hidden_size`` (8),
    ``feat_drop`` (0.6), ``attn_drop`` (0.6) and ``edge_dropout`` (0.0).
    """

    def __init__(self, config, num_class):
        """Store the number of output classes and the hyper-parameters.

        Args:
            config: mapping offering ``.get(key, default)``.
            num_class: width of the final (output) GAT layer.
        """
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.num_heads = config.get("num_heads", 8)
        self.hidden_size = config.get("hidden_size", 8)
        self.feat_dropout = config.get("feat_drop", 0.6)
        self.attn_dropout = config.get("attn_drop", 0.6)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def forward(self, graph_wrapper, feature, phase):
        """Build the network and return the per-node class scores.

        Args:
            graph_wrapper: PGL graph wrapper the layers run on.
            feature: node features; assumed [num_nodes, feat_dim] -- TODO confirm.
            phase: edges are dropped only when this equals ``"train"``.

        Returns:
            Output of the last GAT layer (one head, ``num_class`` wide,
            no activation).
        """
        # Edge dropout is a training-time-only augmentation.
        drop_rate = self.edge_dropout if phase == "train" else 0

        # Project the input to hidden_size * num_heads so that it can be
        # summed with the output of the multi-head GAT layers below.
        width = self.hidden_size * self.num_heads
        hidden = L.fc(feature, size=width, name="init_feature")

        for layer_idx in range(self.num_layers):
            sampled_gw = pgl.sample.edge_drop(graph_wrapper, drop_rate)
            attended = conv.gat(
                sampled_gw,
                hidden,
                self.hidden_size,
                activation=None,
                name="gat_layer_%s" % layer_idx,
                num_heads=self.num_heads,
                feat_drop=self.feat_dropout,
                attn_drop=self.attn_dropout)
            # Residual sum, then ReLU, then layer normalisation.
            hidden = L.layer_norm(
                L.relu(hidden + attended), name="ln_%s" % layer_idx)

        # Classification layer: a single head that emits num_class scores.
        output_gw = pgl.sample.edge_drop(graph_wrapper, drop_rate)
        return conv.gat(
            output_gw,
            hidden,
            self.num_class,
            num_heads=1,
            activation=None,
            feat_drop=self.feat_dropout,
            attn_drop=self.attn_dropout,
            name="output")


class ResGCN(object):
    """GCN whose hidden layers each have a linear residual branch.

    Hyper-parameters are read from ``config`` with these keys and defaults:
    ``num_layers`` (1), ``hidden_size`` (64), ``dropout`` (0.5) and
    ``edge_dropout`` (0.0).
    """

    def __init__(self, config, num_class):
        """Store the number of output classes and the hyper-parameters.

        Args:
            config: mapping offering ``.get(key, default)``.
            num_class: width of the final (output) GCN layer.
        """
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.hidden_size = config.get("hidden_size", 64)
        self.dropout = config.get("dropout", 0.5)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def _graph_and_norm(self, graph_wrapper, phase):
        """Return the ``(graph, norm)`` pair one GCN layer should use.

        In the ``"train"`` phase a fresh edge-dropped graph is sampled and
        the norm is recomputed from its in-degree; in every other phase the
        original graph and its stored ``"norm"`` node feature are used.
        """
        if phase == "train":
            ngw = pgl.sample.edge_drop(graph_wrapper, self.edge_dropout)
            return ngw, get_norm(ngw.indegree())
        return graph_wrapper, graph_wrapper.node_feat["norm"]

    def forward(self, graph_wrapper, feature, phase):
        """Build the network and return the per-node class scores.

        Args:
            graph_wrapper: PGL graph wrapper; outside the train phase it
                must provide ``node_feat["norm"]``.
            feature: node feature tensor.
            phase: ``"train"`` enables edge dropout.

        Returns:
            Output of the final GCN layer (``num_class`` wide, no activation).
        """
        for i in range(self.num_layers):
            # Each layer samples its own graph while training.
            ngw, norm = self._graph_and_norm(graph_wrapper, phase)

            # Linear projection of the layer input, so the skip path has the
            # same width (hidden_size) as the GCN output it is added to.
            res_feature = L.fc(feature, size=self.hidden_size, name="res_feature")

            feature = pgl.layers.gcn(ngw,
                feature,
                self.hidden_size,
                activation="relu",
                norm=norm,
                name="layer_%s" % i)

            feature = res_feature + feature

            # NOTE(review): dropout is added in every phase; presumably the
            # evaluation program disables it elsewhere -- confirm.
            feature = L.dropout(
                    feature,
                    self.dropout,
                    dropout_implementation='upscale_in_train')

        ngw, norm = self._graph_and_norm(graph_wrapper, phase)

        feature = conv.gcn(ngw,
                     feature,
                     self.num_class,
                     activation=None,
                     norm=norm,
                     name="output")

        # The original had an extra fc + return after this point; it could
        # never execute and has been removed.
        return feature

# 保存输出，进行投票

这里将各个模型训练后生成的预测结果文件（csv）进行简单的多数投票，得到最终的提交文件。
```
import csv
from collections import Counter

def vote_merge(filelst, output_path='D:/subexl/76/merge.csv'):
    """Merge several prediction CSVs by majority vote and write the result.

    Every input file is expected to start with a header row followed by
    two-column rows ``nid,label``. For each ``nid`` the label that occurs
    most often across all files is written to ``output_path``; on a tie the
    label that was seen first wins. Output rows follow the order in which
    node ids were first encountered.

    Args:
        filelst: iterable of paths of the prediction files to merge.
        output_path: destination CSV; defaults to the previously
            hard-coded location.

    Raises:
        OSError: if an input file cannot be read or the output cannot be
            written.
        ValueError: if a non-empty data row does not have exactly two fields.
    """
    votes = {}
    for filepath in filelst:
        # `with` closes every input file (the handles used to leak).
        with open(filepath, encoding='utf-8', mode='r', newline='') as cr:
            csv_reader = csv.reader(cr)
            next(csv_reader, None)  # skip the header row
            for row in csv_reader:
                if not row:
                    # Blank lines (e.g. a trailing newline) carry no vote.
                    continue
                idx, cls = row
                votes.setdefault(idx, []).append(cls)

    # The output is opened only after all inputs were read, so a missing
    # input no longer truncates an existing merge file.
    with open(output_path, encoding='utf-8', mode='w', newline='') as fw:
        csv_writer = csv.writer(fw)
        csv_writer.writerow(['nid', 'label'])
        for nid, clss in votes.items():
            winner = Counter(clss).most_common(1)[0][0]
            csv_writer.writerow([nid, winner])

if __name__ == '__main__':
    # Prediction files to ensemble. Most names look like leaderboard
    # scores -- presumably the score each submission obtained; confirm.
    submission_files = [
        "D:/subexl/76/0.75736.csv",
        "D:/subexl/76/0.75755.csv",
        "D:/subexl/76/0.75801.csv",
        "D:/subexl/76/0.75868.csv",
        "D:/subexl/76/0.75978.csv",
        "D:/subexl/76/0.76436.csv",
        "D:/subexl/76/0.759664.csv",
        "D:/subexl/76/0.75973517.csv",
        "D:/subexl/76/0.75980633.csv",
        "D:/subexl/76/0.76322347.csv",
        "D:/subexl/76/0.763223471.csv",
        "D:/subexl/76/submission.csv",
    ]
    vote_merge(submission_files)
```

# UniMP：统一消息传递模型

在半监督图节点分类场景下，节点之间通过边相连接，部分节点被打上标签。任务要求模型通过监督学习的方式，拟合被标注节点数据，并对未标注的节点进行预测。如下图所示，在一般机器学习的问题上，已标注的训练数据在新数据的推断上，并不能发挥直接的作用，因为数据的输入是独立的。然而在图神经网络的场景下，已有的标注数据可以从节点与节点的连接中，根据图结构关系推广到新的未标注数据中。

![](https://ai-studio-static-online.cdn.bcebos.com/7fca182598e44fcba9c10e8713672a21f7964f8ffac54e688b6e6842de76970a)



一般应用于半监督节点分类的算法分为图神经网络和标签传递算法两类，它们都是通过消息传递的方式（前者传递特征、后者传递标签）进行节点标签的学习和预测。其中经典标签传递算法如LPA，只考虑了将标签在图上进行传递，而图神经网络算法大多也只是使用了节点特征以及图的连接信息进行分类。但是单纯考虑标签传递或者节点特征都是不够的。

![](https://ai-studio-static-online.cdn.bcebos.com/04ca472bda354322a1f36416dbf98866a5e702b489ba4abaa3ad0debbae76a90)


百度PGL团队提出的统一消息传递模型 UniMP，将上述两种消息统一到框架中，同时实现了节点的特征与标签传递，显著提升了模型的泛化效果。 UniMP以Graph Transformer模型作为基础骨架，联合使用标签嵌入方法，将节点特征和部分节点标签同时输入至模型中，从而实现了节点特征和标签的同时传递。

简单地加入标签信息会带来标签泄漏的问题，即标签信息既是特征又是训练目标。实际上，标签大部分是有顺序的，例如在引用网络中，论文是按照时间先后顺序出现的，其标签也应该有一定的先后顺序。在无法得知训练集标签顺序的情况下，UniMP提出了标签掩码学习方法。UniMP每一次随机将一定量的节点标签掩码为未知，用部分已有的标注信息、图结构信息以及节点特征来还原训练数据的标签。最终，UniMP在OGB上取得SOTA效果，并在论文的消融实验上，验证了方法的有效性。

使用该模型结构，结合残差网络，调整好参数后再进行模型集成，即可取得 top1 的分数。

# UniMP 代码

```
class UniMP(object):
    """UniMP model: transformer-style graph attention with gated skips.

    Hyper-parameters are read from ``config`` with these keys and defaults:
    ``num_layers`` (2), ``hidden_size`` (64), ``out_size`` (40),
    ``embed_size`` (100), ``heads`` (8), ``dropout`` (0.3),
    ``edge_dropout`` (0.0) and ``use_label_e`` (False).

    ``F`` and ``L`` are module-level aliases defined outside this block --
    presumably ``paddle.fluid`` and ``paddle.fluid.layers``; confirm.
    """

    def __init__(self, config, num_class):
        """Store the number of output classes and the hyper-parameters.

        Args:
            config: mapping offering ``.get(key, default)``.
            num_class: width of the final prediction layer.
        """
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 2)
        self.hidden_size = config.get("hidden_size", 64)
        self.out_size = config.get("out_size", 40)
        self.embed_size = config.get("embed_size", 100)
        self.heads = config.get("heads", 8)
        self.dropout = config.get("dropout", 0.3)
        self.edge_dropout = config.get("edge_dropout", 0.0)
        self.use_label_e = config.get("use_label_e", False)

    @staticmethod
    def _uniform_fc_attrs(fan_in):
        """Return ``(weight_attr, bias_attr)`` for an fc layer of ``fan_in`` inputs.

        The bias is uniform in +-1/sqrt(fan_in). The weight is uniform in
        +-sqrt(3) * gain / sqrt(fan_in) with gain = sqrt(2 / (1 + a^2)) and
        a = sqrt(5), i.e. the Kaiming-uniform formula for that slope.
        """
        bias_bound = 1.0 / math.sqrt(fan_in)
        bias_attr = F.ParamAttr(initializer=F.initializer.UniformInitializer(
            low=-bias_bound, high=bias_bound))

        negative_slope = math.sqrt(5)
        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
        std = gain / math.sqrt(fan_in)
        weight_bound = math.sqrt(3.0) * std
        weight_attr = F.ParamAttr(initializer=F.initializer.UniformInitializer(
            low=-weight_bound, high=weight_bound))
        return weight_attr, bias_attr

    @staticmethod
    def _unit_layer_norm(feature, name):
        """Apply layer norm whose scale starts at 1 and whose bias starts at 0."""
        scale_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=1))
        shift_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0))
        return L.layer_norm(feature, name=name,
                            param_attr=scale_attr,
                            bias_attr=shift_attr)

    def embed_input(self, feature):
        """Layer-normalise the raw node features (no label information)."""
        return self._unit_layer_norm(feature, 'layer_norm_feature_input')

    def label_embed_input(self, feature):
        """Add label embeddings to the labelled nodes, then layer-normalise.

        Declares two int64 inputs, ``label`` and ``label_idx``; the rows of
        ``feature`` selected by ``label_idx`` are replaced by themselves
        plus the embedding of the matching ``label``.
        """
        label = F.data(name="label", shape=[None, 1], dtype="int64")
        label_idx = F.data(name='label_idx', shape=[None, 1], dtype="int64")

        label = L.reshape(label, shape=[-1])
        label_idx = L.reshape(label_idx, shape=[-1])

        # Embedding table is (out_size, embed_size). Assumes labels are
        # < out_size and embed_size equals the feature width, otherwise the
        # addition below cannot work -- TODO confirm.
        embed_attr = F.ParamAttr(initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
        embed = F.embedding(input=label, size=(self.out_size, self.embed_size), param_attr=embed_attr)

        feature_label = L.gather(feature, label_idx, overwrite=False)
        feature_label = feature_label + embed
        feature = L.scatter(feature, label_idx, feature_label, overwrite=True)

        return self._unit_layer_norm(feature, 'layer_norm_feature_input')

    def get_gat_layer(self, i, gw, feature, hidden_size, num_heads, concat=True,
                      layer_norm=True, relu=True, gate=False):
        """Build one attention layer with a (possibly gated) linear skip path.

        Args:
            i: layer index, used only to build parameter names.
            gw: graph wrapper the attention runs on.
            feature: input node features.
            hidden_size: per-head output width.
            num_heads: number of attention heads.
            concat: passed to ``transformer_gat_pgl``; also selects the skip
                width (``hidden_size * num_heads`` when true, else
                ``hidden_size``).
            layer_norm: apply layer normalisation to the combined output.
            relu: apply ReLU last.
            gate: blend skip and attention outputs with a learned sigmoid
                gate instead of summing them.

        Returns:
            The output feature tensor of the layer.
        """
        fc_w_attr, fc_bias_attr = self._uniform_fc_attrs(feature.shape[-1])

        skip_size = hidden_size * num_heads if concat else hidden_size
        skip_feature = L.fc(feature,
                            skip_size,
                            param_attr=fc_w_attr,
                            name='fc_skip_' + str(i),
                            bias_attr=fc_bias_attr)
        out_feat = transformer_gat_pgl(gw, feature, hidden_size, 'gat_' + str(i), num_heads, concat=concat)

        if gate:
            # The gate sees three tensors of out_feat's width.
            gate_w_attr, gate_bias_attr = self._uniform_fc_attrs(out_feat.shape[-1] * 3)
            gate_f = L.fc([skip_feature, out_feat, out_feat - skip_feature], 1,
                          param_attr=gate_w_attr,
                          name='gate_' + str(i),
                          bias_attr=gate_bias_attr)

            gate_f = L.sigmoid(gate_f)
            out_feat = skip_feature * gate_f + out_feat * (1 - gate_f)
        else:
            out_feat = out_feat + skip_feature

        if layer_norm:
            out_feat = self._unit_layer_norm(out_feat, 'layer_norm_' + str(i))
        if relu:
            out_feat = L.relu(out_feat)
        return out_feat

    def forward(self, graph_wrapper, feature, phase):
        """Build the network and return the per-node class scores.

        Args:
            graph_wrapper: PGL graph wrapper the layers run on.
            feature: node feature tensor.
            phase: ``"train"`` enables feature dropout and edge dropout.

        Returns:
            Output of the final fc layer (``num_class`` wide, no activation).
        """
        if phase == "train":
            edge_dropout = self.edge_dropout
            dropout = self.dropout
        else:
            edge_dropout = 0
            dropout = 0

        if self.use_label_e:
            feature = self.label_embed_input(feature)
            gate = True
        else:
            feature = self.embed_input(feature)
            gate = False
        if dropout > 0:
            feature = L.dropout(feature, dropout_prob=dropout,
                                dropout_implementation='upscale_in_train')

        ngw = None
        for i in range(self.num_layers - 1):
            ngw = pgl.sample.edge_drop(graph_wrapper, edge_dropout)
            feature = self.get_gat_layer(i, ngw, feature,
                                         hidden_size=self.hidden_size,
                                         num_heads=self.heads,
                                         concat=True,
                                         layer_norm=True, relu=True, gate=gate)
            if dropout > 0:
                feature = L.dropout(feature, dropout_prob=dropout,
                                    dropout_implementation='upscale_in_train')

        # The output layer reuses the graph sampled for the last hidden
        # layer. With num_layers == 1 the loop never runs, which used to
        # leave `ngw` unbound (NameError); sample a graph here in that case.
        if ngw is None:
            ngw = pgl.sample.edge_drop(graph_wrapper, edge_dropout)

        # The output layer is always gated, regardless of use_label_e.
        feature = self.get_gat_layer(self.num_layers - 1, ngw, feature,
                                     hidden_size=self.out_size, num_heads=self.heads,
                                     concat=False, layer_norm=False, relu=False, gate=True)

        pred = L.fc(
            feature, self.num_class, act=None, name="pred_output")
        return pred
```

# 运行方式

In [3]:
# Install the dependencies, then start training.
!pip install --upgrade python-dateutil
!pip install easydict
!pip install pgl==1.2.0
# Quoted: unquoted, the shell parses ">=0.25" as a redirection of pip's
# output into a file named "=0.25" and the version constraint is lost.
!pip install "pandas>=0.25"
!pip install pyarrow==0.13.0
!pip install chardet==3.0.4
!python main.py

Looking in indexes: https://mirror.baidu.com/pypi/simple/
Collecting python-dateutil
  Downloading https://mirror.baidu.com/pypi/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl (227kB)
     |████████████████████████████████| 235kB 16.5MB/s eta 0:00:01
ERROR: blackhole 0.3.2 has requirement xgboost==1.1.0, but you'll have xgboost 1.3.3 which is incompatible.
Installing collected packages: python-dateutil
  Found existing installation: python-dateutil 2.8.0
    Uninstalling python-dateutil-2.8.0:
      Successfully uninstalled python-dateutil-2.8.0
Successfully installed python-dateutil-2.8.1
Looking in indexes: https://mirror.baidu.com/pypi/simple/
Looking in indexes: https://mirror.baidu.com/pypi/simple/
Collecting pgl==1.2.0
[?25l  Downloading https://mirror.baidu.com/pypi/packages/35/fa/2290e78914d34d4e4480d7982b8f4d0c58a7e53535113a668a9d75d5c3b6/pgl-1.2.0-cp37-cp37m-manylinux1_x86_64.whl (7.9MB