# Preparation

包括超参数设置和数据处理。

本次复现采用 ACM 数据集，包含 8994 个节点和 25922 条边，共有三种节点类型和四种边类型，节点特征为 1902 维。其中有标签的节点共 3025 个，划分为训练集 600 个、验证集 300 个、测试集 2125 个。

In [18]:
import torch
import numpy as np
import torch.nn as nn
import pickle
import torch.nn.functional as F
import math

In [11]:
# Load the ACM dataset from the pickled files.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# run this on dataset files obtained from a trusted source.
with open('data/ACM/node_features.pkl', 'rb') as f:
    node_features = pickle.load(f)  # node feature matrix, (8994, 1902) for ACM
with open('data/ACM/edges.pkl', 'rb') as f:  # adjacency matrices of the heterogeneous graph, one per edge type
    edges = pickle.load(f)
with open('data/ACM/labels.pkl', 'rb') as f:
    labels = pickle.load(f)
num_nodes = edges[0].shape[0]
# Every adjacency matrix is (num_nodes, num_nodes); ACM has four edge types, so
# `edges` holds four such matrices.
# `labels` holds three arrays of (node index, class label) pairs: the training,
# validation and test splits. Only 3025 nodes are labelled (semi-supervised
# setting); the splits partition those labelled nodes.

  edges = pickle.load(f)
  edges = pickle.load(f)


In [13]:
# Combine the per-edge-type adjacency matrices into one dense tensor.
# Each sparse matrix is densified to (num_nodes, num_nodes); an identity matrix
# is appended as an extra "self-loop" edge type, and everything is stacked once
# along a new last axis -> (num_nodes, num_nodes, num_edge_types + 1).
# Stacking once avoids re-copying the growing tensor on every iteration.
adjacency_channels = [
    torch.from_numpy(edge.todense()).type(torch.FloatTensor) for edge in edges
]
adjacency_channels.append(torch.eye(num_nodes).type(torch.FloatTensor))
A = torch.stack(adjacency_channels, dim=-1)

node_features = torch.from_numpy(node_features).type(torch.FloatTensor)

# Each split is a sequence of (node index, class label) pairs. Convert to an
# ndarray *before* column-slicing so the code also works when a split is a
# plain Python list (the original train_node line sliced first).
train_split = np.array(labels[0])
valid_split = np.array(labels[1])
test_split = np.array(labels[2])

train_node = torch.from_numpy(train_split[:, 0]).type(torch.LongTensor)
train_target = torch.from_numpy(train_split[:, 1]).type(torch.LongTensor)
valid_node = torch.from_numpy(valid_split[:, 0]).type(torch.LongTensor)
valid_target = torch.from_numpy(valid_split[:, 1]).type(torch.LongTensor)
test_node = torch.from_numpy(test_split[:, 0]).type(torch.LongTensor)
test_target = torch.from_numpy(test_split[:, 1]).type(torch.LongTensor)

In [16]:
# Sanity check: report the shape of every tensor that is fed to the model.
for caption, tensor in (
    ('A shape: ', A),
    ('node_features shape:', node_features),
    ('train_node shape:', train_node),
    ('valid_node.shape:', valid_node),
    ('test_node.shape', test_node),
):
    print(caption, tensor.shape)

A shape:  torch.Size([8994, 8994, 5])
node_features shape: torch.Size([8994, 1902])
train_node shape: torch.Size([600])
valid_node.shape: torch.Size([300])
test_node.shape torch.Size([2125])


In [24]:
# Hyperparameter settings
epochs = 40  # number of iterations of the full-graph training loop
node_dim = 64  # hidden feature width; passed to GTN as w_out
num_channels = 2  # output channels of each GTConv, i.e. adjacency matrices learned in parallel
lr = 0.005  # learning rate intended for the Adam optimizer
weight_decay = 0.001  # L2 weight decay intended for the Adam optimizer
num_layers = 2  # number of GTLayer modules stacked inside GTN
norm = True  # forwarded to GTN as its `norm` argument
# Class count derived from the training labels; assumes labels are the
# consecutive integers 0..K-1 and that the largest one occurs in the training split.
num_classes = torch.max(train_target).item()+1

# Model Design

In [20]:
class GTN(nn.Module):
    """Graph Transformer Network for node classification.

    A stack of ``GTLayer`` modules turns the per-edge-type adjacency tensor
    into ``num_channels`` learned (meta-path style) adjacency matrices. A
    one-layer GCN is then run on each of them, the per-channel outputs are
    concatenated, and two linear layers produce the class logits.

    Args:
        num_edge: number of edge-type channels in the adjacency tensor
            (including the identity channel if the caller appended one).
        num_channels: number of adjacency matrices learned in parallel.
        w_in: input node-feature dimension.
        w_out: hidden dimension of the GCN and of ``linear1``.
        num_class: number of output classes.
        num_layers: number of ``GTLayer`` modules to stack.
        norm: stored as ``self.is_norm``. It is not read anywhere in this
            class; intermediate products are always normalised.
    """

    def __init__(self, num_edge, num_channels, w_in, w_out, num_class, num_layers, norm):
        super().__init__()
        self.num_edge = num_edge
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        self.num_class = num_class
        self.num_layers = num_layers
        self.is_norm = norm
        # The first layer multiplies two softly-selected adjacency matrices;
        # every later layer multiplies the running product by one more.
        self.layers = nn.ModuleList(
            [GTLayer(num_edge, num_channels, first=(i == 0)) for i in range(num_layers)]
        )
        # GCN parameters. `bias` is created and zero-initialised but is not
        # applied in gcn_conv; it is kept so the parameter set stays unchanged.
        self.weight = nn.Parameter(torch.empty(w_in, w_out))
        self.bias = nn.Parameter(torch.empty(w_out))
        self.loss = nn.CrossEntropyLoss()
        # The per-channel GCN outputs are concatenated: (num_channels * w_out) -> w_out.
        self.linear1 = nn.Linear(self.w_out * self.num_channels, self.w_out)
        self.linear2 = nn.Linear(self.w_out, self.num_class)  # final class logits
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier/Glorot-initialise the GCN weight and zero the GCN bias."""
        nn.init.xavier_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def gcn_conv(self, X, H):
        """Apply one GCN propagation step: ``norm(H, add=True)^T @ (X @ weight)``.

        Args:
            X: node feature matrix, (num_nodes, w_in).
            H: one learned adjacency matrix, (num_nodes, num_nodes).

        Returns:
            Propagated features, (num_nodes, w_out).
        """
        X = torch.mm(X, self.weight)
        # add=True: GCN uses the adjacency matrix with self-loops (A + I).
        H = self.norm(H, add=True)
        return torch.mm(H.t(), X)

    def normalization(self, H):
        """Normalise every channel of ``H`` (num_channels, N, N) with :meth:`norm`."""
        return torch.stack([self.norm(H[i, :, :]) for i in range(self.num_channels)], dim=0)

    def norm(self, H, add=False):
        """Degree-normalise one square adjacency matrix.

        Working on the transpose of ``H``, the diagonal is cleared (self-loops
        appear when adjacency matrices are multiplied), optionally replaced by
        ones, each row is divided by its row sum, and the result is transposed
        back. Rows whose sum is zero are left as all zeros.

        Args:
            H: square matrix, (N, N).
            add: if true, put ones on the diagonal after clearing it (the
                ``A + I`` form used by GCN); otherwise leave the diagonal at zero.

        Returns:
            The normalised matrix, (N, N).
        """
        H = H.t()
        # Build the identity on H's own device/dtype so this also works when
        # the model is moved off the CPU.
        eye = torch.eye(H.shape[0], dtype=H.dtype, device=H.device)
        H = H * (1 - eye)  # drop self-connections
        if add:
            H = H + eye
        deg = torch.sum(H, dim=1)  # (N,)
        deg_inv = deg.pow(-1)
        deg_inv[deg_inv == float('inf')] = 0  # zero-degree rows: 1/0 -> 0
        # D^-1 @ H is a per-row scaling; broadcasting avoids materialising a
        # dense diagonal matrix and an O(N^3) matmul.
        H = deg_inv.unsqueeze(1) * H
        return H.t()

    def forward(self, A, X, target_x, target):
        """Compute logits and loss for the nodes in ``target_x``.

        Args:
            A: adjacency tensor, (N, N, num_edge).
            X: node feature matrix, (N, w_in).
            target_x: indices of the nodes to classify.
            target: class labels for ``target_x``.

        Returns:
            Tuple ``(loss, y, Ws)``: the cross-entropy loss, the logits for
            ``target_x``, and one list per layer of detached softmax
            edge-type weights.
        """
        # (N, N, E) -> (1, E, N, N) so that GTConv's (C, E, 1, 1) weight
        # broadcasts against it; the leading 1 becomes the channel axis C.
        A = A.unsqueeze(0).permute(0, 3, 1, 2)
        Ws = []
        H = None
        for i, layer in enumerate(self.layers):
            if i == 0:
                H, W = layer(A)
            else:
                # Normalise the running product before extending it.
                H = self.normalization(H)
                H, W = layer(A, H)
            Ws.append(W)

        # Run the GCN once per channel and concatenate along the feature axis.
        X_ = torch.cat(
            [F.relu(self.gcn_conv(X, H[i])) for i in range(self.num_channels)], dim=1
        )
        X_ = F.relu(self.linear1(X_))
        y = self.linear2(X_[target_x])
        loss = self.loss(y, target)
        return loss, y, Ws

In [21]:
class GTLayer(nn.Module):
    """One Graph Transformer layer: extends a product of softly-selected adjacency matrices.

    The first layer selects two adjacency matrices with two independent
    ``GTConv`` modules and multiplies them. Every later layer selects one
    matrix and right-multiplies the product handed in from the previous layer.

    Args:
        in_channels: number of edge-type channels in the adjacency tensor.
        out_channels: number of adjacency matrices produced in parallel.
        first: whether this is the first layer of the stack.
    """

    def __init__(self, in_channels, out_channels, first=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first = first
        self.conv1 = GTConv(in_channels, out_channels)
        if self.first:
            # Only the first layer needs a second selection to form Q1 @ Q2.
            self.conv2 = GTConv(in_channels, out_channels)

    def forward(self, A, H_=None):
        """Return the extended product and the selection weights used.

        Args:
            A: adjacency tensor as expected by ``GTConv``.
            H_: running product from the previous layer,
                (out_channels, N, N). Required unless this is the first layer.

        Returns:
            Tuple ``(H, W)``: the new product (out_channels, N, N) and a list
            with the detached softmax weights of each ``GTConv`` used.

        Raises:
            TypeError: if a non-first layer is called without ``H_``.
        """
        selected = self.conv1(A)
        weights = [F.softmax(self.conv1.weight, dim=1).detach()]
        if self.first:
            second = self.conv2(A)
            # Batched matmul: one product per channel.
            H = torch.bmm(selected, second)
            weights.append(F.softmax(self.conv2.weight, dim=1).detach())
        else:
            if H_ is None:
                raise TypeError(
                    "GTLayer(first=False) requires H_, the product from the previous layer"
                )
            H = torch.bmm(H_, selected)
        return H, weights

In [22]:
class GTConv(nn.Module):
    """Soft edge-type selection implemented as a 1x1 convolution.

    Holds a weight of shape (out_channels, in_channels, 1, 1). A softmax over
    the ``in_channels`` axis turns it into convex-combination coefficients,
    which are used to mix the edge-type channels of the adjacency tensor.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        # 1x1 kernel: collapses the edge-type axis into out_channels mixtures.
        self.weight = nn.Parameter(torch.empty(out_channels, in_channels, 1, 1))
        self.bias = None
        # Fixed (non-trainable) parameter; not read by forward().
        self.scale = nn.Parameter(torch.tensor([0.1]), requires_grad=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Set every weight entry to the constant 0.1 (uniform softmax at start)."""
        nn.init.constant_(self.weight, 0.1)
        if self.bias is None:
            return
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
        limit = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.bias, -limit, limit)

    def forward(self, A):
        """Mix the edge-type channels of ``A`` with softmax-normalised weights.

        Steps:
            1. Softmax ``self.weight`` over dim 1 (the edge-type axis),
               e.g. shape [2, 5, 1, 1].
            2. Broadcast-multiply with ``A``:
               [1, 5, N, N] * [2, 5, 1, 1] -> [2, 5, N, N].
            3. Sum over the edge-type axis -> [2, N, N].
        """
        mixing = F.softmax(self.weight, dim=1)
        return (A * mixing).sum(dim=1)

# Training Loop

In [27]:
def accuracy(pred, target):
    """Return the fraction of entries of ``pred`` that equal ``target``."""
    matches = pred == target
    n_correct = matches.sum().item()
    return n_correct / target.numel()

def true_positive(pred, target, num_classes):
    """Count, per class, the positions predicted as that class and labelled as it.

    Returns a tensor with one count per class index ``0..num_classes-1``.
    """
    counts = [((pred == cls) & (target == cls)).sum() for cls in range(num_classes)]
    return torch.tensor(counts)

def true_negative(pred, target, num_classes):
    """Count, per class, the positions neither predicted nor labelled as that class.

    Returns a tensor with one count per class index ``0..num_classes-1``.
    """
    counts = [((pred != cls) & (target != cls)).sum() for cls in range(num_classes)]
    return torch.tensor(counts)

def false_positive(pred, target, num_classes):
    """Count, per class, the positions predicted as that class but labelled otherwise.

    Returns a tensor with one count per class index ``0..num_classes-1``.
    """
    counts = [((pred == cls) & (target != cls)).sum() for cls in range(num_classes)]
    return torch.tensor(counts)

def false_negative(pred, target, num_classes):
    """Count, per class, the positions labelled as that class but predicted otherwise.

    Returns a tensor with one count per class index ``0..num_classes-1``.
    """
    counts = [((pred != cls) & (target == cls)).sum() for cls in range(num_classes)]
    return torch.tensor(counts)

def precision(pred, target, num_classes):
    """Return per-class precision, tp / (tp + fp), as a float tensor.

    Classes that were never predicted (0 / 0) get a precision of 0.
    """
    tp = true_positive(pred, target, num_classes).to(torch.float)
    fp = false_positive(pred, target, num_classes).to(torch.float)

    ratio = tp / (tp + fp)
    # 0 / 0 yields NaN; report those classes as 0 instead.
    return ratio.masked_fill(torch.isnan(ratio), 0)

def recall(pred, target, num_classes):
    """Return per-class recall, tp / (tp + fn), as a float tensor.

    Classes with no labelled examples (0 / 0) get a recall of 0.
    """
    tp = true_positive(pred, target, num_classes).to(torch.float)
    fn = false_negative(pred, target, num_classes).to(torch.float)

    ratio = tp / (tp + fn)
    # 0 / 0 yields NaN; report those classes as 0 instead.
    return ratio.masked_fill(torch.isnan(ratio), 0)

def f1_score(pred, target, num_classes):
    """Return per-class F1, the harmonic mean of precision and recall.

    Classes whose precision and recall are both 0 (0 / 0) get an F1 of 0.
    """
    p = precision(pred, target, num_classes)
    r = recall(pred, target, num_classes)

    harmonic = 2 * (p * r) / (p + r)
    # 0 / 0 yields NaN; report those classes as 0 instead.
    return harmonic.masked_fill(torch.isnan(harmonic), 0)

In [28]:
final_f1 = 0
model = GTN(num_edge=A.shape[-1],  # number of edge-type channels: 4 edge types + identity
            num_channels=num_channels,
            w_in=node_features.shape[1],  # node feature dimension (1902 for ACM)
            w_out=node_dim,  # hidden output dimension (64)
            num_class=num_classes,
            num_layers=num_layers,  # number of GT layers
            norm=norm)

# Use the hyperparameters defined earlier instead of repeating their literal
# values, so changing `lr` / `weight_decay` actually affects training.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# NOTE: this criterion is not what training uses: GTN.forward returns its own
# cross-entropy loss, and the training loop rebinds the name `loss`.
loss = nn.CrossEntropyLoss()

# Train & Valid & Test
# Metrics at the epoch with the best validation macro-F1 so far.
best_val_loss = 10000
best_test_loss = 10000
best_train_loss = 10000
best_train_f1 = 0
best_val_f1 = 0
best_test_f1 = 0.

In [None]:
def _macro_f1(logits, target):
    """Return the macro-averaged F1 of the argmax predictions as a NumPy scalar."""
    pred = torch.argmax(logits.detach(), dim=1)
    return torch.mean(f1_score(pred, target, num_classes=num_classes)).cpu().numpy()


for epoch in range(epochs):
    # Decay the learning rate by 10% per epoch while it is above 0.005.
    # NOTE(review): with an initial rate of 0.005 this condition is never true,
    # so the rate stays constant; confirm whether that is intended.
    for param_group in optimizer.param_groups:
        if param_group['lr'] > 0.005:
            param_group['lr'] = param_group['lr'] * 0.9
    print('Epoch:  ', epoch + 1)
    model.zero_grad()
    model.train()  # A: (num_nodes, num_nodes, num_edge)
    # Call the module itself (not .forward) so nn.Module hooks are honoured.
    loss, y_train, Ws = model(A, node_features, train_node, train_target)
    train_f1 = _macro_f1(y_train, train_target)
    print('Train - Loss: {}, Macro_F1: {}'.format(loss.detach().cpu().numpy(), train_f1))
    loss.backward()
    optimizer.step()
    model.eval()
    # Validation and test: each is a separate full forward pass without gradients.
    with torch.no_grad():
        val_loss, y_valid, _ = model(A, node_features, valid_node, valid_target)
        val_f1 = _macro_f1(y_valid, valid_target)
        print('Valid - Loss: {}, Macro_F1: {}'.format(val_loss.detach().cpu().numpy(), val_f1))
        test_loss, y_test, W = model(A, node_features, test_node, test_target)
        test_f1 = _macro_f1(y_test, test_target)
        print('Test - Loss: {}, Macro_F1: {}\n'.format(test_loss.detach().cpu().numpy(), test_f1))
    # Model selection: remember the metrics of the epoch with the best validation F1.
    if val_f1 > best_val_f1:
        best_val_loss = val_loss.detach().cpu().numpy()
        best_test_loss = test_loss.detach().cpu().numpy()
        best_train_loss = loss.detach().cpu().numpy()
        best_train_f1 = train_f1
        best_val_f1 = val_f1
        best_test_f1 = test_f1
print('---------------Best Results--------------------')
print('Train - Loss: {}, Macro_F1: {}'.format(best_train_loss, best_train_f1))
print('Valid - Loss: {}, Macro_F1: {}'.format(best_val_loss, best_val_f1))
print('Test - Loss: {}, Macro_F1: {}'.format(best_test_loss, best_test_f1))
final_f1 += best_test_f1

Epoch:   1
