In [36]:
from load_hetero_data import HeteroDataset
# Name of dataset
name="squirrel"
# Load dataset with fixed split
dataset=HeteroDataset(name,"./dataset/splits/"+name+"_split_0.npz")
print("num of nodes:",dataset.graph.num_nodes)
print("num of edges:",dataset.graph.num_edges)
print("dimension of feature:",dataset.graph.node_feat["feat"].shape)
print("class of labels:", int(max(dataset.y))+1)
print("train Examples:",len(dataset.train_index))
print("val Examples:",len(dataset.val_index))
print("test Examples:",len(dataset.test_index))

num of nodes: 5201
num of edges: 396846
dimension of feature: (5201, 2089)
class of labels: 5
train Examples: 2496
val Examples: 1664
test Examples: 1041


In [37]:
g = dataset.graph

In [38]:
import paddle.nn as nn
import pgl

class GCN(nn.Layer):
    """Implement of GCN
    """

    def __init__(self,
                 input_size,
                 num_class,
                 num_layers=2,
                 hidden_size=16,
                 **kwargs):
        super(GCN, self).__init__()
        self.num_class = num_class
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gcns = nn.LayerList()
        for i in range(self.num_layers):
            if i == 0:
                self.gcns.append(
                    pgl.nn.GCNConv(
                        input_size,
                        self.hidden_size,
                        activation="relu",
                        norm=True))
            else:
                self.gcns.append(
                    pgl.nn.GCNConv(
                        self.hidden_size,
                        self.hidden_size,
                        activation="relu",
                        norm=True))

        self.output = nn.Linear(self.hidden_size, self.num_class)
    def forward(self, graph, feature):
        for m in self.gcns:
            feature = m(graph, feature)
        logits = self.output(feature)
        return logits

In [39]:
import paddle
from paddle.optimizer import Adam

g = g.tensor()
y = paddle.to_tensor(dataset.y)
gcn = GCN(g.node_feat["feat"].shape[1], 5)
criterion = paddle.nn.loss.CrossEntropyLoss()
optim = Adam(learning_rate=0.01,
             parameters=gcn.parameters())


In [40]:

for epoch in range(200):
    gcn.train()
    logits = gcn(g, g.node_feat['feat'])
    loss = criterion(logits[dataset.train_index], y[dataset.train_index])
    loss.backward()
    optim.step()
    optim.clear_grad()
    gcn.eval()
    loss2 = criterion(logits[dataset.val_index], y[dataset.val_index])
    # 30 epoch 不下降 break
    # 《30,200》  《200,1000》  <early_stop , num_epochs>
    print("epoch: %s | train_loss: %.4f | val_loss:%.4f" % (epoch, loss.numpy(), loss2.numpy()))


0.21229586935638808
epoch: 0 | train_loss: 1.6095 | val_loss:1.6095 | test_loss: 1.6094
0.2084534101825168
epoch: 1 | train_loss: 1.6087 | val_loss:1.6095 | test_loss: 1.6090
0.1930835734870317
epoch: 2 | train_loss: 1.6070 | val_loss:1.6082 | test_loss: 1.6076
0.19692603266090297
epoch: 3 | train_loss: 1.6053 | val_loss:1.6065 | test_loss: 1.6061
0.2334293948126801
epoch: 4 | train_loss: 1.6035 | val_loss:1.6046 | test_loss: 1.6044
0.2574447646493756
epoch: 5 | train_loss: 1.6014 | val_loss:1.6026 | test_loss: 1.6023
0.2641690682036503
epoch: 6 | train_loss: 1.5990 | val_loss:1.6003 | test_loss: 1.6000
0.2641690682036503
epoch: 7 | train_loss: 1.5963 | val_loss:1.5979 | test_loss: 1.5974
0.2737752161383285
epoch: 8 | train_loss: 1.5934 | val_loss:1.5951 | test_loss: 1.5945
0.27761767531219983
epoch: 9 | train_loss: 1.5900 | val_loss:1.5919 | test_loss: 1.5913
0.2804995196926033
epoch: 10 | train_loss: 1.5863 | val_loss:1.5885 | test_loss: 1.5878
0.2862632084534102
epoch: 11 | train_lo

In [41]:
logits = gcn(g, g.node_feat['feat'])

In [42]:
m = paddle.metric.Accuracy()
correct = m.compute(logits[dataset.test_index], y[dataset.test_index])
m.update(correct)
res = m.accumulate()# 计算 top-k（topk 中的最大值）的索引
print(res)


0.5619596541786743


In [43]:
import numpy as np
x = paddle.to_tensor(np.array([
    [0.1, 0.2, 0.3, 0.4],
    [0.1, 0.4, 0.3, 0.2],
    [0.1, 0.2, 0.4, 0.3],
    [0.1, 0.2, 0.3, 0.4]]))
y = paddle.to_tensor(np.array([[0], [1], [2], [3]]))
m = paddle.metric.Accuracy()
correct = m.compute(x, y)
m.update(correct)
res = m.accumulate()
print(correct) # 0.75


Tensor(shape=[4, 1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [[0.],
        [1.],
        [1.],
        [1.]])


In [44]:
print(y[dataset.test_index])

Tensor(shape=[1041, 1], dtype=int64, place=Place(gpu:0), stop_gradient=True,
       [[0],
        [3],
        [0],
        ...,
        [1],
        [0],
        [3]])
