In [1]:
# 构建一个2层的GNN模型
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F

class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        # 实例化SAGEConve，in_feats是输入特征的维度，out_feats是输出特征的维度，aggregator_type是聚合函数的类型
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean')

    def forward(self, graph, inputs):
        
        # 输入是节点的特征
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.conv2(graph, h)
        return h

In [2]:
node_features = graph.ndata['feat']
node_labels = graph.ndata['label']
train_mask = graph.ndata['train_mask']
valid_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
n_features = node_features.shape[1]
n_labels = int(node_labels.max().item() + 1)

NameError: name 'graph' is not defined

In [None]:
def evaluate(model, graph, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(graph, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)

In [None]:
model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels)
opt = torch.optim.Adam(model.parameters())

for epoch in range(10):
    model.train()
    # 使用所有节点(全图)进行前向传播计算
    logits = model(graph, node_features)
    # 计算损失值
    loss = F.cross_entropy(logits[train_mask], node_labels[train_mask])
    # 计算验证集的准确度
    acc = evaluate(model, graph, node_features, node_labels, valid_mask)
    # 进行反向传播计算
    opt.zero_grad()
    loss.backward()
    opt.step()
    print(loss.item())

    # 如果需要的话，保存训练好的模型。本例中省略。

In [65]:
import numpy as np
import dgl
import torch
import torch as th

# 1~100生成500個nodes
src = np.random.randint(0, 100, 500)
dst = np.random.randint(0, 100, 500)


# make it symmetric
# edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src])))
edge_pred_graph = dgl.graph((src, dst))
    
# synthetic node and edge features, as well as edge labels
edge_pred_graph.ndata['feature'] = torch.randn(100, 10).float()

edge_pred_graph.edata['feature'] = torch.randn(500, 10).float()
# edge_pred_graph.edata['label'] = torch.randn(500)
edge_pred_graph.edata['label'] = torch.randint(0, 100, (500,)).float()
# edge_pred_graph.edata['label'] = edge_pred_graph.edata['label'].float()


# synthetic train-test splits, which is 3:2
edge_pred_graph.edata['train_mask'] = torch.zeros(500, dtype=torch.bool).bernoulli(0.6)

In [66]:
print(edge_pred_graph)

Graph(num_nodes=100, num_edges=500,
      ndata_schemes={'feature': Scheme(shape=(10,), dtype=torch.float32)}
      edata_schemes={'feature': Scheme(shape=(10,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.float32), 'train_mask': Scheme(shape=(), dtype=torch.bool)})


In [67]:
print(edge_pred_graph.ndata['feature'].shape)

torch.Size([100, 10])


In [68]:
print(src.shape)
print(src)

(500,)
[75 22 31 90 84 83 21 19  9  5  5 36 42 15 28 68 52 63 80  6 71  6 45 47
 17 60 71 30 18 78 76 69 52 28 14 89 87 72  3 74 95 23 86 26 49 92  3 89
 27 92 72 40 26 26 19 68 81 60 53 34 39 13 49 54 55 50 83 61 47 39 63 99
 32 50 39 77 42 21 94 18 69 17 17 97  2 53 64 21 85 86 14 81 40 28 68 11
 31 54 15 80 32 38 38 46 85  2 68 99 88 40 71 52 85 51 35 78 93 33 76 86
 85 59 59 80 84  8  7  7 81 31 68 77 58 74  5 21 13 96 53 85 70 55 69 82
 77  8 97 93 54 28 94 32 33 25  0  2 76 40 13 48 78  8 88 92 11 99 49 77
 11  4 33 69 18 30 59 52 97 43  7 77 90 11 91 23 34 79 73 87 60 71 19  0
  5 77 85 70 65 49 34 44 29 54 21 62 33 15 75 14 32 47 57  2 59  0 18 35
 49 72 63 34 57 21 67 87  8 73 70 27 97 85 52 49 35 85 29 28 26 32  4  3
 43 21 67 94  6 21 67 34 70 10 24 85 13 30 17 26 98  9 84 27  9 71 15 95
 33 85 66 85 43 27 57 16 91 11  2 19 61 73  2 20 42 29 67 26 34 96 76 84
  3 12 80 22 63 15 11 33  1 56 98 27 95 44 26 61  1 36 52 69 53 14 43  2
 51 61 48 86 27 18 80  1 53 15  9 41 86 99 4

In [69]:
print(edge_pred_graph.ndata['feature'].shape)
print(edge_pred_graph.edata['feature'].shape)
# print(edge_pred_graph.ndata['feature'])
print(edge_pred_graph.edata['label'])

torch.Size([100, 10])
torch.Size([500, 10])
tensor([69., 54.,  7., 33., 95., 56., 69.,  0., 56., 52., 67., 87., 93., 54.,
        86., 93., 69.,  8., 26., 74., 26., 49., 37., 85., 34., 69.,  4., 31.,
        57., 52., 99., 57., 97., 22., 84., 26., 72., 10., 69., 12., 92., 22.,
        36., 89., 73., 65., 55., 93., 47., 14., 86., 91., 67., 33., 54., 70.,
        35., 22., 11.,  6., 95., 50., 92., 84., 57., 41., 64., 62., 32., 71.,
        59.,  9., 17., 86., 35., 91., 98., 12.,  4., 71., 44., 41., 56., 77.,
        90.,  6., 98., 55., 94., 36., 23., 76., 20., 31., 27., 49., 49., 43.,
        16., 12., 93., 74., 92.,  1., 99., 11., 66., 27., 80., 91.,  8.,  8.,
        15., 27., 42., 21., 35., 63., 55., 78., 66., 30., 38., 63., 41., 85.,
        56., 73., 87., 87., 40., 73., 32., 68., 68., 86., 62., 84., 91.,  8.,
        61., 35., 47., 93., 76., 58., 85.,  5., 74., 34., 69.,  7., 49., 42.,
        49.,  8., 59.,  4., 38., 53., 52., 25., 31., 95.,  3., 55., 81., 80.,
        65.,  2., 98

In [70]:
# Contruct a two-layer GNN model
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F

class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean')

    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.conv2(graph, h)
        return h

In [71]:
class MLPPredictor(nn.Module):
    def __init__(self, in_features, out_classes):
        super().__init__()
        self.W = nn.Linear(in_features * 2, out_classes)

    def apply_edges(self, edges):
        h_u = edges.src['h']
        h_v = edges.dst['h']
        score = self.W(torch.cat([h_u, h_v], 1))
        return {'score': score}

    def forward(self, graph, h):
        # h contains the node representations computed from the GNN defined
        # in the node classification section (Section 5.1).
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']

In [72]:
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        self.sage = SAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(in_features, 100)
        
    def forward(self, g, x):
        h = self.sage(g, x)
        score = self.pred(g, h)
        
        output = torch.softmax(score, dim=1)
#         print("output", output)
#         print("output's shape", output.shape)
        
        predicted_classes = torch.argmax(output, dim=1)
#         print("Predicted label:", predicted_classes)
#         print("shape", predicted_classes.shape)
        
        row_sums = torch.sum(output, dim=1)
#         print("sum", row_sums)
#         print("shape", row_sums.shape)
        
        return predicted_classes

In [75]:
from tqdm.notebook import tqdm

node_features = edge_pred_graph.ndata['feature']

# the true label, which should be float()
edge_label = edge_pred_graph.edata['label'].float()
print("label: ", edge_label, "\nshape", edge_label.shape)

train_mask = edge_pred_graph.edata['train_mask']

model = Model(10, 20, 10)
# self.sage = SAGE(in_feats=10, hidden_feats=20, out_feats=1)

optimizer = torch.optim.AdamW(model.parameters())
criterion = nn.CrossEntropyLoss()
total_steps = 10000

for epoch in tqdm(range(total_steps), desc="Training", position=0, leave=True):
    
    pred = model(edge_pred_graph, node_features)    
    print(pred[train_mask].shape)
    print(edge_label[train_mask].shape)
    
    loss = criterion(pred[train_mask], edge_label[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 50==0:
        print(loss.item())
#         print(loss)

label:  tensor([69., 54.,  7., 33., 95., 56., 69.,  0., 56., 52., 67., 87., 93., 54.,
        86., 93., 69.,  8., 26., 74., 26., 49., 37., 85., 34., 69.,  4., 31.,
        57., 52., 99., 57., 97., 22., 84., 26., 72., 10., 69., 12., 92., 22.,
        36., 89., 73., 65., 55., 93., 47., 14., 86., 91., 67., 33., 54., 70.,
        35., 22., 11.,  6., 95., 50., 92., 84., 57., 41., 64., 62., 32., 71.,
        59.,  9., 17., 86., 35., 91., 98., 12.,  4., 71., 44., 41., 56., 77.,
        90.,  6., 98., 55., 94., 36., 23., 76., 20., 31., 27., 49., 49., 43.,
        16., 12., 93., 74., 92.,  1., 99., 11., 66., 27., 80., 91.,  8.,  8.,
        15., 27., 42., 21., 35., 63., 55., 78., 66., 30., 38., 63., 41., 85.,
        56., 73., 87., 87., 40., 73., 32., 68., 68., 86., 62., 84., 91.,  8.,
        61., 35., 47., 93., 76., 58., 85.,  5., 74., 34., 69.,  7., 49., 42.,
        49.,  8., 59.,  4., 38., 53., 52., 25., 31., 95.,  3., 55., 81., 80.,
        65.,  2., 98., 42., 67., 41., 36., 26., 87., 24.

Training:   0%|          | 0/10000 [00:00<?, ?it/s]

torch.Size([300])
torch.Size([300])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)