# Smaple of the official dgl website

In [44]:
import numpy as np
import dgl
import torch
import torch as th

# 1~100生成500個nodes
src = np.random.randint(0, 100, 500)
dst = np.random.randint(0, 100, 500)


# make it symmetric
# edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src])))
edge_pred_graph = dgl.graph((src, dst))
    
# synthetic node and edge features, as well as edge labels
edge_pred_graph.ndata['feature'] = torch.randn(100, 10).float()

edge_pred_graph.edata['feature'] = torch.randn(500, 10).float()
# edge_pred_graph.edata['label'] = torch.randn(500)
edge_pred_graph.edata['label'] = torch.randint(0, 100, (500,)).long()
# edge_pred_graph.edata['label'] = edge_pred_graph.edata['label'].float()


# synthetic train-test splits, which is about 3:2(not exactly)
# edge_pred_graph.edata['train_mask'] = torch.zeros(500, dtype=torch.bool).bernoulli(0.6)

num_edges = 500
train_ratio = 0.6
num_train = int(num_edges * train_ratio)

# 隨機排列的索引
permuted_idxs = torch.randperm(num_edges)

train_mask = torch.zeros(num_edges, dtype=torch.bool)
valid_mask = torch.zeros(num_edges, dtype=torch.bool)

train_mask[permuted_idxs[:num_train]] = True
valid_mask[permuted_idxs[num_train:]] = True

edge_pred_graph.edata['train_mask'] = train_mask
edge_pred_graph.edata['valid_mask'] = valid_mask



In [45]:
print(edge_pred_graph)

Graph(num_nodes=100, num_edges=500,
      ndata_schemes={'feature': Scheme(shape=(10,), dtype=torch.float32)}
      edata_schemes={'feature': Scheme(shape=(10,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'valid_mask': Scheme(shape=(), dtype=torch.bool)})


In [46]:
print(edge_pred_graph.ndata['feature'].shape)

torch.Size([100, 10])


In [47]:
print(src.shape)
print(src)

(500,)
[54 19 88 41 40 63 96 14  7 33 14 95 78 38 89 28 59 92 27  3 37 81 12 79
  9 55 50 15 83 55 66 79 64 31 85 71 78  4 92 21 19 45 81 95 30 67 25 14
 80 68 89 17 28 30 83 85 14 10 19 75 70 60 72 36 10 25 42 84 20 87 66 50
 99 42 43 63 65 43 10 64 80 31 29 92 84 57 15 86 51 72 25 40 52 51 65  6
 95 95 90 17 12  1 78 29 28 68 98 76 30 29 18 71 18 82 68 52 77 48 65 92
 30  6 30 49 33 76 25 30 25 77 36 29 16 29 95 70 65 33 25 82 30 72 89 42
 13 25 67 52 67 17 51 96 10 69 79  6 52 22 31 85 85 80 64 55 33 17 20 30
 42 85 91 24 28 95 38 75 91 16 71  3 46  1 51  7 28 93 43 36 89 55  6  0
 16 47 40  6 44 97 85 86 29 38 28 66 56 65 91 34 29  9  1 12 66 81 13 78
 10 20 43 61 24 94 78 77 14  6 27 22  6 43 29  9 75  4 40 98 57 32 40 19
 61 32 83 40 32 46 34 97 74  0 40 27 82 71 53 24 16 36 91 41 63 86 14 63
 73 39 78 93 91 48 32 97 85 76 57  9  0 48 54  0 74 48  4 26 45  4 93 73
 98 67 82 27 28 47 56  3 85 46 84 38 76 31 43  9 29 45 24 31  9 61 42 80
 37 28 26 94  3 78 34 45  6 72 83 55  0 74 5

In [56]:
print(edge_pred_graph.ndata['feature'].shape)
print(edge_pred_graph.edata['feature'].shape)
# print(edge_pred_graph.ndata['feature'])
# print(edge_pred_graph.edata['label'])

torch.Size([100, 10])
torch.Size([500, 10])


In [71]:
# Contruct a two-layer GNN model
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F

class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, dropout_prob = 0.25):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='pool')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='pool')
        
        self.dropout = nn.Dropout(dropout_prob)
        
    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.dropout(h)
        h = self.conv2(graph, h)
#         print("weight: ", self.conv1.fc_self.weight)
        return h

In [75]:
class MLPPredictor(nn.Module):
    def __init__(self, out_feats, out_classes):
        super().__init__()
        self.W = nn.Linear(out_feats*2, out_classes)

    def apply_edges(self, edges):
        h_u = edges.src['h']
        h_v = edges.dst['h']
        score = self.W(torch.cat([h_u, h_v], 1))
        return {'score': score}

    def forward(self, graph, h):
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']

In [86]:
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_classes):
        super().__init__()
        self.sage = SAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes)
      
    def forward(self, g, x, return_logits=False):
        h = self.sage(g, x)
        logits = self.pred(g, h)
        
        if return_logits:
            return logits

        output = torch.softmax(logits, dim=1)
        predicted_classes = torch.argmax(output, dim=1)
        return predicted_classes.float()

In [87]:
from tqdm.notebook import tqdm

node_features = edge_pred_graph.ndata['feature']

# the true label, which should be float()
edge_label = edge_pred_graph.edata['label'].float()

train_mask = edge_pred_graph.edata['train_mask']

model = Model(in_features=10, hidden_features=20, out_features=50, num_classes=100)

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
total_steps = 10000


for epoch in tqdm(range(total_steps), desc="Training", position=0, leave=True):
    model.train()
    scores = model(edge_pred_graph, node_features, return_logits=True)
    
    score = scores[train_mask]
    true_label = edge_label[train_mask].long()  # CrossEntropyLoss expects Long type for labels

    loss = criterion(score, true_label)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Calculate Accuracy
    with torch.no_grad():
        preds_label = model(edge_pred_graph, node_features)
        accuracy = (preds_label[train_mask] == true_label.float()).float().mean()

    if epoch % 200 == 0:
#         print("training pred: ", prediction)
#         print("training label: ", true_label)
        print(f"Epoch: {epoch}, Loss: {loss.item()}, Training Accuracy: {accuracy.item() * 100:.2f}%\n\n")

# Validation
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    scores = model(edge_pred_graph, node_features, return_logits=True)
    
    val_score = scores[valid_mask]
    val_label = edge_label[valid_mask].long()

    val_loss = criterion(val_score, val_label)

    val_preds_label = torch.argmax(val_score, dim=1)
    val_accuracy = (val_preds_label == val_label).float().mean()

    print("====================================================================================")
    print(f"Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item() * 100:.2f}%")

Training:   0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 0, Loss: 5.971440315246582, Training Accuracy: 2.67%


Epoch: 200, Loss: 0.3132559061050415, Training Accuracy: 88.33%


Epoch: 400, Loss: 0.12932245433330536, Training Accuracy: 95.67%


Epoch: 600, Loss: 0.11643753200769424, Training Accuracy: 93.33%


Epoch: 800, Loss: 0.10570906102657318, Training Accuracy: 91.33%


Epoch: 1000, Loss: 0.11307023465633392, Training Accuracy: 96.67%


Epoch: 1200, Loss: 0.07636285573244095, Training Accuracy: 96.00%


Epoch: 1400, Loss: 0.19097991287708282, Training Accuracy: 94.00%


Epoch: 1600, Loss: 0.1270306557416916, Training Accuracy: 96.33%


Epoch: 1800, Loss: 0.08733554184436798, Training Accuracy: 96.00%


Epoch: 2000, Loss: 0.12367800623178482, Training Accuracy: 96.67%


Epoch: 2200, Loss: 0.10462213307619095, Training Accuracy: 96.67%


Epoch: 2400, Loss: 0.06537387520074844, Training Accuracy: 93.67%


Epoch: 2600, Loss: 0.15967105329036713, Training Accuracy: 94.00%


Epoch: 2800, Loss: 0.07520807534456253, Training Accuracy: 9

In [85]:
import numpy as np
import dgl
import torch
import torch as th

# 1~100生成500個nodes
src = np.random.randint(0, 100, 500)
dst = np.random.randint(0, 100, 500)


# make it symmetric
# edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src])))
edge_pred_graph = dgl.graph((src, dst))
    
# synthetic node and edge features, as well as edge labels
edge_pred_graph.ndata['feature'] = torch.randn(100, 10).float()

edge_pred_graph.edata['feature'] = torch.randn(500, 10).float()
# edge_pred_graph.edata['label'] = torch.randn(500)
edge_pred_graph.edata['label'] = torch.randint(0, 100, (500,)).long()
# edge_pred_graph.edata['label'] = edge_pred_graph.edata['label'].float()


# synthetic train-test splits, which is about 3:2(not exactly)
# edge_pred_graph.edata['train_mask'] = torch.zeros(500, dtype=torch.bool).bernoulli(0.6)

num_edges = 500
train_ratio = 0.6
num_train = int(num_edges * train_ratio)

# 隨機排列的索引
permuted_idxs = torch.randperm(num_edges)

train_mask = torch.zeros(num_edges, dtype=torch.bool)
valid_mask = torch.zeros(num_edges, dtype=torch.bool)

train_mask[permuted_idxs[:num_train]] = True
valid_mask[permuted_idxs[num_train:]] = True

edge_pred_graph.edata['train_mask'] = train_mask
edge_pred_graph.edata['valid_mask'] = valid_mask


# Contruct a two-layer GNN model
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F

class SAGE(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, dropout_prob = 0.25):
        super().__init__()
        self.conv1 = dglnn.SAGEConv(
            in_feats=in_feats, out_feats=hid_feats, aggregator_type='pool')
        self.conv2 = dglnn.SAGEConv(
            in_feats=hid_feats, out_feats=out_feats, aggregator_type='pool')
        
        self.dropout = nn.Dropout(dropout_prob)
        
    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = F.relu(h)
        h = self.dropout(h)
        h = self.conv2(graph, h)
#         print("weight: ", self.conv1.fc_self.weight)
        return h

class MLPPredictor(nn.Module):
    def __init__(self, out_feats, out_classes):
        super().__init__()
        self.W = nn.Linear(out_feats*2, out_classes)

    def apply_edges(self, edges):
        h_u = edges.src['h']
        h_v = edges.dst['h']
        score = self.W(torch.cat([h_u, h_v], 1))
        return {'score': score}

    def forward(self, graph, h):
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']
        
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_classes):
        super().__init__()
        self.sage = SAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes)
      
    def forward(self, g, x, return_logits=False):
        h = self.sage(g, x)
        logits = self.pred(g, h)
        
        if return_logits:
            return logits

        output = torch.softmax(logits, dim=1)
        predicted_classes = torch.argmax(output, dim=1)
        return predicted_classes.float()

from tqdm.notebook import tqdm

node_features = edge_pred_graph.ndata['feature']

# the true label, which should be float()
edge_label = edge_pred_graph.edata['label'].float()

train_mask = edge_pred_graph.edata['train_mask']

model = Model(in_features=10, hidden_features=20, out_features=50, num_classes=100)

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
total_steps = 100


for epoch in tqdm(range(total_steps), desc="Training", position=0, leave=True):
    model.train()
    scores = model(edge_pred_graph, node_features, return_logits=True)
    
    score = scores[train_mask]
    true_label = edge_label[train_mask].long()  # CrossEntropyLoss expects Long type for labels

    loss = criterion(score, true_label)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Calculate Accuracy
    with torch.no_grad():
        preds_label = model(edge_pred_graph, node_features)
        accuracy = (preds_label[train_mask] == true_label.float()).float().mean()

    if epoch % 200 == 0:
#         print("training pred: ", prediction)
#         print("training label: ", true_label)
        print(f"Epoch: {epoch}, Loss: {loss.item()}, Training Accuracy: {accuracy.item() * 100:.2f}%\n\n")

# Validation
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    scores = model(edge_pred_graph, node_features, return_logits=True)
    
    val_score = scores[valid_mask]
    val_label = edge_label[valid_mask].long()

    val_loss = criterion(val_score, val_label)

    val_preds_label = torch.argmax(val_score, dim=1)
    val_accuracy = (val_preds_label == val_label).float().mean()

    print("====================================================================================")
    print(f"Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item() * 100:.2f}%")

Training:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, Loss: 6.121854782104492, Training Accuracy: 1.33%


Validation Loss: 10.762978553771973, Validation Accuracy: 1.00%
