In this work, you are required to build a GNN training pipeline. Once it is complete, you will be able to use Graph Neural Networks in practice.

In [None]:
!pip install  dgl -f https://data.dgl.ai/wheels/repo.html
!pip install torch_geometric
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.1+cpu.html

First, we need to download the dataset and load data.

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
# Load Cora into the current directory; NormalizeFeatures is applied as the
# dataset transform each time a graph is accessed.
dataset = Planetoid(root="./", name="Cora", transform=T.NormalizeFeatures())
data = dataset[0]

# Convenience aliases for the tensors the model consumes.
# NOTE(review): `data.edge_weight` is presumably None for Cora, which ships
# without per-edge weights -- confirm before relying on it downstream.
x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight

Then, you need to implement a GNN model. You may copy the GCNConv from your work two weeks ago, and build the model with the convolution layers.

In [None]:
from torch_geometric.nn import MessagePassing
import torch.nn as nn
import torch
class PyG_GCNConv(MessagePassing):
    """Graph convolution: weighted-sum neighbour aggregation plus a linear map.

    For every node the layer sums the (edge-weighted) projected features
    delivered along its incoming edges and adds a bias. Mathematically this
    is ``(sum_j w_j * x_j) @ W + b``; the projection is applied *before*
    message passing, which is equivalent by linearity but aggregates
    ``out_channel``-wide vectors instead of ``in_channel``-wide ones.

    No self-loops are added and no degree normalisation is applied; callers
    that want them must encode them in ``edge_index`` / ``edge_weight``.

    Args:
        in_channel: Size of each input node feature vector.
        out_channel: Size of each output node feature vector.
    """

    def __init__(
        self,
        in_channel,
        out_channel,
    ):
        # Sum aggregation is stated explicitly rather than relying on the
        # library default.
        super().__init__(aggr="add")
        self.in_channel = in_channel
        self.out_channel = out_channel
        # Glorot initialisation breaks the symmetry between output units.
        # An all-ones weight matrix gives every unit the same value and the
        # same gradient, so the units can never diverge during training.
        self.W = nn.Parameter(torch.empty(in_channel, out_channel))
        nn.init.xavier_uniform_(self.W)
        self.b = nn.Parameter(torch.zeros(out_channel))

    def forward(self, x, edge_index, edge_weight=None):
        """Apply the convolution.

        Args:
            x: Node feature matrix with ``in_channel`` columns.
            edge_index: Edge list tensor of shape ``(2, num_edges)``.
            edge_weight: Optional per-edge scalar weights. ``None`` (e.g. a
                dataset without edge weights) is treated as all ones.

        Returns:
            Node feature matrix with ``out_channel`` columns.
        """
        if edge_weight is None:
            # Unweighted graph: every edge contributes with weight 1.
            edge_weight = x.new_ones(edge_index.size(1))
        # Project first so that messages are only `out_channel` wide.
        x = x @ self.W
        return self.propagate(edge_index, x=x, edge_weight=edge_weight)

    def message(self, x_j, edge_weight):
        """Scale each neighbour's projected features by its edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Add the bias to the aggregated messages."""
        return aggr_out + self.b
  
class PyG_GCN(nn.Module):
    """Two-layer GCN built from ``PyG_GCNConv`` with a ReLU in between.

    Args:
        in_channels: Width of the input node features.
        hidden_channels: Width of the hidden representation.
        out_channels: Width of the output (one logit per class).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.conv1 = PyG_GCNConv(in_channels, hidden_channels)
        self.conv2 = PyG_GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Return the raw (un-normalised) output of the second layer."""
        hidden = self.conv1(x, edge_index, edge_weight).relu()
        return self.conv2(hidden, edge_index, edge_weight)

Next, build the training and evaluation parts; this is similar to the work in week 4. Our downstream task is node classification.

In [None]:
from torch_geometric.logging import log
import torch   
# Build your training pipeline
# Hyperparameters for the PyG training run.
hidden_dim, lr, epochs = 16, 0.001, 100

# Two-layer GCN sized from the dataset's feature and class counts.
model = PyG_GCN(
    in_channels=dataset.num_features,
    hidden_channels=hidden_dim,
    out_channels=dataset.num_classes,
)

# Plain Adam over all model parameters. No weight decay is applied; pass
# `weight_decay=5e-4` here to enable L2 regularisation.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# Cross-entropy over the raw model outputs.
loss_func = nn.CrossEntropyLoss()
def train():
    """Run one full-batch optimisation step and return the training loss.

    Uses the module-level ``model``, ``optimizer``, ``loss_func`` and
    ``data``. The loss is computed on the training nodes only.

    Returns:
        The scalar training loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_weight)
    # The split masks are attributes of `data` itself, not of the feature
    # tensor `data.x` (which has no `train_mask` attribute).
    mask = data.train_mask
    loss = loss_func(out[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return float(loss)
   
@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on the train/val/test splits.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` of accuracies, each the
        fraction of correctly classified nodes within that split's mask.
    """
    model.eval()
    predictions = model(data.x, data.edge_index, data.edge_weight).argmax(dim=-1)

    # Compare once over all nodes, then count the hits inside each split.
    hits = predictions == data.y
    return [
        int(hits[split].sum()) / int(split.sum())
        for split in (data.train_mask, data.val_mask, data.test_mask)
    ]

# Model selection: report the test accuracy of the epoch with the best
# validation accuracy so far. Both trackers must exist before the first
# comparison, otherwise the loop fails with a NameError on epoch 1.
best_val_acc = test_acc = 0.0
for epoch in range(1, epochs + 1):
    loss = train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)

Now you can train the GCN model with PyG. Next, try using DGL to implement similar functionality.

In [None]:
import argparse

import dgl
import dgl.nn as dglnn

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CoraGraphDataset
import dgl.function as fn
# AddSelfLoop is applied to the graph as the dataset transform.
transform = AddSelfLoop()
data = CoraGraphDataset(transform=transform)

# The dataset holds a single graph; pull the node-level tensors out of it.
g = data[0]
features, labels = g.ndata["feat"], g.ndata["label"]
# (train, val, test) boolean node masks, in that order.
masks = tuple(g.ndata[key] for key in ("train_mask", "val_mask", "test_mask"))


class DGL_GCNConv(nn.Module):
    """Graph convolution: project, then sum edge-weighted neighbour messages.

    Computes ``sum_j w_j * (h_j @ W) + b`` over each node's incoming edges.
    No degree normalisation is applied.

    Args:
        in_channel: Size of each input node feature vector.
        out_channel: Size of each output node feature vector.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        # Glorot initialisation breaks the symmetry between output units.
        # An all-ones weight matrix gives every unit the same value and the
        # same gradient, so the units can never diverge during training.
        self.W = nn.Parameter(torch.empty(in_channel, out_channel))
        nn.init.xavier_uniform_(self.W)
        self.b = nn.Parameter(torch.zeros(out_channel))

    def forward(self, g, h, edge_weight):
        """Apply the convolution.

        Args:
            g: The DGL graph to pass messages on.
            h: Node feature matrix with ``in_channel`` columns.
            edge_weight: Per-edge scalar weights, one entry per edge of ``g``.

        Returns:
            Node feature matrix with ``out_channel`` columns.
        """
        # local_scope keeps the temporary node/edge fields from leaking onto
        # the caller's graph once the forward pass is done.
        with g.local_scope():
            g.ndata['hm'] = h @ self.W
            g.edata['edge_weight'] = edge_weight
            # Message = source feature * edge weight; reduce = sum per node.
            g.update_all(fn.u_mul_e('hm', 'edge_weight', 'm'), fn.sum('m', 'h'))
            return g.ndata['h'] + self.b

class DGL_GCN(nn.Module):
    """Three-layer GCN built from ``DGL_GCNConv`` with ReLU between layers.

    Args:
        in_feats: Width of the input node features.
        out_feats: Width of the output (one logit per class).
        hid_feats: Width of both hidden layers. Defaults to 16.
    """

    def __init__(self, in_feats, out_feats, hid_feats=16):
        super().__init__()
        self.conv1 = DGL_GCNConv(in_feats, hid_feats)
        self.conv2 = DGL_GCNConv(hid_feats, hid_feats)
        self.conv3 = DGL_GCNConv(hid_feats, out_feats)

    def forward(self, graph, inputs):
        """Return the raw (un-normalised) output of the last layer.

        Every edge is given weight 1.
        """
        # Build the unit edge weights once and share them across layers;
        # new_ones keeps them on the same device and dtype as the features.
        edge_weight = inputs.new_ones(graph.number_of_edges())
        h = F.relu(self.conv1(graph, inputs, edge_weight))
        h = F.relu(self.conv2(graph, h, edge_weight))
        return self.conv3(graph, h, edge_weight)

def train(g, features, labels, masks, model):
    """Train ``model`` in place for 100 full-batch epochs with Adam (lr 0.01).

    Args:
        g: The DGL graph.
        features: Node feature matrix.
        labels: Node class labels.
        masks: Sequence whose first two entries are the train and validation
            node masks.
        model: The model to optimise. It is updated in place, so the caller
            can evaluate the same object afterwards.

    Prints the loss, training accuracy and validation accuracy every epoch.
    """
    # Optimise the model that was passed in. Building a fresh model here
    # would leave the caller holding an untrained network.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    train_mask, val_mask = masks[0], masks[1]
    for epoch in range(100):
        model.train()
        logits = model(g, features)
        logp = F.log_softmax(logits, 1)
        loss = F.nll_loss(logp[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Training accuracy comes from the pre-step forward pass above.
        train_acc = (logp[train_mask].argmax(1) == labels[train_mask]).float().mean()
        val_acc = evaluate(g, features, labels, val_mask, model)
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}')

def evaluate(g, features, labels, mask, model):
    """Return the classification accuracy of ``model`` on the masked nodes.

    Args:
        g: Graph forwarded to the model unchanged.
        features: Node feature matrix forwarded to the model.
        labels: Ground-truth class index per node.
        mask: Boolean/index mask selecting the nodes to score.
        model: Callable as ``model(g, features)`` returning per-node logits.

    Returns:
        Fraction of selected nodes whose highest-scoring class equals the label.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(g, features)[mask]
        targets = labels[mask]
        predicted = selected_logits.max(dim=1).indices
        n_correct = (predicted == targets).sum().item()
        return n_correct * 1.0 / len(targets)

# The output layer needs one logit per class, so size it from the labels
# rather than hard-coding a width.
num_classes = int(labels.max().item()) + 1
model = DGL_GCN(features.shape[1], num_classes)
print("Training...")
train(g, features, labels, masks, model)

# test the model
print("Testing...")
acc = evaluate(g, features, labels, masks[2], model)
print("Test accuracy {:.4f}".format(acc))

If you find it hard to implement, you may refer to the official implementation of the GNN training, like [PyG](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/gcn.py) and [DGL](https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/train.py).