In this work, you are required to build a GNN training pipeline. After that, you will be able to use Graph Neural Networks in practice.

First, we need to download the dataset and load data.

In [1]:
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
# Load the Cora graph from the Planetoid collection into the current
# directory, applying the NormalizeFeatures transform on access.
dataset = Planetoid(root="./", name="Cora", transform=T.NormalizeFeatures())
data = dataset[0]  # first graph object of the dataset

# Pull out the tensors that are fed to the model.
# NOTE(review): edge_weight is presumably None for Cora (no weights stored) - confirm.
x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight

  from .autonotebook import tqdm as notebook_tqdm
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Then, you need to implement a GNN model. You may copy the GCNConv from your work two weeks ago, and build the model with the convolution layers.

In [5]:
import torch
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch_geometric.nn import GCNConv
class PyG_GCNConv(MessagePassing):
  """Graph convolution: linear transform followed by (weighted) sum aggregation.

  Node features are first multiplied by the weight matrix of ``self.lin``,
  then the messages of each node's neighbours are summed (``aggr="sum"``),
  and finally the bias is added once per node.

  The layer itself adds no self-loops and performs no degree normalization;
  any normalization has to be supplied by the caller through ``edge_weight``.

  Args:
    in_channel: Size of each input node feature vector.
    out_channel: Size of each output node feature vector.
  """

  def __init__(self, in_channel, out_channel):
    super().__init__(aggr="sum")
    self.lin = nn.Linear(in_channel, out_channel, bias=True)

    nn.init.xavier_uniform_(self.lin.weight, gain=1.414)
    if self.lin.bias is not None:
      nn.init.zeros_(self.lin.bias)

  def forward(self, x, edge_index, edge_weight=None):
    """Compute the convolution output for all nodes.

    Args:
      x: Node feature matrix; its last dimension must equal ``in_channel``.
      edge_index: Edge list passed through to ``propagate``.
      edge_weight: Optional per-edge scalar weights. When ``None`` every
        message is used unscaled.

    Returns:
      Tensor with ``out_channel`` features per node.
    """
    # nn.Linear stores its weight as (out_channel, in_channel), hence the
    # transpose. The bias is deliberately left out here so that it is added
    # once per node after aggregation instead of once per incoming message.
    x_T = x @ self.lin.weight.t()
    out = self.propagate(edge_index=edge_index, edge_weight=edge_weight, x=x_T)
    if self.lin.bias is not None:
      # Out-of-place add keeps the aggregated tensor intact for autograd.
      out = out + self.lin.bias
    return out

  def message(self, x_j, edge_weight):
    """Return the message sent along each edge: ``x_j`` scaled by its weight."""
    if edge_weight is None:
      # Unweighted graph: pass the neighbour features through unchanged.
      return x_j
    return edge_weight.unsqueeze(-1) * x_j

class PyG_GCN(nn.Module):
  """Stack of ``GCNConv`` layers with ReLU between consecutive layers.

  Args:
    in_channel: Size of each input node feature vector.
    hidden_channel: Width of every hidden layer.
    out_channel: Size of the output (e.g. number of classes).
    num_layers: Total number of convolution layers (default 2). With
      ``num_layers=1`` the model is a single ``in_channel -> out_channel``
      layer and ``hidden_channel`` is unused.

  Raises:
    ValueError: If ``num_layers`` is smaller than 1.
  """

  def __init__(self, in_channel, hidden_channel, out_channel, num_layers=2):
    super().__init__()
    if num_layers < 1:
      raise ValueError(f"num_layers must be >= 1, got {num_layers}")

    # Feature width at every layer boundary, e.g. [in, hidden, out] for two
    # layers. Building from this list makes the layer count match num_layers
    # exactly, including the single-layer case.
    dims = [in_channel] + [hidden_channel] * (num_layers - 1) + [out_channel]
    self.layers = nn.ModuleList(
      GCNConv(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:])
    )
    self.relu = nn.ReLU()

  def forward(self, x, edge_index, edge_weight=None):
    """Return the raw (un-normalized) output of the last layer for every node.

    Args:
      x: Node feature matrix.
      edge_index: Edge list forwarded to every convolution layer.
      edge_weight: Optional per-edge weights forwarded to every layer.
    """
    # All layers but the last are followed by ReLU; the last one returns
    # logits so that a loss such as CrossEntropyLoss can be applied directly.
    for layer in self.layers[:-1]:
      x = layer(x, edge_index, edge_weight=edge_weight)
      x = self.relu(x)
    out = self.layers[-1](x, edge_index, edge_weight=edge_weight)
    return out

Next, build the training and evaluation part; this is similar to the work in week 4. Our downstream task is node classification.

In [None]:
from torch_geometric.logging import log
# Build your training pipeline
# --- hyper-parameters ---
hidden_dim = 16
lr = 0.001
epochs = 100

# --- model, loss and optimizer ---
model = PyG_GCN(
  in_channel=dataset.num_features,
  hidden_channel=hidden_dim,
  out_channel=dataset.num_classes,
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

# Best validation accuracy seen so far, and the test accuracy measured at
# that same epoch (updated together in the epoch loop).
best_val_acc = test_acc = 0.0

def train():
  """Run one full-batch optimization step and return the loss as a float.

  Uses the module-level ``model``, ``optimizer``, ``criterion`` and graph
  tensors; the loss is computed on the nodes selected by ``data.train_mask``.
  """
  model.train()
  optimizer.zero_grad()

  logits = model(x, edge_index, edge_weight)
  train_mask = data.train_mask
  loss = criterion(logits[train_mask], data.y[train_mask])

  loss.backward()
  optimizer.step()
  return loss.item()

@torch.no_grad()
def test():
  """Return ``[train_acc, val_acc, test_acc]`` for the current model.

  Predictions are the arg-max class of a single forward pass over the whole
  graph; each accuracy is the fraction of correct predictions inside the
  corresponding mask.
  """
  model.eval()
  pred = model(x, edge_index, edge_weight).argmax(dim=-1)

  # Element-wise correctness for every node, sliced per split below.
  hits = pred == data.y
  splits = (data.train_mask, data.val_mask, data.test_mask)
  return [int(hits[mask].sum()) / int(mask.sum()) for mask in splits]

# Train for `epochs` epochs, evaluating after every step.
for epoch in range(1, epochs + 1):
  loss = train()
  train_acc, val_acc, tmp_test_acc = test()
  # Model selection on the validation split: the reported test accuracy is
  # the one measured at the epoch with the best validation accuracy so far.
  if val_acc > best_val_acc:
    best_val_acc, test_acc = val_acc, tmp_test_acc
  log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)

Epoch: 001, Loss: 1.9460, Train: 0.1857, Val: 0.1500, Test: 0.1750
Epoch: 002, Loss: 1.9451, Train: 0.2643, Val: 0.1780, Test: 0.2270
Epoch: 003, Loss: 1.9444, Train: 0.3286, Val: 0.2140, Test: 0.2700
Epoch: 004, Loss: 1.9436, Train: 0.3786, Val: 0.2460, Test: 0.3020
Epoch: 005, Loss: 1.9429, Train: 0.5071, Val: 0.2920, Test: 0.3400
Epoch: 006, Loss: 1.9421, Train: 0.5643, Val: 0.3280, Test: 0.3770
Epoch: 007, Loss: 1.9413, Train: 0.6286, Val: 0.3760, Test: 0.4180
Epoch: 008, Loss: 1.9405, Train: 0.6643, Val: 0.4160, Test: 0.4640
Epoch: 009, Loss: 1.9396, Train: 0.6929, Val: 0.4440, Test: 0.4960
Epoch: 010, Loss: 1.9387, Train: 0.7214, Val: 0.4780, Test: 0.5350
Epoch: 011, Loss: 1.9377, Train: 0.7643, Val: 0.5220, Test: 0.5710
Epoch: 012, Loss: 1.9368, Train: 0.8071, Val: 0.5580, Test: 0.5860
Epoch: 013, Loss: 1.9358, Train: 0.8500, Val: 0.5720, Test: 0.6060
Epoch: 014, Loss: 1.9348, Train: 0.8714, Val: 0.5760, Test: 0.6240
Epoch: 015, Loss: 1.9337, Train: 0.8786, Val: 0.5740, Test: 0.

Now you can train the GCN model with PyG. Next, you may try using DGL to implement similar functionality.

In [None]:
import argparse

import dgl
import dgl.nn as dglnn

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CoraGraphDataset

# Load Cora through DGL with the AddSelfLoop transform applied.
transform = AddSelfLoop()
data = CoraGraphDataset(transform=transform)
g = data[0]  # first graph of the dataset

# Node features, labels and the three boolean split masks stored on the graph.
features, labels = g.ndata["feat"], g.ndata["label"]
masks = tuple(g.ndata[key] for key in ("train_mask", "val_mask", "test_mask"))


class DGL_GCNConv(nn.Module):
  """Graph convolution layer implemented with DGL message passing.

  Computes ``D_in^{-1/2} A D_out^{-1/2} X W^T + b``: features are linearly
  transformed, scaled by the inverse square root of the source out-degree,
  summed over incoming edges, and scaled by the inverse square root of the
  destination in-degree. Degrees are clamped to at least 1 so isolated
  nodes do not produce a division by zero.

  Args:
    in_size: Size of each input node feature vector.
    out_size: Size of each output node feature vector.
  """

  def __init__(self, in_size, out_size):
    super().__init__()
    self.lin = nn.Linear(in_size, out_size, bias=True)
    nn.init.xavier_uniform_(self.lin.weight)
    nn.init.zeros_(self.lin.bias)

  def forward(self, g, h):
    """Apply the convolution.

    Args:
      g: Homogeneous DGL graph.
      h: Node feature matrix with ``in_size`` features per node.

    Returns:
      Tensor with ``out_size`` features per node.
    """
    # Local import keeps the block self-contained; `dgl` is already loaded
    # by the file, so this is only a cached module lookup.
    import dgl.function as fn

    # local_scope() discards the temporary node fields written below, so
    # the caller's graph is left unchanged.
    with g.local_scope():
      # nn.Linear stores its weight as (out_size, in_size); the bias is
      # added after aggregation so it is applied once per node.
      h = h @ self.lin.weight.t()

      src_norm = g.out_degrees().to(h.dtype).clamp(min=1).pow(-0.5)
      dst_norm = g.in_degrees().to(h.dtype).clamp(min=1).pow(-0.5)

      g.ndata["h"] = h * src_norm.unsqueeze(-1)
      g.update_all(fn.copy_u("h", "m"), fn.sum("m", "agg"))
      out = g.ndata["agg"] * dst_norm.unsqueeze(-1)

    return out + self.lin.bias

class DGL_GCN(nn.Module):
  """Multi-layer GCN built from ``dglnn.GraphConv`` layers.

  ReLU follows every layer except the last, and dropout is applied to the
  input of every layer except the first.

  Args:
    in_channel: Size of each input node feature vector.
    hidden_channel: Width of every hidden layer.
    out_channel: Size of the output (number of classes). When ``None`` it
      falls back to ``data.num_classes`` of the module-level dataset, so the
      model can be built from just the input and hidden sizes.
    num_layers: Total number of convolution layers (default 2).
    dropout: Dropout probability between layers (default 0.5).

  Raises:
    ValueError: If ``num_layers`` is smaller than 1.
  """

  def __init__(self, in_channel, hidden_channel, out_channel=None,
               num_layers=2, dropout=0.5):
    super().__init__()
    if num_layers < 1:
      raise ValueError(f"num_layers must be >= 1, got {num_layers}")
    if out_channel is None:
      # Default to the class count of the dataset loaded at module level.
      out_channel = data.num_classes

    # Feature width at every layer boundary, e.g. [in, hidden, out].
    dims = [in_channel] + [hidden_channel] * (num_layers - 1) + [out_channel]
    self.layers = nn.ModuleList(
      dglnn.GraphConv(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:])
    )
    self.dropout = nn.Dropout(dropout)

  def forward(self, g, features):
    """Return per-node class logits.

    Args:
      g: DGL graph the convolutions run on.
      features: Node feature matrix with ``in_channel`` features per node.
    """
    h = features
    last = len(self.layers) - 1
    for i, layer in enumerate(self.layers):
      if i != 0:
        h = self.dropout(h)
      h = layer(g, h)
      if i != last:
        # The final layer stays linear so its output can be used as logits.
        h = F.relu(h)
    return h

def train(g, features, labels, masks, model, epochs=200, lr=1e-2,
          weight_decay=5e-4):
  """Train ``model`` full-batch for node classification.

  Each epoch performs one Adam step on the cross-entropy loss of the
  training nodes, then prints the loss and the validation accuracy.

  Args:
    g: DGL graph passed to the model.
    features: Node feature matrix.
    labels: Ground-truth class index per node.
    masks: Sequence whose first two entries are the boolean train and
      validation masks.
    model: Module called as ``model(g, features)`` that returns logits.
    epochs: Number of training epochs.
    lr: Adam learning rate.
    weight_decay: L2 penalty passed to Adam.
  """
  train_mask, val_mask = masks[0], masks[1]
  loss_fcn = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(
    model.parameters(), lr=lr, weight_decay=weight_decay
  )

  for epoch in range(epochs):
    # evaluate() switches the model to eval mode, so training mode has to
    # be re-enabled at the start of every epoch.
    model.train()
    logits = model(g, features)
    loss = loss_fcn(logits[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    acc = evaluate(g, features, labels, val_mask, model)
    print(
      "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
        epoch, loss.item(), acc
      )
    )

def evaluate(g, features, labels, mask, model):
  """Return the classification accuracy of ``model`` on the masked nodes.

  The model is switched to eval mode and called as ``model(g, features)``
  with gradients disabled; the predicted class of a node is the index of
  its largest logit. The result is a Python float in ``[0, 1]``.
  """
  model.eval()
  with torch.no_grad():
    masked_logits = model(g, features)[mask]
    targets = labels[mask]
    predicted = torch.max(masked_logits, dim=1).indices
    num_correct = torch.sum(predicted == targets).item()
    return num_correct * 1.0 / len(targets)

# Build the model: input width is the node-feature dimension, 16 hidden units.
in_size = features.shape[1]
model = DGL_GCN(in_size, 16)

print("Training...")
train(g, features, labels, masks, model)

# test the model
print("Testing...")
test_mask = masks[2]  # masks is ordered (train, val, test)
acc = evaluate(g, features, labels, test_mask, model)
print("Test accuracy {:.4f}".format(acc))

If you find it hard to implement, you may refer to the official GNN training examples for [PyG](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/gcn.py) and [DGL](https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/train.py).