In [4]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
sys.path.append('../..')
import glb
from dgl.nn import GraphConv

# Relative paths to the metadata.json / task.json pair of each example dataset
# (resolved against the current working directory). In the code visible here
# only the ogbn-arxiv pair is passed to the loaders; the remaining pairs are
# available for switching to a different dataset.
citeseer_metadata_path = "../../examples/citeseer/metadata.json"
citeseer_task_path = "../../examples/citeseer/task.json"
cora_metadata_path = "../../examples/cora/metadata.json"
cora_task_path = "../../examples/cora/task.json"
pubmed_metadata_path = "../../examples/pubmed/metadata.json"
pubmed_task_path = "../../examples/pubmed/task.json"
ogbn_arxiv_metadata_path = "../../examples/ogb_data/node_prediction/ogbn-arxiv/metadata.json"
ogbn_arxiv_task_path = "../../examples/ogb_data/node_prediction/ogbn-arxiv/task.json"
ogbn_mag_metadata_path = "../../examples/ogb_data/node_prediction/ogbn-mag/metadata.json"
ogbn_mag_task_path = "../../examples/ogb_data/node_prediction/ogbn-mag/task.json"

# Read the graph and the task description for ogbn-arxiv from their JSON files.
# To run on another dataset, pass a different *_metadata_path / *_task_path
# pair here.
g = glb.graph.read_glb_graph(metadata_path=ogbn_arxiv_metadata_path)
task = glb.task.read_glb_task(task_path=ogbn_arxiv_task_path)

# Combine graph and task into a dataset object, then rebind `g` to its first
# element; from here on `g` no longer refers to the object returned by
# read_glb_graph.
# NOTE(review): dataset[0] is presumably a DGL graph, since it is handed to
# dgl.add_self_loop and indexed through `.ndata` below - confirm.
dataset = glb.dataloading.combine_graph_and_task(g, task)
g = dataset[0]

### for ogbn_arxiv ###
# Rebind `g` to the result of dgl.add_self_loop.
# NOTE(review): presumably required so GraphConv does not reject nodes with
# zero in-degree on this dataset - confirm before reusing for other datasets.
g = dgl.add_self_loop(g)

class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Applies ``conv1``, a ReLU, then ``conv2`` and returns the output of the
    second layer unchanged (no softmax or other activation is applied to it).
    """

    def __init__(self, in_feats, h_feats, num_classes):
        """Create the two ``GraphConv`` layers.

        Args:
            in_feats: First argument given to ``conv1``.
            h_feats: Second argument of ``conv1`` and first argument of
                ``conv2``, i.e. the size shared between the two layers.
            num_classes: Second argument given to ``conv2``.
        """
        super().__init__()
        # conv1 is built before conv2, matching the original order of
        # layer construction.
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Run both layers on graph ``g`` with input features ``in_feat``.

        Args:
            g: Graph object forwarded as-is to both layers.
            in_feat: Input handed to the first layer.

        Returns:
            The output of ``conv2`` applied to the ReLU-activated output of
            ``conv1``.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

# Instantiate the GCN: the input size is taken from the second dimension of
# the 'NodeFeature' tensor, the hidden size is fixed at 16 and the output
# size comes from the dataset's label count.
model = GCN(
    in_feats=g.ndata['NodeFeature'].shape[1],
    h_feats=16,
    num_classes=dataset._num_labels,
)

def train(g, model, *, num_epochs=100, lr=0.01, log_every=5):
    """Train ``model`` on graph ``g`` with Adam and cross-entropy loss.

    Each epoch runs one full forward pass, computes the loss on the nodes
    selected by ``g.ndata['train_set']``, measures accuracy on the nodes
    selected by ``g.ndata['val_set']`` and ``g.ndata['test_set']`` from that
    same forward pass (i.e. before the optimizer step), and then updates the
    model. Progress is printed to stdout; nothing is returned.

    Args:
        g: Graph whose ``ndata`` provides 'NodeFeature', 'NodeLabel',
            'train_set', 'val_set' and 'test_set'. The three ``*_set``
            entries are used to index the logits and labels.
            NOTE(review): presumably boolean node masks - confirm.
        model: Callable as ``model(g, features)`` returning per-node logits,
            and exposing ``parameters()`` for the optimizer.
        num_epochs: Number of training epochs (default 100).
        lr: Adam learning rate (default 0.01).
        log_every: Print a progress line on every ``log_every``-th epoch,
            starting with epoch 0 (default 5). A value of 0 or ``None``
            disables printing.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val_acc = 0.0
    best_test_acc = 0.0

    features = g.ndata['NodeFeature']
    labels = g.ndata['NodeLabel']
    train_mask = g.ndata['train_set']
    val_mask = g.ndata['val_set']
    test_mask = g.ndata['test_set']

    for e in range(num_epochs):
        # Forward
        logits = model(g, features)

        # Only the training nodes contribute to the loss.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Accuracy is bookkeeping only, so keep it out of the autograd graph
        # and store plain Python floats.
        with torch.no_grad():
            pred = logits.argmax(1)
            val_acc = (pred[val_mask] == labels[val_mask]).float().mean().item()
            test_acc = (pred[test_mask] == labels[test_mask]).float().mean().item()

        # Model selection on validation accuracy: remember the test accuracy
        # observed at the epoch with the best validation accuracy so far.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and e % log_every == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss.item(), val_acc, best_val_acc, test_acc, best_test_acc))

# CPU run: construct a new model (replacing any earlier `model`) and train it
# on the graph as loaded.
model = GCN(
    in_feats=g.ndata['NodeFeature'].shape[1],
    h_feats=16,
    num_classes=dataset._num_labels,
)
train(g, model)

# GPU run: uncomment the lines below to move the graph and a newly built
# model to CUDA before training.
# g = g.to('cuda')
# model = GCN(g.ndata['NodeFeature'].shape[1], 16, dataset._num_labels).to('cuda')
# train(g, model)

OGBN-ARXIV dataset.
The task is to predict the 40 subject areas of arXiv CS papers, e.g., cs.AI, cs.LG, and cs.OS, which are manually determined (i.e., labeled) by the paper’s authors and arXiv moderators. With the volume of scientific publications doubling every 12 years over the past century, it is practically important to automatically classify each publication’s areas and topics. Formally, the task is to predict the primary categories of the arXiv papers, which is formulated as a 40-class classification problem.
In epoch 0, loss: 3.675, val acc: 0.031 (best 0.031), test acc: 0.026 (best 0.026)
In epoch 5, loss: 3.341, val acc: 0.076 (best 0.078), test acc: 0.059 (best 0.060)
In epoch 10, loss: 3.180, val acc: 0.246 (best 0.246), test acc: 0.217 (best 0.217)
In epoch 15, loss: 3.080, val acc: 0.321 (best 0.330), test acc: 0.277 (best 0.290)
In epoch 20, loss: 2.974, val acc: 0.297 (best 0.330), test acc: 0.255 (best 0.290)
In epoch 25, loss: 2.861, val acc: 0.312 (best 0.330), test 