In [1]:
import torch, dgl
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from dgl.data import DGLDataset, CoraGraphDataset
from dgl.nn.pytorch import GraphConv
from colorama import Fore

In [2]:
# Load the Cora citation dataset (the captured output below shows it being read from cached files).
dataset = CoraGraphDataset()
# NOTE: Cora contains only one graph, so 0 is the only index the dataset accepts.
g = dataset[0]      # __getitem__(self, idx) asserts idx == 0: "This dataset has only one graph"

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [3]:
print('Number of categories:', dataset.num_classes)


def show_graph_data():
    """Print every node-data entry of the global graph ``g`` and the shape of its feature matrix.

    Each entry of ``g.ndata`` is printed as its key (wrapped in red colour codes)
    followed by the stored tensor. Afterwards the shape of ``g.ndata['feat']`` is
    printed together with a short legend for its two dimensions.
    """
    print('Node features')
    for name, tensor in g.ndata.items():
        # Key in red, colour reset afterwards, then the tensor on its own line.
        print(Fore.RED, name, Fore.RESET)
        print(tensor)

    # Legend for the two dimensions of the feature matrix printed below.
    print('shape[0] is number of node')
    print('shape[1] is length of vector node feature')
    feat_shape = g.ndata['feat'].shape
    print('\nshape of node feature matrix X: ', feat_shape)
    # Edge features could be listed the same way (left disabled, as in the original):
    # print('Edge features')
    # for k, v in g.edata.items():
    #     print(k, v)

show_graph_data()

Number of categories: 7
Node features
[31m feat [39m
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0526, 0.0000]])
[31m label [39m
tensor([4, 4, 4,  ..., 4, 3, 3])
[31m test_mask [39m
tensor([ True,  True, False,  ..., False, False, False])
[31m val_mask [39m
tensor([False, False,  True,  ..., False, False, False])
[31m train_mask [39m
tensor([False, False, False,  ..., False, False, False])
shape[0] is number of node
shape[1] is length of vector node feature

shape of node feature matrix X:  torch.Size([2708, 1433])


#### Define GNN architecture

In [4]:
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    The first ``GraphConv`` layer maps ``in_feats`` input features to ``h_feats``
    hidden features; the second maps ``h_feats`` to ``num_classes`` outputs.
    A ReLU is applied between the two layers and no activation follows the
    second one, so ``forward`` returns raw scores.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        """Create the two graph-convolution layers.

        Args:
            in_feats: Size of each input feature vector.
            h_feats: Size of the hidden representation.
            num_classes: Size of the output (one score per class).
        """
        super().__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Run both layers on graph ``g`` with input features ``in_feat``.

        Returns:
            The output of the second convolution (no softmax applied).
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

#### training function

In [12]:
def train(g:'dgl.DGLGraph', model:nn.Module, epochs:int=100):
    """Train ``model`` on graph ``g`` with Adam and log progress every 5 epochs.

    The graph must carry the node data entries ``'feat'``, ``'label'``,
    ``'train_mask'``, ``'val_mask'`` and ``'test_mask'``. The loss is the
    cross-entropy over the nodes selected by ``train_mask`` only; validation and
    test accuracy are measured on the same forward pass (i.e. before that
    epoch's optimizer step).

    Args:
        g: Graph passed to the model; its ``ndata`` provides features, labels
            and the three boolean masks.
        model: Module called as ``model(g, features)`` returning one row of
            class scores per node.
        epochs: Number of full-graph training iterations. Defaults to 100 so
            that ``train(g, model)`` is a valid call.

    Returns:
        None. Progress is reported on stdout.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    for epoch in range(1, epochs+1):
        # Forward
        logits = model(g, features)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Metrics need no gradients; plain Python floats keep the "best" trackers
        # from holding on to tensors between epochs.
        with torch.no_grad():
            correct = (logits.argmax(1) == labels).float()
            val_acc = correct[val_mask].mean().item()
            test_acc = correct[test_mask].mean().item()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if not epoch % 5:
            print('In epoch {:3d}, loss: {:.4f}, val acc: {:.4f} (best {:.4f}), test acc: {:.4f} (best {:.4f})'.format(
                epoch, loss.item(), val_acc, best_val_acc, test_acc, best_test_acc))

#### Create the model and train it

In [13]:
# Input width is taken from the node-feature matrix, the hidden layer has 16 units,
# and there is one output per dataset class.
model = GCN(
    in_feats=g.ndata['feat'].shape[1],
    h_feats=16,
    num_classes=dataset.num_classes,
)
train(g, model, epochs=1000)



In epoch   5, loss: 1.9084, val acc: 0.3700 (best 0.3700), test acc: 0.4050 (best 0.4050)
In epoch  10, loss: 1.8400, val acc: 0.4700 (best 0.4700), test acc: 0.5000 (best 0.5000)
In epoch  15, loss: 1.7463, val acc: 0.5040 (best 0.5040), test acc: 0.5460 (best 0.5460)
In epoch  20, loss: 1.6289, val acc: 0.5880 (best 0.5880), test acc: 0.6200 (best 0.6200)
In epoch  25, loss: 1.4896, val acc: 0.6440 (best 0.6440), test acc: 0.6840 (best 0.6840)
In epoch  30, loss: 1.3316, val acc: 0.7040 (best 0.7040), test acc: 0.7150 (best 0.7100)
In epoch  35, loss: 1.1621, val acc: 0.7400 (best 0.7400), test acc: 0.7330 (best 0.7330)
In epoch  40, loss: 0.9905, val acc: 0.7500 (best 0.7520), test acc: 0.7430 (best 0.7400)
In epoch  45, loss: 0.8267, val acc: 0.7660 (best 0.7660), test acc: 0.7580 (best 0.7580)
In epoch  50, loss: 0.6787, val acc: 0.7780 (best 0.7780), test acc: 0.7680 (best 0.7680)
In epoch  55, loss: 0.5510, val acc: 0.7900 (best 0.7900), test acc: 0.7660 (best 0.7660)
In epoch  

#### Training on GPU

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Training on device:', device)

# The graph (with its node data) and the model must live on the same device.
g = g.to(device)
model = GCN(g.ndata['feat'].shape[1], 16, dataset.num_classes).to(device)
# Pass the epoch count explicitly (same budget as the CPU run above).
train(g, model, 1000)