In [24]:
# Notebook setup: render matplotlib figures inline and import the training utilities.
%matplotlib inline

import torch.optim as optim

from torch_geometric.data import DataLoader
import torch


In this notebook, we apply **Graph Neural Networks (GNNs) to the task of node classification**.
Here, we are given the ground-truth labels of only a small subset of nodes, and want to infer the labels for all the remaining nodes (*transductive learning*).

To demonstrate, we make use of the `Cora` dataset, which is a **citation network** where nodes represent documents.
Each node is described by a 1433-dimensional bag-of-words feature vector.
Two documents are connected if there exists a citation link between them.
The task is to infer the category of each document (7 in total).


In [2]:
# Load the Cora citation dataset through PyG's Planetoid wrapper and print
# summary statistics for the dataset and for its single graph.
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Files are stored under data/Planetoid; the NormalizeFeatures transform is
# attached to the dataset so it is applied to the graphs it returns.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# Ratio of stored edges to nodes.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
# Fraction of all nodes whose label is available for training.
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
# NOTE(review): contains_isolated_nodes() / contains_self_loops() are reported as
# deprecated in newer PyG releases in favour of has_isolated_nodes() /
# has_self_loops() -- confirm against the installed version before upgrading.
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!

Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Contains i

In [3]:
# Raise the tensor-printing summarization limit (edgeitems) so that printed
# tensors show up to 3000 items at each edge of a dimension instead of being elided.
torch.set_printoptions(edgeitems=3000)

In [8]:
# Total number of nodes in the graph.
data.num_nodes

2708

In [9]:
# Number of non-zero entries in the training mask, i.e. how many nodes are marked for training.
torch.count_nonzero(data.train_mask)

tensor(140)

In [10]:
# Size of the training split as a plain Python number.
data.train_mask.sum().item()

140

In [11]:
# Size of the validation split as a plain Python number.
data.val_mask.sum().item()

500

In [12]:
# Size of the test split as a plain Python number.
data.test_mask.sum().item()

1000

In [13]:
# Sum of the label tensor over all nodes.
# NOTE(review): presumably a quick sanity check; the sum of class ids is not a meaningful statistic by itself.
data.y.sum()

tensor(7781)

In [20]:
# Build the GCN used for node classification. GCNStack is project-local (src/GCN);
# its input width is the node feature size and its output width is the number of
# classes, with hidden widths 128 and 16.
from src.GCN import GCNStack
model = GCNStack(data.num_node_features, hidden_dim1=128, hidden_dim2=16, output_dim=dataset.num_classes)
# Print the module tree to check the layer sizes.
print(model)

GCNStack(
  (convs): ModuleList(
    (0): GCNConv(1433, 128)
    (1): GCNConv(128, 16)
    (2): GCNConv(16, 16)
  )
  (lns): ModuleList(
    (0): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
  )
  (post_mp): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=7, bias=True)
  )
)


In [23]:
def model_test(loader, model, is_validation=False, is_training=False):
    ''' Compute the node-classification accuracy of ``model`` on one split.

    The evaluated split is selected by the two flags:

    * ``is_validation=True``  -> nodes selected by ``val_mask``
      (takes precedence over ``is_training``)
    * ``is_training=True``    -> nodes selected by ``train_mask``
    * neither flag set        -> nodes selected by ``test_mask``

    Args:
        loader: iterable yielding graph batches that expose ``x``,
            ``edge_index``, ``y`` and the three masks.
        model: called as ``model(x, edge_index)`` and expected to return a
            pair whose second element holds per-node class scores.
            It is switched to eval mode via ``model.eval()``.
        is_validation: evaluate on the validation nodes.
        is_training: evaluate on the training nodes (unless
            ``is_validation`` is set).

    Returns:
        Fraction of selected nodes whose arg-max prediction equals the label,
        or ``0.0`` when the chosen mask selects no node at all.
    '''
    model.eval()

    correct = 0
    total = 0
    for data in loader:
        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            _, scores = model(data.x, data.edge_index)
        pred = scores.argmax(dim=1)

        if is_validation:
            mask = data.val_mask
        elif is_training:
            mask = data.train_mask
        else:  # testing
            mask = data.test_mask

        # Count hits and the denominator over the SAME mask, so that the
        # ratio is a real accuracy for whichever split was requested.
        correct += pred[mask].eq(data.y[mask]).sum().item()
        total += int(mask.sum().item())

    # Avoid a ZeroDivisionError when the selected split is empty.
    return correct / total if total else 0.0

def model_train(dataset, writer, model, epoch_num, lr, weight_decay):
    ''' Fit ``model`` on the training nodes of ``dataset`` and log to ``writer``.

    Runs ``epoch_num + 1`` epochs (epochs ``0 .. epoch_num`` inclusive) with an
    Adam optimizer. The per-epoch loss is written to ``writer`` under "loss";
    every 10th epoch the accuracy returned by ``model_test`` is printed and
    written under "test accuracy"; every 20th epoch the embedding produced for
    the last processed batch is written with ``writer.add_embedding``.

    Args:
        dataset: graph dataset wrapped in a non-shuffling ``DataLoader``.
        writer: summary writer providing ``add_scalar`` and ``add_embedding``.
        model: network called as ``model(x, edge_index)`` returning
            ``(embedding, prediction)`` and exposing ``loss(pred, label)``.
        epoch_num: index of the final epoch.
        lr: learning rate handed to Adam.
        weight_decay: weight decay handed to Adam.

    Returns:
        The same ``model`` object after training.
    '''
    # One un-shuffled loader is shared by the optimisation and evaluation passes.
    loader = DataLoader(dataset, shuffle=False)
    test_loader = loader

    # Adam is used here; an SGD(momentum=0.9) variant was previously kept as a
    # commented-out alternative, as was a writer.add_graph(...) call.
    opt = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(epoch_num + 1):
        model.train()
        total_loss = 0
        for batch in loader:
            opt.zero_grad()
            embedding, scores = model(batch.x, batch.edge_index)
            # The loss is computed on the training nodes only.
            train_scores = scores[batch.train_mask]
            train_labels = batch.y[batch.train_mask]
            loss = model.loss(train_scores, train_labels)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        # Average over the number of graphs in the dataset.
        total_loss /= len(loader.dataset)
        writer.add_scalar("loss", total_loss, epoch)

        # Periodic evaluation through model_test with is_training=False.
        if epoch % 10 == 0:
            test_acc = model_test(test_loader, model, is_training=False)
            report = "Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                epoch, total_loss, test_acc)
            print(report)
            writer.add_scalar("test accuracy", test_acc, epoch)

        # Periodic snapshot of the embedding from the last batch of this epoch.
        if epoch % 20 == 0:
            writer.add_embedding(
                embedding,
                global_step=epoch,
                tag='epoch' + str(epoch),
                metadata=batch.y,
            )

    return model

# Create a TensorBoard writer and launch training.
from datetime import datetime
from tensorboardX import SummaryWriter

# Each run logs into its own ./log/<YYYYmmdd-HHMMSS> directory, named after the start time.
# NOTE(review): the writer is not closed or flushed in the visible code -- confirm
# this happens later so all events reach disk.
writer = SummaryWriter("./log/" + datetime.now().strftime("%Y%m%d-%H%M%S"))

# Runs epochs 0..200 inclusive; `model` is rebound to the object returned by model_train.
model = model_train(dataset, writer, model, epoch_num=200, lr=0.01, weight_decay=4e-4)

Epoch 0. Loss: 1.9241. Test accuracy: 0.2160
Epoch 10. Loss: 1.6689. Test accuracy: 0.2710
Epoch 20. Loss: 1.2258. Test accuracy: 0.3770
Epoch 30. Loss: 0.9460. Test accuracy: 0.6220
Epoch 40. Loss: 0.5951. Test accuracy: 0.6830
Epoch 50. Loss: 0.4025. Test accuracy: 0.7730
Epoch 60. Loss: 0.1766. Test accuracy: 0.7570
Epoch 70. Loss: 0.3255. Test accuracy: 0.6910
Epoch 80. Loss: 0.0685. Test accuracy: 0.7540
Epoch 90. Loss: 0.0849. Test accuracy: 0.7850
Epoch 100. Loss: 0.1008. Test accuracy: 0.7830
Epoch 110. Loss: 0.0472. Test accuracy: 0.7730
Epoch 120. Loss: 0.0256. Test accuracy: 0.7800
Epoch 130. Loss: 0.0891. Test accuracy: 0.7600
Epoch 140. Loss: 0.0616. Test accuracy: 0.7790
Epoch 150. Loss: 0.0815. Test accuracy: 0.7650
Epoch 160. Loss: 0.0547. Test accuracy: 0.7460
Epoch 170. Loss: 0.0180. Test accuracy: 0.7580
Epoch 180. Loss: 0.0690. Test accuracy: 0.7620
Epoch 190. Loss: 0.1608. Test accuracy: 0.7350
Epoch 200. Loss: 0.0620. Test accuracy: 0.7700
