In [1]:
import argparse
import os.path as osp
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric
import torch_geometric.data as geom_data
import numpy as np
from torch_geometric.datasets import Entities
from torch_geometric.utils import k_hop_subgraph
from torch_geometric.nn import RGCNConv, FastRGCNConv


In [2]:
# Use the current working directory as the dataset root and load the MUTAG
# entity graph; the dataset holds one graph object, taken here as `data`.
cwd = os.getcwd()

dataset = Entities(root=cwd, name="MUTAG")
data = dataset[0]

Available dataset attributes include: `name`, `num_classes`, `num_edge_features`, `num_features`, `num_node_features`, and `num_relations`.

In [3]:
# Data exploration cell: summarise what the loader returned.
# Labels are stored per split (train_y / test_y) rather than in a single `y`
# field, so no global label average is printed here.
for label, value in (
    ("Data object:", dataset.data),
    ("Length:", len(dataset)),
    ("Dataset: ", dataset),
):
    print(label, value)

# Number of relation types and number of target classes.
print(dataset.num_relations, dataset.num_classes)


Data object: Data(edge_index=[2, 148454], edge_type=[148454], test_idx=[68], test_y=[68], train_idx=[272], train_y=[272])
Length: 1
Dataset:  MUTAGEntities()
46 2


In [7]:
# Utility functions
from torch.distributions.uniform import Uniform
from torch.distributions.normal import Normal

## To get uniform embedding weights of a custom range
def uniform_embeddings(num_nodes, emb_dim, device=None, low=-100.0, high=100.0):
    """Sample a trainable embedding matrix from a uniform distribution.

    Args:
        num_nodes: Number of rows (one per node).
        emb_dim: Number of columns; cast with ``int`` before sampling.
        device: Optional target device (``torch.device``, string or CUDA
            index). ``None`` leaves the tensor where it was sampled.
        low: Lower bound of the sampling range. Defaults to -100.0, the
            value that used to be hard-coded.
        high: Upper bound of the sampling range. Defaults to 100.0.

    Returns:
        A leaf tensor of shape ``(num_nodes, emb_dim)`` with
        ``requires_grad=True``.

    Raises:
        ValueError: If ``low`` is not strictly smaller than ``high``.
    """
    low, high = float(low), float(high)
    if not low < high:
        raise ValueError(f"low must be smaller than high, got low={low}, high={high}")

    # Scalar bounds give the distribution an empty batch shape, so the sample
    # already has shape (num_nodes, emb_dim) and no squeeze is needed.
    node_embeddings = Uniform(low, high).sample((int(num_nodes), int(emb_dim)))

    # Compare against None explicitly: a plain truthiness test would silently
    # ignore a valid device such as the integer CUDA index 0.
    if device is not None:
        node_embeddings = node_embeddings.to(device)

    # Enable gradients only after the optional move so the result is a leaf.
    node_embeddings.requires_grad = True

    return node_embeddings

## Alternative: PyTorch's built-in embedding layer, whose default weights lie
## around 0. Constructor arguments are (num_nodes, emb_dim).
embedding = torch.nn.Embedding(num_embeddings=10, embedding_dim=4)

In [8]:
# torch geometric automatically implements batching multiple graphs into 1 huge block diagonal adjacency matrix 
# + concatenates feature matrices etc.
#graph_train_loader = geom_data.DataLoader(train_dataset, batch_size=64, shuffle=True)
#graph_val_loader = geom_data.DataLoader(test_dataset, batch_size=64) # Additional loader if you want to change to a larger dataset
#graph_test_loader = geom_data.DataLoader(test_dataset, batch_size=64)

In [9]:
# The model only uses a small receptive field, so the graph is restricted to
# nodes at most 2 hops away from any training/test node. (This filtering is
# what makes the larger BGS and AM graphs, too big for full-batch processing,
# tractable.)
#
# k_hop_subgraph computes the k-hop subgraph of edge_index around node_idx and
# returns, in order:
#   1. the nodes contained in the subgraph,
#   2. the filtered edge_index connectivity,
#   3. the new positions of the nodes originally passed as node_idx,
#   4. a boolean mask marking which edges were kept.

# Remember how many of the concatenated seed nodes are training nodes so the
# returned mapping can be split back into train/test parts.
num_train = data.train_idx.size(0)

node_idx = torch.cat((data.train_idx, data.test_idx), dim=0)
node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
    node_idx, num_hops=2, edge_index=data.edge_index, relabel_nodes=True)

# Overwrite the graph with its filtered, relabelled version.
data.num_nodes = node_idx.size(0)
data.edge_index = edge_index
data.edge_type = data.edge_type[edge_mask]
data.train_idx, data.test_idx = mapping[:num_train], mapping[num_train:]

# Show which relation types survive the filtering.
np.unique(data.edge_type)


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45])

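Note: this paragraph and the next describe a batched graph-classification setting. The dataset loaded above contains a single graph (`Length: 1`), and the model below classifies individual nodes, so the batching and pooling described here are not used in this notebook. We have 38 graphs stacked together for the test dataset. The batch indices, stored in `batch`, show that the first 12 nodes belong to the first graph, the next 22 to the second graph, and so on.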

These indices are important for performing the final prediction. To perform a prediction over a whole graph, we usually apply a pooling operation over all nodes after running the GNN model. In this case, we will use average pooling. Hence, we need to know which nodes should be included in which average pool. Using this pooling, we can already create our graph network below. Specifically, we re-use our class `GNNModel` from before, and simply add an average-pooling step and a single linear layer for the graph prediction task.


In [10]:
# Model hyperparameters
embedding_dim = 16  # width of every node embedding
num_bases = 30      # passed as `num_bases` to each RGCNConv layer

# Initial node features: a 2D tensor of shape (data.num_nodes, embedding_dim),
# sampled uniformly at random.
untrained_embedding = uniform_embeddings(num_nodes=data.num_nodes, emb_dim=embedding_dim)

In [12]:
# Display the freshly sampled embedding matrix; nothing has been trained yet.
untrained_embedding

tensor([[ 93.1385, -18.0757,  71.2376,  ..., -13.5010, -61.0333, -87.1491],
        [-79.4696, -98.9854, -53.3307,  ..., -96.5766,  -0.3052, -50.2125],
        [ 21.8054, -41.5483, -39.2340,  ...,  44.3851, -95.2284, -38.6289],
        ...,
        [ 74.9765,   3.0612,  86.9940,  ..., -36.3838,   7.5832,  13.8613],
        [ 62.7756, -10.8657,  90.2944,  ...,  70.3103, -61.2729,  78.3486],
        [-65.1617,  95.2100,  18.0446,  ..., -43.2312,  54.8700, -40.9425]],
       requires_grad=True)

In [14]:
# Neural network with 2 RGCNConv layers, input = node embeddings
class Net(torch.nn.Module):
    """Two stacked ``RGCNConv`` layers applied to externally supplied node embeddings.

    Both layers map ``emb_dim`` input channels to ``emb_dim`` output channels,
    so the output has one row per node and ``emb_dim`` columns.

    Args:
        emb_dim: Channel width of both layers. Defaults to the notebook-level
            ``embedding_dim``.
        n_relations: Number of relation types. Defaults to
            ``dataset.num_relations``.
        n_bases: Number of bases handed to each layer. Defaults to the
            notebook-level ``num_bases``.

    Attributes:
        embedding_dim: Channel width in use.
        node_embeddings: Detached copy of the most recent forward output, or
            ``None`` before the first forward pass.
    """

    def __init__(self, emb_dim=None, n_relations=None, n_bases=None):
        super().__init__()

        # Fall back to the notebook globals so a bare `Net()` behaves as before.
        if emb_dim is None:
            emb_dim = embedding_dim
        if n_relations is None:
            n_relations = dataset.num_relations
        if n_bases is None:
            n_bases = num_bases

        self.embedding_dim = emb_dim
        # Filled in by forward(); None (not an empty list) marks "not computed yet".
        self.node_embeddings = None

        self.conv1 = RGCNConv(in_channels=emb_dim, out_channels=emb_dim,
                              num_relations=n_relations, num_bases=n_bases)
        self.conv2 = RGCNConv(in_channels=emb_dim, out_channels=emb_dim,
                              num_relations=n_relations, num_bases=n_bases)

    def forward(self, embedding, edge_index, edge_type):
        """Run both layers and return row-wise log-softmax scores.

        Args:
            embedding: Input node embeddings, one row per node.
            edge_index: Graph connectivity passed through to both layers.
            edge_type: Relation type of each edge, passed through to both layers.

        Returns:
            The log-softmax (over dim 1) of the second layer's output.
        """
        x = F.relu(self.conv1(embedding, edge_index, edge_type))
        x = self.conv2(x, edge_index, edge_type)
        x = F.log_softmax(x, dim=1)
        # Cache the output for all nodes (one row per node, emb_dim columns).
        # It is detached so the cached copy does not keep autograd history
        # alive; the returned tensor still carries gradients.
        self.node_embeddings = x.detach()
        return x


# Use the GPU when one is available and place everything the forward pass
# touches on that device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = data.to(device)

# The input embeddings were created without a device argument; without this
# move the forward pass fails with a device mismatch on a CUDA machine.
# The moved copy is re-created as a leaf tensor so it still requires grad.
if untrained_embedding.device.type != device.type:
    untrained_embedding = untrained_embedding.detach().to(device).requires_grad_()

# Only the model's own parameters are optimised; the input embeddings are not
# part of model.parameters().
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005)


In [15]:
def train():
    """Run one full-batch optimisation step.

    Returns:
        A tuple ``(loss_value, logits)``: the training loss as a Python float
        and the model output for all nodes computed during this step.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(untrained_embedding, data.edge_index, data.edge_type)
    # The loss is evaluated on the training nodes only.
    train_loss = F.nll_loss(logits[data.train_idx], data.train_y)

    train_loss.backward()
    optimizer.step()

    return train_loss.item(), logits


@torch.no_grad()
def test():
    """Evaluate the model on the training and test nodes without tracking gradients.

    Returns:
        A tuple ``(train_accuracy, test_accuracy)`` of Python floats.
    """
    model.eval()
    logits = model(untrained_embedding, data.edge_index, data.edge_type)
    pred = logits.argmax(dim=-1)

    def accuracy(idx, target):
        # Fraction of the selected nodes whose predicted class matches the target.
        return (pred[idx] == target).float().mean().item()

    return accuracy(data.train_idx, data.train_y), accuracy(data.test_idx, data.test_y)

# Train for 50 epochs to get trained node embedding of size (data.num_nodes, embedding_dim).
# Note: `embedding` is rebound on every epoch to the output returned by train(),
# replacing the nn.Embedding object created earlier under the same name.
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    loss, embedding = train()
    train_acc, test_acc = test()
    report = (f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f} '
              f'Test: {test_acc:.4f}')
    print(report)

# Output cached by the model during its most recent forward pass.
final_embeddings = model.node_embeddings
print(final_embeddings)
print(final_embeddings.shape)

Epoch: 01, Loss: 266.2306, Train: 0.4632 Test: 0.4412
Epoch: 02, Loss: 61.0120, Train: 0.6066 Test: 0.6618
Epoch: 03, Loss: 74.8870, Train: 0.6066 Test: 0.6765
Epoch: 04, Loss: 56.3706, Train: 0.5588 Test: 0.5000
Epoch: 05, Loss: 31.7682, Train: 0.4228 Test: 0.3676
Epoch: 06, Loss: 56.9123, Train: 0.6287 Test: 0.6029
Epoch: 07, Loss: 18.7759, Train: 0.6250 Test: 0.6765
Epoch: 08, Loss: 36.2677, Train: 0.6618 Test: 0.6765
Epoch: 09, Loss: 28.0958, Train: 0.6471 Test: 0.5588
Epoch: 10, Loss: 14.0498, Train: 0.5257 Test: 0.4412
Epoch: 11, Loss: 27.3179, Train: 0.6618 Test: 0.6029
Epoch: 12, Loss: 15.1853, Train: 0.7353 Test: 0.6618
Epoch: 13, Loss: 14.9577, Train: 0.6949 Test: 0.6765
Epoch: 14, Loss: 19.2661, Train: 0.7463 Test: 0.6618
Epoch: 15, Loss: 12.2312, Train: 0.7169 Test: 0.6176
Epoch: 16, Loss: 9.0807, Train: 0.6838 Test: 0.5882
Epoch: 17, Loss: 13.2620, Train: 0.7610 Test: 0.6324
Epoch: 18, Loss: 7.6053, Train: 0.7757 Test: 0.6618
Epoch: 19, Loss: 7.5922, Train: 0.7279 Test: 0.