In [2]:
# Install required packages.
# The torch-scatter / torch-sparse wheels are resolved from the wheel index for
# torch 1.8.0 + CUDA 10.1 (see the URL suffix).
# NOTE(review): assumes the runtime's torch build matches that index -- confirm.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
# NOTE(review): torch-geometric is installed without a version pin.
!pip install -q torch-geometric


In [4]:
!pip install dgl

Collecting dgl
[?25l  Downloading https://files.pythonhosted.org/packages/71/c4/ce24841375cf4393787dbf9a645e271c19a03d2d9a0e5770b08ba76bcfde/dgl-0.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.4MB)
[K     |████████████████████████████████| 4.4MB 8.7MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.6.1


In [5]:
import numpy as np
import torch
import torch_geometric as tg

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv

class GATLayer(nn.Module):
    """Single attention head of a graph attention network, bound to one graph.

    The graph object ``g`` is stored at construction time and every forward
    pass reads and writes its node/edge data; the layer therefore cannot be
    applied to a different graph without rebuilding it.

    Args:
        g: graph object exposing ``ndata``, ``apply_edges`` and ``update_all``
            (a DGL graph in this notebook).
        in_dim: size of the incoming node feature vectors.
        out_dim: size of the projected node feature vectors.
    """

    def __init__(self, g, in_dim, out_dim):
        super().__init__()
        self.g = g
        # equation (1): shared, bias-free projection of the node features
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # equation (2): bias-free scorer over a concatenated (src, dst) pair
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw both weight matrices with Xavier-normal init (ReLU gain)."""
        relu_gain = nn.init.calculate_gain('relu')
        # Projection first, then the attention scorer.
        for linear in (self.fc, self.attn_fc):
            nn.init.xavier_normal_(linear.weight, gain=relu_gain)

    def edge_attention(self, edges):
        """Edge UDF for equation (2): un-normalised attention score per edge.

        Returns a dict with key ``'e'`` holding the leaky-ReLU of the scorer
        applied to the source and destination ``'z'`` features concatenated
        along dim 1.
        """
        pair_feats = torch.cat((edges.src['z'], edges.dst['z']), dim=1)
        return {'e': F.leaky_relu(self.attn_fc(pair_feats))}

    def message_func(self, edges):
        """Message UDF for equations (3) & (4).

        Forwards the source node's projected feature ``'z'`` together with the
        edge's attention score ``'e'``.
        """
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        """Reduce UDF for equations (3) & (4).

        Softmax-normalises the received scores along dim 1 of the mailbox
        (presumably the incoming-message axis -- TODO confirm against DGL's
        mailbox layout) and returns the attention-weighted sum of the received
        ``'z'`` features under key ``'h'``.
        """
        scores = nodes.mailbox['e']
        # equation (3): normalise the scores
        weights = F.softmax(scores, dim=1)
        # equation (4): weighted aggregation of the received features
        aggregated = (weights * nodes.mailbox['z']).sum(dim=1)
        return {'h': aggregated}

    def forward(self, h):
        """Run one attention pass over the stored graph.

        Args:
            h: node feature tensor fed to the linear projection.

        Returns:
            The aggregated node features, popped from ``g.ndata['h']``. The
            projected features stay in ``g.ndata['z']`` afterwards.
        """
        graph = self.g
        # equation (1)
        graph.ndata['z'] = self.fc(h)
        # equation (2)
        graph.apply_edges(self.edge_attention)
        # equation (3) & (4)
        graph.update_all(self.message_func, self.reduce_func)
        return graph.ndata.pop('h')

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [11]:
class MultiHeadGATLayer(nn.Module):
    """Runs several independent ``GATLayer`` heads and merges their outputs.

    Args:
        g: graph object passed through to every ``GATLayer`` head.
        in_dim: input feature size of each head.
        out_dim: output feature size of each head.
        num_heads: number of attention heads to create.
        merge: ``'cat'`` concatenates head outputs along dim 1; any other
            value averages the head outputs element-wise.
    """

    def __init__(self, g, in_dim, out_dim, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
            self.heads.append(GATLayer(g, in_dim, out_dim))
        self.merge = merge

    def forward(self, h):
        """Apply every head to ``h`` and merge the results.

        Returns:
            With ``merge == 'cat'``: the head outputs concatenated on dim 1.
            Otherwise: the mean over heads, with the same shape as a single
            head's output.
        """
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == 'cat':
            # concat on the output feature dimension (dim=1)
            return torch.cat(head_outs, dim=1)
        # Merge using the average across heads only. Stacking puts the heads
        # on dim 0, so the mean must be taken over dim 0; without `dim` the
        # mean collapses every element into a single scalar.
        return torch.mean(torch.stack(head_outs), dim=0)

In [26]:
class GAT(nn.Module):
    """Two-layer graph attention network built from ``MultiHeadGATLayer``.

    Args:
        g: graph object handed to both layers.
        in_dim: input feature size.
        hidden_dim: per-head output size of the first layer.
        out_dim: output size of the second layer.
        num_heads: number of attention heads in the first layer.
    """

    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super().__init__()
        # The first layer concatenates its heads, so it emits
        # hidden_dim * num_heads features per node.
        concat_dim = hidden_dim * num_heads
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # The output layer uses a single attention head.
        self.layer2 = MultiHeadGATLayer(g, concat_dim, out_dim, 1)

    def forward(self, h):
        """Return ``layer2(elu(layer1(h)))``."""
        hidden = F.elu(self.layer1(h))
        return self.layer2(hidden)

In [22]:
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh
import networkx as nx

def load_cora_data():
    """Load the Cora citation dataset and wrap its pieces for training.

    Returns:
        A 5-tuple ``(g, features, labels, mask, test_mask)`` where ``g`` is a
        ``DGLGraph`` built from the dataset's graph, ``features`` is a
        ``FloatTensor``, ``labels`` a ``LongTensor``, ``mask`` the training
        mask as a ``BoolTensor``, and ``test_mask`` the dataset's test mask
        passed through exactly as the dataset object provides it (no tensor
        conversion).
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)
    return graph, node_features, node_labels, train_mask, cora.test_mask

In [21]:
# NOTE(review): in the visible cells `data` is only assigned inside
# load_cora_data(), so this inspection cell presumably relied on a
# notebook-level `data` left over from an earlier kernel state -- confirm or remove.
data.test_mask

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [27]:
import time
import numpy as np

# Load the graph, node features, labels and the train/test masks.
g, features, labels, mask, test_mask = load_cora_data()

# create the model, 2 heads, each head has hidden size 8
net = GAT(g,
          in_dim=features.size()[1],
          hidden_dim=8,
          out_dim=7,
          num_heads=2)

# create optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

# main loop
# `dur` collects per-epoch wall-clock times; the first three epochs are
# treated as warm-up and are not timed.
dur = []
for epoch in range(500):
    if epoch >= 3:
        t0 = time.time()

    # Full-graph forward pass; the loss is computed on training nodes only.
    logits = net(features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[mask], labels[mask])
    pred = logp.argmax(1)

    # Accuracies are measured on the same forward pass used for the loss.
    train_acc = (pred[mask] == labels[mask]).float().mean()
    test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    # No durations exist during the warm-up epochs. Report nan explicitly
    # instead of calling np.mean on an empty list, which emits a
    # RuntimeWarning; the printed value is still "nan".
    mean_dur = np.mean(dur) if dur else float('nan')

    print("Epoch {:05d} | Loss {:.4f} | Training Accuracy {:.4f} | Testing Accuracy {:.4f} | Time(s) {:.4f}".format(
        epoch, loss.item(), train_acc.item(), test_acc.item(), mean_dur))
    



  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch 00000 | Loss 1.9463 | Training Accuracy 0.1357 | Testing Accuracy 0.0940 | Time(s) nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00001 | Loss 1.9443 | Training Accuracy 0.2071 | Testing Accuracy 0.1230 | Time(s) nan
Epoch 00002 | Loss 1.9423 | Training Accuracy 0.2571 | Testing Accuracy 0.1440 | Time(s) nan
Epoch 00003 | Loss 1.9403 | Training Accuracy 0.3071 | Testing Accuracy 0.1650 | Time(s) 0.1290
Epoch 00004 | Loss 1.9383 | Training Accuracy 0.3500 | Testing Accuracy 0.1960 | Time(s) 0.1234
Epoch 00005 | Loss 1.9363 | Training Accuracy 0.3857 | Testing Accuracy 0.2170 | Time(s) 0.1243
Epoch 00006 | Loss 1.9343 | Training Accuracy 0.4357 | Testing Accuracy 0.2530 | Time(s) 0.1237
Epoch 00007 | Loss 1.9323 | Training Accuracy 0.4929 | Testing Accuracy 0.2850 | Time(s) 0.1244
Epoch 00008 | Loss 1.9303 | Training Accuracy 0.5571 | Testing Accuracy 0.3180 | Time(s) 0.1259
Epoch 00009 | Loss 1.9283 | Training Accuracy 0.6429 | Testing Accuracy 0.3450 | Time(s) 0.1259
Epoch 00010 | Loss 1.9262 | Training Accuracy 0.7071 | Testing Accuracy 0.3820 | Time(s) 0.1255
Epoch 00011 | Loss 1.9242 | Training Accuracy 

In [16]:
# Inspect the predicted class index per row: argmax over dim 1 of `logp`,
# the log-softmax output left over from the last executed training iteration.
logp.argmax(1)

tensor([3, 4, 4,  ..., 3, 4, 3])

In [15]:
# Inspect the raw log-probabilities (`F.log_softmax(logits, 1)`) left over
# from the last executed training iteration.
logp

tensor([[-1.9590, -1.9811, -1.9442,  ..., -1.9677, -1.9696, -1.9328],
        [-1.9500, -1.9678, -1.9415,  ..., -1.8976, -1.9645, -1.9607],
        [-1.9388, -2.0051, -1.9410,  ..., -1.8799, -1.9569, -1.9730],
        ...,
        [-1.9465, -1.9239, -1.9630,  ..., -1.9787, -1.9252, -1.9698],
        [-1.9347, -1.9868, -1.9368,  ..., -1.9109, -1.9519, -1.9807],
        [-1.9357, -1.9757, -1.9487,  ..., -1.9256, -1.9539, -1.9656]],
       grad_fn=<LogSoftmaxBackward>)