https://pytorch-geometric.readthedocs.io/en/latest/get_started/introduction.html

In [1]:
import torch
from torch_geometric.data import Data

# Three-node path graph 0 - 1 - 2. Every undirected edge is stored as two
# directed edges, so `edge_index` holds one column per direction:
# row 0 lists the source nodes, row 1 the matching target nodes.
source_nodes = [0, 1, 1, 2]
target_nodes = [1, 0, 2, 1]
edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long)

# One scalar feature per node (three rows, one column).
node_features = [[-1], [0], [1]]
x = torch.tensor(node_features, dtype=torch.float)

data = Data(x=x, edge_index=edge_index)

In [3]:
# Build the same graph again as the cell's last expression so that its
# summary (attribute names with their tensor shapes) is displayed.
Data(x=x, edge_index=edge_index)

Data(x=[3, 1], edge_index=[2, 4])

In [5]:
import torch
from torch_geometric.data import Data

# Alternative construction: write the edges as (source, target) pairs, one
# pair per row, then transpose into the [2, num_edges] layout before handing
# the tensor to `Data`.
edge_pairs = [(0, 1), (1, 0), (1, 2), (2, 1)]
edge_index = torch.tensor(edge_pairs, dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# `.t()` alone only gives a transposed view; `.contiguous()` materialises it.
transposed_edge_index = edge_index.t().contiguous()
data = Data(x=x, edge_index=transposed_edge_index)

In [6]:
# Show the graph built from the transposed edge list; its summary matches the
# one obtained from the [2, num_edges] construction above.
data

Data(x=[3, 1], edge_index=[2, 4])

In [8]:
# `Data.keys` is a property in older PyG releases but a method in newer ones,
# where `print(data.keys)` would show a bound method instead of the attribute
# names. Handle both so this cell prints the list of names either way.
attribute_names = data.keys
if callable(attribute_names):
    attribute_names = attribute_names()
print(attribute_names)

['edge_index', 'x']


In [9]:
# Attributes can be read with dictionary-style access; this prints the node
# feature tensor stored under 'x'.
print(data['x'])

tensor([[-1.],
        [ 0.],
        [ 1.]])


In [10]:
# Iterating over a Data object yields (attribute name, value) pairs; only the
# name is used here, `item` is intentionally ignored.
for key, item in data:
    print(f'{key} found in data')

x found in data
edge_index found in data


In [11]:
# Number of nodes in the graph (3 for the toy graph built above).
data.num_nodes


3

In [12]:
# Number of edges as stored in `edge_index`: each undirected edge appears once
# per direction, hence 4 for the two undirected edges of the toy graph.
data.num_edges

4

# Common Benchmark Datasets

PyG contains a large number of common benchmark datasets, e.g., all Planetoid datasets (Cora, Citeseer, Pubmed), all graph classification datasets from http://graphkernels.cs.tu-dortmund.de and their cleaned versions, the QM7 and QM9 datasets, and a handful of 3D mesh/point cloud datasets like FAUST, ModelNet10/40 and ShapeNet.

Initializing a dataset is straightforward. An initialization of a dataset will automatically download its raw files and process them to the previously described Data format. E.g., to load the ENZYMES dataset (consisting of 600 graphs within 6 classes), type:

In [13]:
from torch_geometric.datasets import TUDataset

# Load ENZYMES; on first use the raw files are downloaded into `root` and
# processed (see the log output below).
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting /tmp/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


In [14]:
# Number of graphs in the dataset.
len(dataset)

600

In [15]:
# Number of target classes.
dataset.num_classes

6

In [16]:
# Number of features attached to each node.
dataset.num_node_features

3

In [17]:
# Take the first graph of the dataset.
# Note: this rebinds `data`, replacing the toy graph defined earlier.
data = dataset[0]

In [19]:
# Check whether the graph is undirected; the recorded result for this
# ENZYMES graph is True.
data.is_undirected()

True

In [20]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

# Only the last expression of a cell is displayed automatically, so the first
# two values are printed explicitly instead of being silently discarded.
print(len(dataset))         # number of graphs in the dataset
print(dataset.num_classes)  # number of target classes

dataset.num_node_features   # features per node (shown as the cell result)


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


1433

In [21]:
# 
# * Data loader: group the graphs of a dataset into mini-batches

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Reload ENZYMES with `use_node_attr=True`; the batches printed below report
# 21 features per node.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
# Shuffled mini-batches of 32 graphs each.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    print(batch)
    # Illustrative output only; the actual sizes vary from batch to batch:
    # >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32])

    print(batch.num_graphs)
    # >>> 32

DataBatch(edge_index=[2, 3940], x=[1158, 21], y=[32], batch=[1158], ptr=[33])
32
DataBatch(edge_index=[2, 4008], x=[1031, 21], y=[32], batch=[1031], ptr=[33])
32
DataBatch(edge_index=[2, 3874], x=[1074, 21], y=[32], batch=[1074], ptr=[33])
32
DataBatch(edge_index=[2, 3948], x=[1022, 21], y=[32], batch=[1022], ptr=[33])
32
DataBatch(edge_index=[2, 3530], x=[878, 21], y=[32], batch=[878], ptr=[33])
32
DataBatch(edge_index=[2, 4136], x=[1114, 21], y=[32], batch=[1114], ptr=[33])
32
DataBatch(edge_index=[2, 3804], x=[1033, 21], y=[32], batch=[1033], ptr=[33])
32
DataBatch(edge_index=[2, 4208], x=[1054, 21], y=[32], batch=[1054], ptr=[33])
32
DataBatch(edge_index=[2, 3922], x=[1031, 21], y=[32], batch=[1031], ptr=[33])
32
DataBatch(edge_index=[2, 4280], x=[1118, 21], y=[32], batch=[1118], ptr=[33])
32
DataBatch(edge_index=[2, 4156], x=[1054, 21], y=[32], batch=[1054], ptr=[33])
32
DataBatch(edge_index=[2, 3614], x=[1001, 21], y=[32], batch=[1001], ptr=[33])
32
DataBatch(edge_index=[2, 3590]

# Learning Methods on Graphs

After learning about data handling, datasets, loader and transforms in PyG, it’s time to implement our first graph neural network!

We will use a simple GCN layer and replicate the experiments on the Cora citation dataset. For a high-level explanation of GCN, have a look at its [blog post](http://tkipf.github.io/graph-convolutional-networks/).

We first need to load the Cora dataset:

In [22]:
from torch_geometric.datasets import Planetoid

# Rebind `dataset` to Cora (it was last bound to ENZYMES by the data-loader
# cell); the GCN defined below reads its layer sizes from this variable.
dataset = Planetoid(root='/tmp/Cora', name='Cora')

In [24]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector. When omitted,
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used.
        hidden_channels: Width of the hidden layer (16 by default).
        out_channels: Number of output classes. When omitted,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Fall back to the notebook-level `dataset` only for sizes that were
        # not given explicitly, so a plain `GCN()` behaves exactly as before
        # while the model can also be built for other data.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the output classes.

        Args:
            data: Graph object providing ``x`` (node features) and
                ``edge_index`` (graph connectivity).

        Returns:
            The log-softmax (taken over dimension 1) of the second
            convolution's output.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # Log-softmax pairs with the `F.nll_loss` used by the training loop.
        return F.log_softmax(x, dim=1)

In [27]:
# Prefer the GPU when one is available; model and data must share a device.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Training mode enables the dropout inside the model.
model.train()
train_mask = data.train_mask  # same mask every epoch, so look it up once
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # The loss is computed on the training nodes only.
    loss = F.nll_loss(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()

In [28]:
# Evaluation mode turns the model's dropout off.
model.eval()
# Inference needs no gradients: running the forward pass under `no_grad`
# avoids building the autograd graph and leaves the predictions unchanged.
with torch.no_grad():
    pred = model(data).argmax(dim=1)
# Accuracy is measured on the held-out test nodes only.
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7990


# Message Passing 

In [29]:
import torch
from torch.nn import Linear, Parameter
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class GCNConv(MessagePassing):  # inherits from the MessagePassing base class
    """GCN layer implemented on top of ``MessagePassing``.

    The forward pass adds self-loops, applies a bias-free linear transform,
    weights every edge by the inverse square roots of its endpoint degrees,
    sums the incoming messages per node and finally adds a bias vector.

    Note: this class reuses the name of the ``GCNConv`` imported from
    ``torch_geometric.nn`` earlier in the notebook, so running this cell
    rebinds that name.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        # Messages arriving at a node are combined by summation.
        super().__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels, bias=False)
        # The bias is kept separate because it is applied after aggregation.
        self.bias = Parameter(torch.empty(out_channels))

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the linear weights and set the bias to zero."""
        self.lin.reset_parameters()
        self.bias.data.zero_()

    def forward(self, x, edge_index):
        """Run the layer.

        Args:
            x: Node features of shape [N, in_channels].
            edge_index: Graph connectivity of shape [2, E].

        Returns:
            The aggregated node features with the bias added.
        """
        num_nodes = x.size(0)

        # Add a self-loop for every node.
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

        # Linearly transform the node features.
        x = self.lin(x)

        # Per-edge normalisation coefficient: the product of the inverse
        # square-root degrees of both endpoints.
        source, target = edge_index
        deg = degree(target, num_nodes, dtype=x.dtype)
        inv_sqrt_deg = deg.pow(-0.5)
        # A zero degree gives inf under pow(-0.5); zero those entries out.
        inv_sqrt_deg[inv_sqrt_deg == float('inf')] = 0
        norm = inv_sqrt_deg[source] * inv_sqrt_deg[target]

        # Build the messages (see `message`) and sum them per node.
        out = self.propagate(edge_index, x=x, norm=norm)

        # Apply the final bias vector.
        out += self.bias

        return out

    def message(self, x_j, norm):
        """Scale the per-edge features ``x_j`` ([E, out_channels]) by ``norm``."""
        # `norm` holds one coefficient per edge; reshape it to a column so it
        # broadcasts across the feature dimension.
        return norm.view(-1, 1) * x_j