In [1]:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-geometric

[K     |████████████████████████████████| 11.9MB 8.3MB/s 
[K     |████████████████████████████████| 24.3MB 88.0MB/s 
[K     |████████████████████████████████| 194kB 8.6MB/s 
[K     |████████████████████████████████| 235kB 15.6MB/s 
[K     |████████████████████████████████| 2.2MB 19.0MB/s 
[K     |████████████████████████████████| 51kB 801kB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [13]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

In [3]:
# Load the MUTAG benchmark from the TUDataset collection; files are cached
# under data/TUDataset (downloaded and processed on first use).
dataset = TUDataset(root='data/TUDataset', name='MUTAG')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Extracting data/TUDataset/MUTAG/MUTAG.zip
Processing...
Done!


In [4]:
# --- Dataset-level summary: size, node-feature width, number of labels ---
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Take the first graph as a representative sample to inspect.
data = dataset[0]

print()
print(data)
print('=============================================================')

# --- Structural statistics of that single graph ---
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# Ratio of stored edge entries to nodes.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# NOTE(review): newer PyG releases appear to prefer has_isolated_nodes() /
# has_self_loops() over the two contains_* helpers — confirm against the
# installed version before renaming.
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_attr=[38, 4], edge_index=[2, 38], x=[17, 7], y=[1])
Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [6]:
# Seed torch's global RNG so the shuffle below, and therefore the split,
# comes out the same on every fresh run.
torch.manual_seed(101010)
dataset = dataset.shuffle()

# The first 150 shuffled graphs are used for training, the rest for testing.
train_dataset, test_dataset = dataset[:150], dataset[150:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

Number of training graphs: 150
Number of test graphs: 38


In [8]:
# Mini-batch loaders: training batches are reshuffled on each pass,
# test batches keep a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [9]:
# Walk through one pass of the training loader and show what each mini-batch
# looks like: how many graphs it holds and the sizes of its tensors.
# Note: `step` and `data` remain defined in the notebook namespace afterwards,
# so `data` no longer refers to the single graph inspected earlier.
for step, data in enumerate(train_loader):
  print(f"Step {step + 1}:")  # 1-based step number for display
  print("----------------")
  print(f"Number of graphs in the current batch: {data.num_graphs}")
  print(data)
  print()

Step 1:
----------------
Number of graphs in the current batch: 64
Batch(batch=[1065], edge_attr=[2342, 4], edge_index=[2, 2342], x=[1065, 7], y=[64])

Step 2:
----------------
Number of graphs in the current batch: 64
Batch(batch=[1201], edge_attr=[2676, 4], edge_index=[2, 2676], x=[1201, 7], y=[64])

Step 3:
----------------
Number of graphs in the current batch: 22
Batch(batch=[429], edge_attr=[942, 4], edge_index=[2, 942], x=[429, 7], y=[22])



In [14]:
class GCN(torch.nn.Module):
  """Three-layer graph convolutional network for graph-level classification.

  Node features pass through three ``GCNConv`` layers (ReLU after the first
  two), are averaged per graph with ``global_mean_pool``, go through dropout,
  and are finally mapped to class scores by a linear layer.

  Args:
    hidden_channels: Output width of every ``GCNConv`` layer.
    in_channels: Number of input node features. When ``None`` (default), it
      is read from the notebook-level ``dataset.num_node_features``.
    out_channels: Number of output classes. When ``None`` (default), it is
      read from the notebook-level ``dataset.num_classes``.
    dropout: Dropout probability applied to the pooled graph embedding.
  """

  def __init__(self, hidden_channels, in_channels=None, out_channels=None,
               dropout=0.2):
    super().__init__()

    # Fall back to the global dataset only when sizes are not given, so the
    # model can also be built for other data without editing the class.
    if in_channels is None:
      in_channels = dataset.num_node_features
    if out_channels is None:
      out_channels = dataset.num_classes

    # Re-seed before creating the layers so weight initialisation is
    # reproducible. Side effect: this resets torch's global RNG state.
    torch.manual_seed(101010)
    self.conv1 = GCNConv(in_channels, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, hidden_channels)
    self.conv3 = GCNConv(hidden_channels, hidden_channels)
    self.lin = Linear(hidden_channels, out_channels)
    self.dropout = dropout

  def forward(self, x, edge_index, batch):
    """Compute class scores for every graph in a mini-batch.

    Args:
      x: Node feature matrix for all nodes in the batch.
      edge_index: Edge connectivity passed to each ``GCNConv`` layer.
      batch: Vector passed to ``global_mean_pool`` that groups nodes by the
        graph they belong to.

    Returns:
      Unnormalised class scores (logits) produced by the final linear layer,
      one row per pooled graph.
    """
    # 1. Node embeddings: three rounds of graph convolution; no activation
    #    after the last one.
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = self.conv2(x, edge_index)
    x = x.relu()
    x = self.conv3(x, edge_index)

    # 2. Readout: average node embeddings within each graph.
    x = global_mean_pool(x, batch)

    # 3. Classifier: dropout is only active in training mode.
    x = F.dropout(x, p = self.dropout, training = self.training)
    x = self.lin(x)

    return x

In [15]:
# Build the classifier with 64-dimensional hidden layers and show its layout.
model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)
