### Imports section

In [None]:
!pip install torch
import torch
print(torch.__version__)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
1.12.1+cu113


In [None]:
!pip install torch-geometric torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.1+cu113.html
Collecting torch-geometric
  Downloading torch_geometric-2.1.0.post1.tar.gz (467 kB)
[K     |████████████████████████████████| 467 kB 7.6 MB/s 
[?25hCollecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 6.3 MB/s 
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_sparse-0.6.15-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 40.6 MB/s 
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.1.0.post1-py3-none-any.whl size=689859 sha256=60066964b036041b8805582b375c16d951040c

## Alrighty, now to do the tutorial
- As described here: https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html#

### Intro stuff

In [None]:
import torch
from torch_geometric.data import Data

# Connectivity as a [2, num_edges] tensor: column i holds the two endpoints of edge i.
# Each undirected edge (0-1 and 1-2) is listed once per direction, hence 4 columns.
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
# One scalar feature for each of the 3 nodes -> shape [3, 1].
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)

In [None]:
# The repr lists each stored attribute together with its tensor shape.
data

Data(x=[3, 1], edge_index=[2, 4])

In [None]:
# Alternative definition: one index pair per row, i.e. shape [num_edges, 2].
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# .t() is what turns the list of index tuples into the [2, num_edges] layout that Data expects;
# .contiguous() only makes the transposed view contiguous in memory.
data = Data(x=x, edge_index=edge_index.t().contiguous())

In [None]:
# After transposing, edge_index again has shape [2, 4] (see the repr below).
data

Data(x=[3, 1], edge_index=[2, 4])

In [None]:
# Names of the attributes stored on the object. Accessed as a property here (no call);
# NOTE(review): newer torch_geometric releases may expose `keys()` as a method — confirm against the installed version.
data.keys

['x', 'edge_index']

In [None]:
# Attributes can be read with dict-style indexing by name.
data['x']

tensor([[-1.],
        [ 0.],
        [ 1.]])

In [None]:
# Iterating over a Data object yields (attribute name, value) pairs; only the name is printed.
for key, item in data:
  print(f'{key} found in data')

x found in data
edge_index found in data


### Common Benchmark Datasets

In [None]:
from torch_geometric.datasets import TUDataset

In [None]:
# Load the ENZYMES dataset from the TU collection; on this run the archive was
# downloaded, extracted and processed under `root` (see the log below).
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting /tmp/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


In [None]:
# The repr shows the dataset name and how many graphs it contains.
dataset

ENZYMES(600)

In [None]:
# Number of target classes in the dataset.
dataset.num_classes

6

In [None]:
# Number of features stored per node.
dataset.num_node_features

3

In [None]:
# Integer indexing returns a single graph as a Data object. Per the repr below it has
# 37 nodes with 3 features each, 168 edge_index columns, and one label in y.
data = dataset[0]
data

Data(edge_index=[2, 168], x=[37, 3], y=[1])

In [None]:
# Check whether the edges of this graph are undirected.
data.is_undirected()

True

In [None]:
# Seed torch's global RNG before shuffling so the permutation, and therefore the
# slices taken from the shuffled dataset, are the same on every run.
torch.manual_seed(0)
dataset = dataset.shuffle()

In [None]:
# Slicing a dataset returns another dataset: the first 540 of the 600 graphs (90%) are used for training.
train_dataset = dataset[:540]
train_dataset

ENZYMES(540)

In [None]:
# The remaining 60 graphs are held out for testing.
test_dataset = dataset[540:]
test_dataset

ENZYMES(60)

#### Another dataset to test with

In [None]:
from torch_geometric.datasets import Planetoid

In [None]:
# Load Cora from the Planetoid collection; on this run the raw files were downloaded
# and processed under `root` (see the log below). This rebinds `dataset`.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
dataset

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Cora()

In [None]:
# Cora is stored as one single graph, so the dataset has length 1.
len(dataset)

1

In [None]:
# Number of target classes in the dataset.
dataset.num_classes

7

In [None]:
# Number of features stored per node.
dataset.num_node_features

1433

In [None]:
# The single Cora graph: 2708 nodes with 1433 features each, a label per node in y,
# and per-node train/val/test masks (shapes as shown in the repr below).
data = dataset[0]
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [None]:
# The Cora Data object comes with predefined train/val/test node masks.
# Summing each mask gives the number of nodes in that split (assuming 0/1 or boolean masks).
print(data.train_mask.sum().item())
print(data.val_mask.sum().item())
print(data.test_mask.sum().item())

140
500
1000


### Mini-batches

In [None]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

In [None]:
# use_node_attr=True loads additional node attributes: x has 21 columns in the batches below,
# versus the 3 node features reported for the plain ENZYMES load earlier.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
# Iterate in mini-batches of 32 graphs, in shuffled order.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
# Each iteration yields a DataBatch collating the graphs of one mini-batch. In the output
# below, `batch` has one entry per node (same length as x) and `ptr` has num_graphs + 1 entries.
for batch in loader:
  print(batch)
  print(batch.num_graphs)

DataBatch(edge_index=[2, 3984], x=[1151, 21], y=[32], batch=[1151], ptr=[33])
32
DataBatch(edge_index=[2, 4016], x=[1057, 21], y=[32], batch=[1057], ptr=[33])
32
DataBatch(edge_index=[2, 4318], x=[1188, 21], y=[32], batch=[1188], ptr=[33])
32
DataBatch(edge_index=[2, 4314], x=[1182, 21], y=[32], batch=[1182], ptr=[33])
32
DataBatch(edge_index=[2, 3884], x=[991, 21], y=[32], batch=[991], ptr=[33])
32
DataBatch(edge_index=[2, 3670], x=[978, 21], y=[32], batch=[978], ptr=[33])
32
DataBatch(edge_index=[2, 3600], x=[936, 21], y=[32], batch=[936], ptr=[33])
32
DataBatch(edge_index=[2, 4624], x=[1166, 21], y=[32], batch=[1166], ptr=[33])
32
DataBatch(edge_index=[2, 3708], x=[961, 21], y=[32], batch=[961], ptr=[33])
32
DataBatch(edge_index=[2, 4288], x=[1126, 21], y=[32], batch=[1126], ptr=[33])
32
DataBatch(edge_index=[2, 3886], x=[1050, 21], y=[32], batch=[1050], ptr=[33])
32
DataBatch(edge_index=[2, 4424], x=[1177, 21], y=[32], batch=[1177], ptr=[33])
32
DataBatch(edge_index=[2, 3932], x=[9

#### Averaging node features in node dim (for each graph individually) example
- Note: `batch` is a column vector which maps each node to its respective graph in the batch

In [None]:
# scatter_mean provides the per-group averaging used in the loop below.
from torch_scatter import scatter_mean
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

In [None]:

# Same dataset and loader configuration as the earlier mini-batch cell:
# ENZYMES with the extra node attributes, batches of 32 graphs, shuffled order.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
# Note: the loop variable rebinds the notebook-level name `data` to each batch in turn.
for data in loader:
  print(data)
  print(data.num_graphs)
  # Average node features along dim 0, grouping the rows of data.x by the graph index in data.batch.
  x = scatter_mean(data.x, data.batch, dim=0)
  # One row per graph: [32, 21] for the full batches in the recorded output.
  print(x.size())

DataBatch(edge_index=[2, 4188], x=[1066, 21], y=[32], batch=[1066], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3902], x=[1012, 21], y=[32], batch=[1012], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3864], x=[1042, 21], y=[32], batch=[1042], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4184], x=[1103, 21], y=[32], batch=[1103], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3622], x=[945, 21], y=[32], batch=[945], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4308], x=[1186, 21], y=[32], batch=[1186], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3866], x=[972, 21], y=[32], batch=[972], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4192], x=[1089, 21], y=[32], batch=[1089], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4052], x=[1032, 21], y=[32], batch=[1032], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3620], x=[927, 21], y=[32], batch=[927], ptr=[33])
32
torch.S

### Data Transforms

In [None]:
from torch_geometric.datasets import ShapeNet

In [None]:
# Load only the 'Airplane' category, with no transforms. Per the repr below, a sample
# carries x, y, pos and category but no edge_index.
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
dataset[0]

Processing...
Done!


Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

In [None]:
import shutil

# pre_transform runs only while the processed files are being built. If `root` already holds
# a copy that was processed without it, that cache is reused and the samples have no
# edge_index (the symptom in the recorded output). Detect that case and rebuild.
dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], pre_transform=T.KNNGraph(k=6))
if dataset[0].edge_index is None:
    # Drop the stale processed copy so the k-NN graph (k=6) is generated on the next load.
    shutil.rmtree(dataset.processed_dir, ignore_errors=True)
    dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], pre_transform=T.KNNGraph(k=6))
dataset[0]

Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

In [None]:
import torch_geometric.transforms as T
from torch_geometric.datasets import ShapeNet

In [None]:
import shutil

# pre_transform (k-NN graph, k=6) is applied once, when the processed files are built;
# transform (random jitter of up to 0.01) is applied every time a sample is accessed.
shapenet_kwargs = dict(
    root='/tmp/ShapeNet',
    categories=['Airplane'],
    pre_transform=T.KNNGraph(k=6),
    transform=T.RandomJitter(0.01),
)
dataset = ShapeNet(**shapenet_kwargs)
if dataset[0].edge_index is None:
    # A cache processed without the pre_transform was reused: rebuild it so samples get edges.
    shutil.rmtree(dataset.processed_dir, ignore_errors=True)
    dataset = ShapeNet(**shapenet_kwargs)

In [None]:
# Fetch the first sample; the dataset's `transform` is applied at access time.
dataset[0]

Data(x=[2518, 3], y=[2518], pos=[2518, 3], category=[1])

### Learning methods on a graph: finally!

In [None]:
from torch_geometric.datasets import Planetoid

# Rebind `dataset` to Cora (it last referred to ShapeNet); the model and training cells below read it.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
dataset

Cora()

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# 2 layer graph convolutional neural net
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    Args:
        in_channels: Number of input features per node. When omitted, falls back
            to ``dataset.num_node_features`` of the notebook-level ``dataset``.
        hidden_channels: Output width of the first convolution (default 16).
        out_channels: Output width of the second convolution. When omitted, falls
            back to ``dataset.num_classes`` of the notebook-level ``dataset``.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Resolve defaults at construction time so `GCN()` keeps working as before,
        # while explicit sizes make the class usable without the global `dataset`.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data`` and return log-softmax scores.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            Log-softmax over dim 1 of the second convolution's output
            (presumably one row per node, one column per class).
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [None]:
# Prefer the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The model and the single Cora graph are moved to the same device.
model = GCN()
model = model.to(device)
data = dataset[0]
data = data.to(device)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Training mode enables the dropout inside GCN.forward.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # Only the nodes selected by train_mask contribute to the loss.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

In [None]:
# Evaluation mode disables the dropout inside GCN.forward.
model.eval()
# Inference only: skip autograd bookkeeping so no graph is built for the forward pass.
with torch.no_grad():
    pred = model(data).argmax(dim=1)
# Accuracy = correctly predicted test nodes / number of test nodes.
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7950
