In [2]:
# CPU-only setup: install PyTorch Geometric into the active kernel's environment.
# The recorded run below resolved this to torch_geometric 2.3.1.
%pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: torch_geometric
  Building wheel for torch_geometric (pyproject.toml) ... [?25ldone
[?25h  Created wheel for torch_geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910459 sha256=60f942843f5e23ae3f1c1b247c48a6d36bb22cf4850dcc8c157fb257301f4922
  Stored in directory: /home/codespace/.cache/pip/wheels/ac/dc/30/e2874821ff308ee67dcd7a66dbde912411e19e35a1addda028
Successfully built torch_geometric
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.3.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Optional dependencies: extension packages for PyG, fetched as prebuilt wheels
# from the PyG wheel index given via -f (here the torch 2.0.0, CPU index).
# NOTE(review): the "torch-2.0.0+cpu" part of the URL presumably has to match the installed torch build - confirm.
%pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/pyg_lib-0.2.0%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (627 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m627.0/627.0 kB[0m [31m861.8 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.1%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (504 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m504.1/504.1 kB[0m [31m702.1 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.17%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/t

A graph is a model used to represent the relationships between nodes. In PyG, a graph is represented by an instance of `torch_geometric.data.Data`, which has the following default attributes:
- Data.x: it represents the feature matrix of nodes, and the shape is [num_nodes, num_node_features].
- Data.edge_index: the graph connectivity in COO format, with shape [2, num_edges] and dtype torch.long. COO (coordinate format) is a way of describing a sparse matrix: only the non-zero elements are stored, by their coordinates and values, while zero elements are omitted to save space. In Data.edge_index, the first row holds the source node index of every edge, and the second row holds the corresponding target node index.
- Data.edge_attr: a feature matrix representing an edge, with the shape of [num_edges, num_edge_features].
- Data.y: the training label (may have any shape). For example, a node-level label has shape [num_nodes, \*], and a graph-level label has shape [1, \*].
- Data.pos: indicates the location matrix of nodes, and the shape is [num_nodes, num_dimensions].

In [5]:
import torch
from torch_geometric.data import Data

# Graph connectivity in COO layout: row 0 lists the source nodes, row 1 the target nodes.
# Every undirected edge is listed once per direction.
edge_index = torch.tensor(
    [
        [0, 1, 1, 2],
        [1, 0, 2, 1],
    ],
    dtype=torch.long,
)
# One scalar feature for each of the three nodes.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
data  # Data(x=[3, 1], edge_index=[2, 4]): 3 nodes, 1 feature each, 2 undirected edges stored as 4 directed entries

Data(x=[3, 1], edge_index=[2, 4])

![Alt text](https://pytorch-geometric.readthedocs.io/en/latest/_images/graph.svg "graph structure")

In [6]:
import torch
from torch_geometric.data import Data

# Alternative construction: write the edges one per row (shape [num_edges, 2])
# and transpose into the [2, num_edges] layout when building the Data object.
edge_index = torch.tensor(
    [[0, 1], [1, 0], [1, 2], [2, 1]],
    dtype=torch.long,
)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# The transposed tensor is made contiguous before it is handed to Data.
data = Data(x=x, edge_index=edge_index.t().contiguous())
data

Data(x=[3, 1], edge_index=[2, 4])

You can check whether the resulting Data object meets these requirements by calling `validate()`.

In [7]:
# Check the Data object for consistency; the recorded run returned True.
data.validate(raise_on_error=True)

True

In [17]:
# Inspect the Data object: its attribute names, a single attribute, then every (key, value) pair.
# NOTE(review): `data.keys` is read as an attribute here, matching the recorded PyG 2.3.1 run;
# newer PyG releases may expose it as a method - confirm before upgrading.
print(data.keys)
print(data['x'])
print('---------------------')
for key,item in data:
    print(f'{key}:{item}')
print('---------------------')
# Basic graph statistics followed by structural checks.
print("number of nodes: ",data.num_nodes)
print("number of edges: ",data.num_edges)  # An undirected edge counts as two edges.
print(data.num_node_features)
print(data.has_isolated_nodes())
print(data.has_self_loops())
print(data.is_directed())

['edge_index', 'x']
tensor([[-1.],
        [ 0.],
        [ 1.]])
---------------------
x:tensor([[-1.],
        [ 0.],
        [ 1.]])
edge_index:tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])
---------------------
number of nodes:  3
number of edges:  4
1
False
False
False


In [18]:
# Transfer the data object to the GPU when CUDA is available; otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

In [19]:
# Public benchmark dataset: ENZYMES, loaded through TUDataset.
# In the recorded run this call downloaded and processed the files under ./data/ENZYMES (see the log below).
from torch_geometric.datasets import TUDataset
dataset = TUDataset(root='./data/ENZYMES',name='ENZYMES')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting data/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


In [21]:
# Dataset-level statistics for ENZYMES (values taken from the recorded output).
print(len(dataset)) # number of graphs: 600
print(dataset.num_classes) # number of graph classes: 6
print(dataset.num_node_features) # features per node: 3

600
6
3


In [22]:
data = dataset[0] # 37 nodes x 3 features, 168 directed edge entries (84 undirected edges), 1 graph-level label
print(data)
print(data.is_undirected())

Data(edge_index=[2, 168], x=[37, 3], y=[1])
True


In [23]:
# Seed torch's global RNG so the shuffled order, and therefore the train/test
# split, is the same on every Restart & Run All.
torch.manual_seed(42)
dataset = dataset.shuffle()

# 540 / 60 split of the 600 ENZYMES graphs (90% train, 10% test).
train_dataset = dataset[:540]
test_dataset = dataset[540:]

In [24]:
# Another public dataset: Cora, loaded through Planetoid.
# In the recorded run this call downloaded the raw files and processed them under ./data/Cora.
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='./data/Cora',name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [25]:
# Dataset-level statistics for Cora (values taken from the recorded output).
print(len(dataset))  # number of graphs: 1
print(dataset.num_classes)  # number of classes: 7
print(dataset.num_node_features)  # features per node: 1433

1
7
1433


In [27]:
# The single Cora graph, which carries masks marking the train / validation / test nodes.
data = dataset[0]
print(data)
print(data.is_undirected())
# Sum of each mask (140 / 500 / 1000 in the recorded run).
print(data.train_mask.sum().item())
print(data.val_mask.sum().item())
print(data.test_mask.sum().item())

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
True
140
500
1000


In [30]:
# Mini-batching: iterate over ENZYMES in shuffled batches of up to 32 graphs.
from torch_geometric.datasets import TUDataset
# `torch_geometric.data.DataLoader` is the deprecated import path in PyG 2.x;
# the loader lives in `torch_geometric.loader`.
from torch_geometric.loader import DataLoader
from torch_geometric.utils import scatter

dataset = TUDataset(root='./data/ENZYMES',name='ENZYMES',use_node_attr=True)
loader = DataLoader(dataset,batch_size=32,shuffle=True)
for batch in loader:
    print(batch.num_graphs)
    # Per-graph mean of the node features. Note that this must read from the
    # loop variable `batch`, not from the `data` object of an earlier cell:
    # x = scatter(batch.x, batch.batch, dim=0, reduce='mean')
    # print(x.size())

32
32
32
32
32
32
32
32
32
32
32
32
32
32
32
32
32
32
24


In [31]:
# GCN example: switch `dataset` back to Cora, since the previous cell rebound it to ENZYMES.
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='./data/Cora',name='Cora')

In [35]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector. When omitted,
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used, so a bare ``GCN()`` call behaves as before.
        hidden_channels: Width of the hidden layer (default 16).
        out_channels: Number of output classes. When omitted,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Fall back to the global dataset only when the sizes are not given
        # explicitly; passing them makes the model independent of kernel state.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return class log-probabilities (log-softmax along dim 1) for the nodes of ``data``.

        ``data`` must provide ``x`` (node features) and ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # `self.training` is toggled by model.train() / model.eval(), so
        # dropout is applied only while training.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [36]:
# Train the GCN for 200 epochs on the first graph of `dataset`; each epoch is one full forward/backward pass.
# The loss is computed only on the nodes selected by train_mask.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=0.01,weight_decay=5e-4) # weight_decay is L2 regularization
model.train()  # training mode, so the dropout in GCN.forward is active
for epoch in range(200):
    optimizer.zero_grad()  # clear the gradients left from the previous step
    out = model(data)  # log-softmax output of GCN.forward
    loss = F.nll_loss(out[data.train_mask],data.y[data.train_mask])  # negative log-likelihood on the training nodes
    loss.backward()
    optimizer.step()

In [37]:
# Evaluate classification accuracy on the nodes selected by test_mask.
model.eval()  # evaluation mode: the dropout in GCN.forward is switched off
# Inference needs no gradients; no_grad() avoids recording the autograd graph.
with torch.no_grad():
    pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask]==data.y[data.test_mask]).sum()
acc = int(correct)/int(data.test_mask.sum())
print(f'Accuracy:{acc:.4f}')

Accuracy:0.8020
