# Basic Graph Neural Networks with Python

> Philipp Zimmermann

### Imports

In [1]:
# Logic
import torch
from torch_geometric.data import Data

# Visualization
from IPython.display import clear_output
import ipywidgets as widgets

### Versions

In [2]:
# Collect the version strings first, then report them together
TORCH = torch.__version__   # installed PyTorch release
CUDA = torch.version.cuda   # CUDA version reported by this PyTorch build

print(f'PyTorch: {TORCH}')
print(f'Cuda:    {CUDA}')

PyTorch: 1.8.0+cu111
Cuda:    11.1


### Graph Object

In [3]:
# Node feature matrix: 3 nodes, each carrying a single feature (-1, 0 and 1)
nodes = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# Edge list with one [source, target] pair per row (4 edges in total).
# The graph is undirected: every connection is listed once in each direction
# (0 -> 1 and 1 -> 0, 1 -> 2 and 2 -> 1).
# It is unweighted: no edge values are attached.
edge_index = torch.tensor(
    [[0, 1], [1, 2], [1, 0], [2, 1]],
    dtype=torch.long,
)

# Graph: the pair list is transposed to shape [2, num_edges] and made
# contiguous before it is handed to Data
coo_edge_index = edge_index.t().contiguous()
graph = Data(x=nodes, edge_index=coo_edge_index)

#### Graph Information

In [4]:
# One row per query: description, the command as text, and its evaluated result
cmds = [
    ['Graph Object',       'graph',                           graph],
    ['Amount of nodes',    'graph.num_nodes',                 graph.num_nodes],
    ['Amount of edges',    'graph.num_edges',                 graph.num_edges],
    ['Amount of features', 'graph.num_node_features',         graph.num_node_features],
    ['Isolated nodes',     'graph.contains_isolated_nodes()', graph.contains_isolated_nodes()],
    ['Self loops',         'graph.contains_self_loops()',     graph.contains_self_loops()],
    ['Directed',           'graph.is_directed()',             graph.is_directed()]
]

# Table header and horizontal rule
print('  Description\t\tCommand\t\t\t\t\tOutput\n')
print('-' * 99)

# Left-justify the first two columns to fixed widths (20 and 34 characters)
# so the tab-separated columns line up
for label, expression, result in cmds:
    print(f'  {label:<20}\t{expression:<34}\t{result}')

  Description		Command					Output

---------------------------------------------------------------------------------------------------
  Graph Object        	graph                             	Data(edge_index=[2, 4], x=[3, 1])
  Amount of nodes     	graph.num_nodes                   	3
  Amount of edges     	graph.num_edges                   	4
  Amount of features  	graph.num_node_features           	1
  Isolated nodes      	graph.contains_isolated_nodes()   	False
  Self loops          	graph.contains_self_loops()       	False
  Directed            	graph.is_directed()               	False


#### Use CUDA

In [5]:
# transfer data object to the GPU when one is available; fall back to the CPU
# so this cell does not raise on machines without CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
graph = graph.to(device)

### Data Handling

In [6]:
# Dataset
from torch_geometric.datasets import TUDataset

# ENZYMES dataset from the TU collection, using /tmp/ENZYMES as its root directory
dataset = TUDataset(
    root='/tmp/ENZYMES',
    name='ENZYMES',
)

In [7]:
# summary of the dataset: number of graphs, target classes and node features
print(
    f'Length:   {len(dataset)}\n'
    f'Classes:  {dataset.num_classes}\n'
    f'Features: {dataset.num_node_features}'
)

Length:   600
Classes:  6
Features: 3


In [8]:
# fix the torch random seed first so the shuffle (and the torch-based random
# steps that follow it) give the same result on every Restart & Run All
SEED = 42
torch.manual_seed(SEED)

# shuffle the dataset
dataset = dataset.shuffle()

In [9]:
# the first 540 graphs form the training set, everything after them the test set
split_at = 540
train_data, test_data = dataset[:split_at], dataset[split_at:]

In [10]:
# Batches
from torch_geometric.data import DataLoader

# mini-batches of up to 64 graphs each, drawn in shuffled order
loader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
)

# print the layout of every batch
for batch in loader:
    print(batch)

Batch(batch=[2037], edge_index=[2, 7888], x=[2037, 3], y=[64])
Batch(batch=[1966], edge_index=[2, 7678], x=[1966, 3], y=[64])
Batch(batch=[1899], edge_index=[2, 7296], x=[1899, 3], y=[64])
Batch(batch=[2099], edge_index=[2, 8080], x=[2099, 3], y=[64])
Batch(batch=[2038], edge_index=[2, 7844], x=[2038, 3], y=[64])
Batch(batch=[2339], edge_index=[2, 8562], x=[2339, 3], y=[64])
Batch(batch=[2185], edge_index=[2, 8380], x=[2185, 3], y=[64])
Batch(batch=[2162], edge_index=[2, 8280], x=[2162, 3], y=[64])
Batch(batch=[2157], edge_index=[2, 7810], x=[2157, 3], y=[64])
Batch(batch=[698], edge_index=[2, 2746], x=[698, 3], y=[24])


### Learning Methods

In [11]:
# dataset
from torch_geometric.datasets import Planetoid

# Cora dataset from the Planetoid collection, using /tmp/Cora as its root directory
dataset = Planetoid(
    root='/tmp/Cora',
    name='Cora',
)

In [12]:
# imports
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

In [13]:
# 2-layer GCN
class Net(torch.nn.Module):
    """Two-layer graph convolutional network returning per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When ``None``,
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used.
        hidden_channels: Output size of the first convolution (default 16).
        out_channels: Number of output classes. When ``None``,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None):
        super().__init__()
        # Only fall back to the global dataset when a size is not given, so
        # ``Net()`` keeps working while the model can also be built for other data.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` / ``data.edge_index``.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            Log-softmax of the second convolution's output, taken over dim 1.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # dropout follows the module's train/eval mode
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [14]:
# choose the GPU when present, otherwise the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# network and the first entry of the dataset, both placed on that device
model = Net().to(device)
data = dataset[0].to(device)

# Adam with learning rate 0.01 and weight decay 5e-4
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

In [15]:
# switch to training mode, then run 200 optimisation steps on the whole graph
model.train()
train_mask = data.train_mask
for epoch in range(200):
    optimizer.zero_grad()
    log_probs = model(data)
    # the loss is computed on the nodes selected by the training mask only
    loss = F.nll_loss(log_probs[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()

In [16]:
# evaluate model
model.eval()

# inference needs no gradients: disabling autograd avoids building a graph.
# argmax yields the predicted class directly, without binding `_`
# (which IPython uses for the last cell output).
with torch.no_grad():
    pred = model(data).argmax(dim=1)

# accuracy over the nodes selected by the test mask
test_mask = data.test_mask
correct = int((pred[test_mask] == data.y[test_mask]).sum().item())
acc = correct / int(test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7860


### References

[1] https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html