# Graph Classification with Graph Neural Networks
- Embed entire graphs
* Coomon task: Molecular property prediction
    - Molecules are representd as graphs
    - Infer whetger a molecule inhibits HIV cirus replication or not

## TUDatasets
- MUTAG dataset

In [1]:
import torch
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='data/TUDataset', name='MUTAG')

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[1]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_index=[2, 28], x=[13, 7], edge_attr=[28, 4], y=[1])
Number of nodes: 13
Number of edges: 28
Average node degree: 2.15
Has isolated nodes: False
Has self-loops: False
Is undirected: True


- Classify 188 different graphs into **one out two classes**

In [2]:
torch.manual_seed(12345)
dataset = dataset.shuffle()

train_dataset = dataset[:150]
test_dataset = dataset[150:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

Number of training graphs: 150
Number of test graphs: 38


### Mini-batching of graphs
- Batch the graphs
- **CNN** 
    * Typically achieved by rescaling or padding each example into a set of equally sized shapes
    * Then grouped in an additional dimension
    * Not feasible or may result  a lot of unnecessary meomory consumption for GNN
- **GNN**
- Adjacency metrices are stacked in a diagonal fashion
- Node and target features are simly concated
- Advamtages:
    1. GNN operators that rely on a message passing scheme do not need to be modified since messages are not exchanged between two nodes that belong to different graphs.
    2. There is no computational or memory overhead since adjacency matrices are saved in a sparse fashion holding only non-zero entries, i.e., the edges.


In [3]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()

Step 1:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 2636], x=[1188, 7], edge_attr=[2636, 4], y=[64], batch=[1188], ptr=[65])

Step 2:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 2506], x=[1139, 7], edge_attr=[2506, 4], y=[64], batch=[1139], ptr=[65])

Step 3:
Number of graphs in the current batch: 22
DataBatch(edge_index=[2, 852], x=[387, 7], edge_attr=[852, 4], y=[22], batch=[387], ptr=[23])



### Training a Graph Neural Network (GNN)


1. Embed each node by performing multiple rounds of message passing
2. Aggregate node embeddings into a unified graph embedding (readout layer)
3. Train a final classifier on the graph embedding

In [4]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        print(x.size())
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [6]:
from IPython.display import Javascript
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
         #print(data)
         out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
         loss = criterion(out, data.y)  # Compute the loss.
         loss.backward()  # Derive gradients.
         optimizer.step()  # Update parameters based on gradients.
         optimizer.zero_grad()  # Clear gradients.

def test(loader):
     model.eval()

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((pred == data.y).sum())  # Check against ground-truth labels.
     return correct / len(loader.dataset)  # Derive ratio of correct predictions.


for epoch in range(1, 20):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

<IPython.core.display.Javascript object>

DataBatch(edge_index=[2, 2454], x=[1118, 7], edge_attr=[2454, 4], y=[64], batch=[1118], ptr=[65])
torch.Size([1118, 64])
DataBatch(edge_index=[2, 2712], x=[1220, 7], edge_attr=[2712, 4], y=[64], batch=[1220], ptr=[65])
torch.Size([1220, 64])
DataBatch(edge_index=[2, 828], x=[376, 7], edge_attr=[828, 4], y=[22], batch=[376], ptr=[23])
torch.Size([376, 64])
torch.Size([1153, 64])
torch.Size([1169, 64])
torch.Size([392, 64])
torch.Size([657, 64])
Epoch: 001, Train Acc: 0.6467, Test Acc: 0.7368
DataBatch(edge_index=[2, 2568], x=[1160, 7], edge_attr=[2568, 4], y=[64], batch=[1160], ptr=[65])
torch.Size([1160, 64])
DataBatch(edge_index=[2, 2502], x=[1138, 7], edge_attr=[2502, 4], y=[64], batch=[1138], ptr=[65])
torch.Size([1138, 64])
DataBatch(edge_index=[2, 924], x=[416, 7], edge_attr=[924, 4], y=[22], batch=[416], ptr=[23])
torch.Size([416, 64])
torch.Size([1173, 64])
torch.Size([1162, 64])
torch.Size([379, 64])
torch.Size([657, 64])
Epoch: 002, Train Acc: 0.6467, Test Acc: 0.7368
DataBatc

### Neighborhood normalization decreases the expressivity of GNNs in distinguishing certain graph structures
- Omits neighborhood normalization completely
- Adds a simple skip-connection to the GNN layer in order to preserve central node information

In [6]:
from torch_geometric.nn import GraphConv


class GNN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GNN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GraphConv(dataset.num_node_features, hidden_channels)  # TODO
        self.conv2 = GraphConv(hidden_channels, hidden_channels)  # TODO
        self.conv3 = GraphConv(hidden_channels, hidden_channels)  # TODO
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        x = global_mean_pool(x, batch)

        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

model = GNN(hidden_channels=64)
print(model)

GNN(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [7]:
from IPython.display import Javascript
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GNN(hidden_channels=64)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(1, 201):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

<IPython.core.display.Javascript object>

GNN(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)
Epoch: 001, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 002, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 003, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 004, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 005, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 006, Train Acc: 0.6800, Test Acc: 0.7632
Epoch: 007, Train Acc: 0.7667, Test Acc: 0.8158
Epoch: 008, Train Acc: 0.7200, Test Acc: 0.8158
Epoch: 009, Train Acc: 0.7533, Test Acc: 0.8158
Epoch: 010, Train Acc: 0.7933, Test Acc: 0.8421
Epoch: 011, Train Acc: 0.7533, Test Acc: 0.8158
Epoch: 012, Train Acc: 0.7667, Test Acc: 0.8684
Epoch: 013, Train Acc: 0.7733, Test Acc: 0.8421
Epoch: 014, Train Acc: 0.8000, Test Acc: 0.8684
Epoch: 015, Train Acc: 0.7867, Test Acc: 0.7632
Epoch: 016, Train Acc: 0.7800, Test Acc: 0.8421
Epoch: 017, Train Acc: 0.8067, Test Acc: 0.8421
Epoch: 018, Train Acc: 0.7800, T