# Geometric Deep Learning
*Hilary Term 2023*

---
This notebook conducts a number of experiments to validate the claim that the removal of the encoder in the Gradient Flow Framework (GRAFF) does not significantly diminish performance.

Run the cell below to install the necessary dependencies for this notebook.

In [1]:
# Install PyTorch 1.12.1 (with matching torchvision/torchaudio) as CUDA 11.3
# builds, resolved through the extra PyTorch wheel index given below.
!pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
# torch-scatter and torch-sparse are looked up on the PyG wheel page for
# torch 1.12.1 + cu113; their own versions are not pinned.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.12.1+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.12.1+cu113.html
# NOTE(review): PyTorch Geometric is installed from the repository's default
# branch and `ogb` has no version constraint, so a fresh run may install
# different versions than the ones that produced the recorded outputs --
# consider pinning both.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install ogb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://download.pytorch.org/whl/cu113
Collecting torch==1.12.1+cu113
  Downloading https://download.pytorch.org/whl/cu113/torch-1.12.1%2Bcu113-cp39-cp39-linux_x86_64.whl (1837.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 GB 983.8 kB/s eta 0:00:00
Collecting torchvision==0.13.1+cu113
  Downloading https://download.pytorch.org/whl/cu113/torchvision-0.13.1%2Bcu113-cp39-cp39-linux_x86_64.whl (23.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.4/23.4 MB 66.2 MB/s eta 0:00:00
Collecting torchaudio==0.12.1
  Downloading https://download.pytorch.org/whl/cu113/torchaudio-0.12.1%2Bcu113-cp39-cp39-linux_x86_64.whl (3.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/3.8 MB 93.5 MB/s eta 0:00:00
Installing collected packages: torch, torchvis

Run the cell below to import the necessary libraries and provided external code.

In [1]:
import numpy as np
import torch.nn as nn

from typing import List, Tuple, Callable

from utils import train
from models import MLP, GCN, StandardGRAFF, get_device

from torch_geometric.data.data import Data
from torch_geometric.data.dataset import Dataset

from torch_geometric.datasets import Planetoid, Coauthor
from torch_geometric.transforms import ToUndirected, RandomNodeSplit

---
## Datasets
The cell below downloads the five transductive node classification datasets: the CS and Physics co-author graphs and the Cora, CiteSeer and PubMed citation networks.

In [None]:
# Start with the two co-author graphs (CS and Physics).
datasets = [Coauthor(root='/tmp', name=name) for name in ('CS', 'Physics')]

# Append the three citation networks (Cora, CiteSeer and PubMed), each loaded
# with the 'geom-gcn' split and with ToUndirected set as the dataset transform.
datasets += [
    Planetoid(root='/tmp', name=name, split='geom-gcn', transform=ToUndirected())
    for name in ('Cora', 'CiteSeer', 'PubMed')
]

# Pair every dataset with its graph at index 0, moved to whatever device
# get_device() reports, giving a list of (dataset, data) tuples.
datasets = [(ds, ds[0].to(get_device())) for ds in datasets]

---
## Experimental Setup
The cell below defines the experimental setup to be run for each model type and dataset.

In [None]:
def run_experiment(
    datasets: List[Tuple[Dataset, Data]],
    learning_rates: List[float],
    model_func: Callable,
    num_runs: int = 10) -> None:
    """Train a fresh model several times per dataset and print the test accuracy.

    For every ``(dataset, data)`` pair a new model is built with
    ``model_func(dataset)`` on each run and handed to ``train`` together with
    the node features, labels, edges and the train/validation/test masks. The
    value returned by ``train`` is collected as the run's test accuracy, and
    the mean and standard deviation over all runs are printed per dataset.

    Args:
        datasets: ``(dataset, data)`` pairs; ``data`` supplies ``x``, ``y``,
            ``edge_index`` and, for non-``Coauthor`` datasets, 2-D
            ``train_mask``/``val_mask``/``test_mask`` attributes that are
            indexed by column.
        learning_rates: One learning rate per entry of ``datasets``, in the
            same order.
        model_func: Called as ``model_func(dataset)`` once per run to obtain a
            new model.
        num_runs: Number of training runs per dataset. For non-``Coauthor``
            datasets the run index selects the mask column, so ``num_runs``
            must not exceed the number of mask columns.

    Raises:
        ValueError: If ``learning_rates`` and ``datasets`` differ in length.
    """
    # zip() below stops at the shorter input, which would silently skip
    # datasets (or learning rates); reject mismatched lengths up front.
    if len(learning_rates) != len(datasets):
        raise ValueError(
            f'Expected one learning rate per dataset, got '
            f'{len(learning_rates)} learning rates for {len(datasets)} datasets.'
        )

    # Iterate over each of the given datasets.
    for learning_rate, (dataset, data) in zip(learning_rates, datasets):
        test_accs = []

        # Iterate for the given number of runs.
        for run in range(num_runs):
            # Get a new model.
            model = model_func(dataset)

            # Use a random train/validation/test split for the co-author datasets.
            if isinstance(dataset, Coauthor):
                # Read the masks from the object the transform returns rather
                # than from `data`: whether a transform mutates its input in
                # place depends on the installed PyTorch Geometric version,
                # while the returned object always carries the new masks.
                split_data = RandomNodeSplit(
                    split='train_rest', num_val=0.1, num_test=0.2
                )(data)
                train_mask = split_data.train_mask
                val_mask = split_data.val_mask
                test_mask = split_data.test_mask
            else:
                # Use the given split for the citation datasets: column `run`
                # of each mask (presumably one column per predefined split).
                train_mask = data.train_mask[:, run].bool()
                val_mask = data.val_mask[:, run].bool()
                test_mask = data.test_mask[:, run].bool()

            # Train the model and record the test accuracy.
            test_acc = train(
                model,
                data.x,
                data.y,
                data.edge_index,
                train_mask,
                val_mask,
                test_mask,
                learning_rate,
                verbose=False
            )
            test_accs.append(test_acc)

        # Label the result line with the dataset class and, if present, its name.
        name = dataset.__class__.__name__
        if hasattr(dataset, 'name'):
            name += '-' + dataset.name

        print(f'{name}: test_accuracy={np.mean(test_accs):.3f}±{np.std(test_accs):.3f}')

---
## Multi-layer Perceptron

In [5]:
hidden_dim = 128


def model_func_mlp(dataset: Dataset) -> nn.Module:
    """Build a new MLP for ``dataset``.

    Passes, in order, the dataset's feature count, the notebook-level
    ``hidden_dim`` (read at call time) and the dataset's class count to
    ``MLP``.
    """
    in_dim = dataset.num_features
    out_dim = dataset.num_classes
    return MLP(in_dim, hidden_dim, out_dim)


# One learning rate per dataset; run_experiment pairs them up positionally.
learning_rates = (0.001, 0.001, 0.001, 0.001, 0.01)
run_experiment(datasets, learning_rates, model_func_mlp)

Coauthor-CS: test_accuracy=0.951±0.005
Coauthor-Physics: test_accuracy=0.960±0.003
Planetoid-Cora: test_accuracy=0.733±0.020
Planetoid-CiteSeer: test_accuracy=0.717±0.015
Planetoid-PubMed: test_accuracy=0.861±0.005


---
## Graph Convolutional Network

In [6]:
hidden_dim = 128


def model_func_gcn(dataset: Dataset) -> nn.Module:
    """Build a new two-layer GCN for ``dataset``.

    Passes, in order, the dataset's feature count, the notebook-level
    ``hidden_dim`` (read at call time) and the dataset's class count to
    ``GCN``, with ``num_gcn_layers`` fixed to 2.
    """
    in_dim = dataset.num_features
    out_dim = dataset.num_classes
    return GCN(in_dim, hidden_dim, out_dim, num_gcn_layers=2)


# One learning rate per dataset; run_experiment pairs them up positionally.
learning_rates = (0.001, 0.001, 0.001, 0.001, 0.01)
run_experiment(datasets, learning_rates, model_func_gcn)

Coauthor-CS: test_accuracy=0.934±0.004
Coauthor-Physics: test_accuracy=0.963±0.002
Planetoid-Cora: test_accuracy=0.854±0.006
Planetoid-CiteSeer: test_accuracy=0.728±0.015
Planetoid-PubMed: test_accuracy=0.871±0.005


---
## GRAFF with Encoder

In [7]:
hidden_dim = 128


def model_func_encoder_graff(dataset: Dataset) -> nn.Module:
    """Build a new two-layer GRAFF model with the encoder enabled.

    The graph at index 0 of ``dataset`` is moved to the device reported by
    ``get_device()`` and supplies the edge index and node count that
    ``StandardGRAFF`` receives alongside the feature count, the notebook-level
    ``hidden_dim`` (read at call time) and the class count.
    """
    graph = dataset[0]
    graph = graph.to(get_device())

    in_dim = dataset.num_features
    out_dim = dataset.num_classes
    return StandardGRAFF(
        in_dim,
        hidden_dim,
        out_dim,
        graph.edge_index,
        graph.num_nodes,
        use_encoder=True,
        num_graff_layers=2,
    )


# One learning rate per dataset; run_experiment pairs them up positionally.
learning_rates = (0.001, 0.001, 0.001, 0.001, 0.01)
run_experiment(datasets, learning_rates, model_func_encoder_graff)

Coauthor-CS: test_accuracy=0.955±0.003
Coauthor-Physics: test_accuracy=0.971±0.002
Planetoid-Cora: test_accuracy=0.859±0.010
Planetoid-CiteSeer: test_accuracy=0.746±0.017
Planetoid-PubMed: test_accuracy=0.877±0.004


---
## GRAFF without Encoder

In [4]:
def model_func_no_encoder_graff(dataset: Dataset) -> nn.Module:
    """Build a new two-layer GRAFF model with the encoder disabled.

    The graph at index 0 of ``dataset`` is moved to the device reported by
    ``get_device()`` and supplies the edge index and node count. The dataset's
    feature count is passed as both the first and the second positional
    argument of ``StandardGRAFF`` (the second is ``hidden_dim`` in the encoder
    variant), followed by the class count.
    """
    graph = dataset[0]
    graph = graph.to(get_device())

    feature_dim = dataset.num_features
    out_dim = dataset.num_classes
    return StandardGRAFF(
        feature_dim,
        feature_dim,
        out_dim,
        graph.edge_index,
        graph.num_nodes,
        use_encoder=False,
        num_graff_layers=2,
    )


# One learning rate per dataset; run_experiment pairs them up positionally.
learning_rates = (0.001, 0.001, 0.001, 0.001, 0.01)
run_experiment(datasets, learning_rates, model_func_no_encoder_graff)

Coauthor-CS: test_accuracy=0.948±0.003
Coauthor-Physics: test_accuracy=0.968±0.003
Planetoid-Cora: test_accuracy=0.846±0.014
Planetoid-CiteSeer: test_accuracy=0.755±0.017
Planetoid-PubMed: test_accuracy=0.856±0.009
