# Graph GP Classification

## Preamble
This notebook provides an example of Gaussian process classification on a graph using Graph Matern kernel and non-conjugate learning techiques described in the paper.

In [1]:
import tensorflow as tf
import numpy as np
import scipy.special
from sklearn.metrics import accuracy_score
import gpflow
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import networkx as nx
from gpflow.utilities import print_summary
import tensorflow_probability as tfp
import seaborn as sns
from tqdm import tqdm

import sys
import os
project_root = os.path.abspath("../..")
sys.path.append(project_root)
import pandas as pd
import pickle

from efficient_graph_gp.graph_kernels import get_normalized_laplacian
from efficient_graph_gp.gpflow_kernels import GraphDiffusionFastGRFKernel, GraphDiffusionPoFMKernel, GraphDiffusionKernel, GraphGeneralPoFMKernel, GraphGeneralFastGRFKernel
from utils import compute_fro
from cora_utils.preprocessing import load_PEMS, load_cora

In [2]:
dtype = tf.float64
gpflow.config.set_default_float(dtype)
gpflow.config.set_default_summary_fmt("notebook")
tf.get_logger().setLevel('ERROR')

## Load and preprocess the Cora dataset
The nasty details of the dataset preparation are hidden away in `preprocessing` module.

In [3]:
np.random.seed(1)
num_eigenpairs = 500
dataset = 'cora'
cls_number = 7
train_num = 140
test_num = 1000
M = 50

In [4]:
G, data_train, data_test = load_cora(num_train=train_num, num_test=test_num)
adjacency_matrix = nx.to_numpy_array(G) 
x_train, y_train = data_train
x_test, y_test = data_test
Z = x_train[np.random.choice(train_num, M, replace=False)].copy()

## Build a GP model
Here we employ techiques of non-conjugate learning described in the section 3.1 of the paper.

Briefly, we approximate a non-Gaussian posterior with a variational distribution induced by _inducing_ points on the graph.
We choose the inducing points to be all training points.

The model is then the usual `SVGP`.

In [5]:
# -------------------------------
# Run 5 independent seeds
# -------------------------------

SEEDS = [0, 1, 2, 3, 4]
seed_accuracies = []
NUM_EPOCHES = 5


for seed in SEEDS:
    print(f"\n=== Running seed {seed} ===")
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # 1) Sample M inducing points from the 140 training nodes
    indices = np.random.choice(train_num, M, replace=False)
    Z = x_train[indices].copy()  # shape: (M, 1)

    # 2) Build Graph Diffusion kernel
    graph_kernel = GraphDiffusionKernel(
        adjacency_matrix=adjacency_matrix
    )

    # 3) Build SVGP model
    likelihood = gpflow.likelihoods.MultiClass(num_classes=cls_number)
    model = gpflow.models.SVGP(
        kernel=graph_kernel,
        likelihood=likelihood,
        inducing_variable=Z,
        num_latent_gps=cls_number,
        whiten=True,
    )

    # 4) Prepare full-batch training dataset (140 nodes) with infinite repeat
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)) \
                             .shuffle(train_num, seed=seed) \
                             .batch(train_num) \
                             .repeat()
    train_iter = iter(train_ds)

    # 5) Optimizer
    optimizer = tf.optimizers.Adam(learning_rate=0.003)

    # 6) Training loop with tqdm
    print("Training SVGP (Diffusion)…")
    for step in tqdm(range(NUM_EPOCHES), desc=f"Seed {seed} Training", unit="step"):
        Xb, Yb = next(train_iter)
        with tf.GradientTape() as tape:
            loss = model.training_loss((Xb, Yb))  # negative ELBO
        grads = tape.gradient(loss, model.trainable_variables)
        # Filter out any None gradients
        grads_and_vars = [(g, v) for g, v in zip(grads, model.trainable_variables) if g is not None]
        optimizer.apply_gradients(grads_and_vars)

    # 7) Print hyperparameter summary for this seed
    print(f"\nModel hyperparameters (seed={seed}):")
    print_summary(model)

    # 8) Evaluate on the test set
    y_pred_mean, _ = model.predict_y(x_test)
    y_pred = np.argmax(y_pred_mean.numpy(), axis=1).ravel()
    acc = accuracy_score(y_test.ravel(), y_pred)
    print(f"Seed {seed} Test Accuracy: {acc*100:.2f}%")
    seed_accuracies.append((seed, acc))

# After loop, summarize all seeds
print("\n=== Summary over all seeds ===")
for seed, acc in seed_accuracies:
    print(f"Seed {seed}: Accuracy = {acc*100:.2f}%")
mean_acc = np.mean([acc for _, acc in seed_accuracies])
std_acc  = np.std([acc for _, acc in seed_accuracies])
print(f"\nMean accuracy over seeds: {mean_acc*100:.2f}% ± {std_acc*100:.2f}%")



=== Running seed 0 ===
Training SVGP (Diffusion)…


Seed 0 Training: 100%|██████████| 5/5 [04:04<00:00, 48.97s/step]


Model hyperparameters (seed=0):





name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.beta,Parameter,Softplus,,True,(),float64,2.0087
SVGP.kernel.sigma_f,Parameter,Softplus,,True,(),float64,1.00495
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000002
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(50, 1)",float64,[[1188....
SVGP.q_mu,Parameter,Identity,,True,"(50, 7)",float64,"[[0.0150085, -0.01499765, -0.01499765..."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(7, 50, 50)",float64,"[[[1.01500000e+00, 0.00000000e+00, 0.00000000e+00..."


Seed 0 Test Accuracy: 63.30%

=== Running seed 1 ===
Training SVGP (Diffusion)…


Seed 1 Training: 100%|██████████| 5/5 [04:15<00:00, 51.11s/step]


Model hyperparameters (seed=1):





name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.beta,Parameter,Softplus,,True,(),float64,2.00869
SVGP.kernel.sigma_f,Parameter,Softplus,,True,(),float64,1.00494
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000002
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(50, 1)",float64,[[2077....
SVGP.q_mu,Parameter,Identity,,True,"(50, 7)",float64,"[[-0.01499722, -0.01499708, -0.0149972..."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(7, 50, 50)",float64,"[[[9.85014295e-01, 0.00000000e+00, 0.00000000e+00..."


Seed 1 Test Accuracy: 68.70%

=== Running seed 2 ===
Training SVGP (Diffusion)…


Seed 2 Training: 100%|██████████| 5/5 [04:10<00:00, 50.10s/step]


Model hyperparameters (seed=2):





name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.beta,Parameter,Softplus,,True,(),float64,2.0087
SVGP.kernel.sigma_f,Parameter,Softplus,,True,(),float64,1.00498
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000002
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(50, 1)",float64,[[297....
SVGP.q_mu,Parameter,Identity,,True,"(50, 7)",float64,"[[-0.01499976, -0.01499964, 0.01500797..."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(7, 50, 50)",float64,"[[[9.85014480e-01, 0.00000000e+00, 0.00000000e+00..."


Seed 2 Test Accuracy: 68.30%

=== Running seed 3 ===
Training SVGP (Diffusion)…


Seed 3 Training: 100%|██████████| 5/5 [04:13<00:00, 50.67s/step]


Model hyperparameters (seed=3):





name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.beta,Parameter,Softplus,,True,(),float64,2.0087
SVGP.kernel.sigma_f,Parameter,Softplus,,True,(),float64,1.00496
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000002
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(50, 1)",float64,[[767....
SVGP.q_mu,Parameter,Identity,,True,"(50, 7)",float64,"[[0.01465952, -0.01499721, -0.01498824..."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(7, 50, 50)",float64,"[[[9.85016192e-01, 0.00000000e+00, 0.00000000e+00..."


Seed 3 Test Accuracy: 70.10%

=== Running seed 4 ===
Training SVGP (Diffusion)…


Seed 4 Training: 100%|██████████| 5/5 [04:15<00:00, 51.10s/step]


Model hyperparameters (seed=4):





name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.beta,Parameter,Softplus,,True,(),float64,2.0087
SVGP.kernel.sigma_f,Parameter,Softplus,,True,(),float64,1.00494
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000002
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(50, 1)",float64,[[1653....
SVGP.q_mu,Parameter,Identity,,True,"(50, 7)",float64,"[[-0.01499772, -0.01499771, -0.01499668..."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(7, 50, 50)",float64,"[[[9.85014212e-01, 0.00000000e+00, 0.00000000e+00..."


Seed 4 Test Accuracy: 65.80%

=== Summary over all seeds ===
Seed 0: Accuracy = 63.30%
Seed 1: Accuracy = 68.70%
Seed 2: Accuracy = 68.30%
Seed 3: Accuracy = 70.10%
Seed 4: Accuracy = 65.80%

Mean accuracy over seeds: 67.24% ± 2.41%
