# Cora Classification with SVGP

## Setup

In [None]:
import tensorflow as tf
import numpy as np
import scipy.special
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import gpflow
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import networkx as nx
from gpflow.utilities import print_summary
import tensorflow_probability as tfp
import seaborn as sns
from tqdm import tqdm

import sys
import os
project_root = os.path.abspath("../..")
sys.path.append(project_root)
import pandas as pd
import pickle

from efficient_graph_gp.graph_kernels import get_normalized_laplacian
from efficient_graph_gp.gpflow_kernels import GraphDiffusionFastGRFKernel, GraphDiffusionPoFMKernel, GraphDiffusionKernel, GraphGeneralPoFMKernel, GraphGeneralFastGRFKernel
from utils import compute_fro
from cora_utils.preprocessing import load_PEMS, load_cora

In [None]:
# Global hyperparameters for the notebook.
# MAX_WALK_LENGTH and LEARNING_RATE are assigned again (to the same values)
# in the model-configuration and training cells further down.
MAX_WALK_LENGTH = 3    # forwarded to the graph kernel as `max_walk_length`
LEARNING_RATE = 1e-3   # step size handed to the Adam optimizer
NUM_EPOCHS = 1_000
M = batch_size = 140   # M sizes the random draw of inducing inputs Z

In [None]:
# Seed NumPy's global RNG so the np.random.choice draw used for the
# inducing inputs is repeatable from run to run.
np.random.seed(1)

# Experiment settings.
dataset, cls_number = 'cora', 7   # cls_number: class count, also the number of latent GPs
train_num = 140                   # labeled nodes requested from load_cora
num_eigenpairs = 500

In [None]:
# Load the Cora graph together with its labeled train / test node sets.
G, data_train, data_test = load_cora(num_train=train_num, num_test=1000)
(x_train, y_train), (x_test, y_test) = data_train, data_test

# Dense adjacency matrix of the graph; the graph kernels are built from it.
adjacency_matrix = nx.to_numpy_array(G)

# Draw M distinct row positions from range(train_num) and use those rows of
# x_train as the SVGP inducing inputs. With M == train_num every position is
# selected, just in a random order.
inducing_rows = np.random.choice(train_num, M, replace=False)
Z = x_train[inducing_rows].copy()

### Split the 140 labeled nodes into 112 train / 28 val (stratified)

In [None]:
# Hold out part of the labeled nodes as a validation set for model selection.
# NOTE(review): the notebook's notes describe x_train as (140, 1) node indices
# and y_train as (140,) labels in 0..6 — not verified in this cell.
X_all, Y_all = x_train, y_train

# Stratified 80/20 split: with 140 labeled nodes this gives 112 nodes for
# fitting and 28 for validation, keeping class proportions. The fixed
# random_state makes the split reproducible.
split_options = dict(test_size=0.2, stratify=Y_all, random_state=123)
X_fit, X_val, Y_fit, Y_val = train_test_split(X_all, Y_all, **split_options)

# Report the resulting shapes as a quick sanity check.
print("X_fit  shape:", X_fit.shape, "  Y_fit shape:", Y_fit.shape)
print("X_val  shape:", X_val.shape, "  Y_val shape:", Y_val.shape)


## 1. PoFM Kernel

### Model Configuration

In [None]:
# PoFM graph kernel built from the dense adjacency matrix.
MAX_WALK_LENGTH = 3
graph_kernel = GraphGeneralPoFMKernel(
    adjacency_matrix,
    max_walk_length=MAX_WALK_LENGTH,
    normalize_laplacian=True,
)

# Multi-class likelihood over cls_number classes.
likelihood = gpflow.likelihoods.MultiClass(num_classes=cls_number)

# SVGP model: one latent GP per class, inducing inputs Z, whiten=True.
svgp_config = dict(
    kernel=graph_kernel,
    likelihood=likelihood,
    inducing_variable=Z,
    num_latent_gps=cls_number,
    whiten=True,
)
model = gpflow.models.SVGP(**svgp_config)

### Training

In [None]:
# 2.1 Hyperparameters
#     CHECK_EVERY is the number of optimizer steps between validation runs.
LEARNING_RATE, MAX_STEPS, CHECK_EVERY = 0.001, 2000, 100

# 2.3 Input pipeline over the inner-training nodes (X_fit / Y_fit).
#     The batch size equals the dataset size, so every batch holds the whole
#     fit set; .repeat() makes the stream endless so next() can be called
#     for any number of steps.
n_fit = len(X_fit)
train_ds = tf.data.Dataset.from_tensor_slices((X_fit, Y_fit))
train_ds = train_ds.shuffle(n_fit, seed=42)
train_ds = train_ds.batch(n_fit)
train_ds = train_ds.repeat()

train_iter = iter(train_ds)

# 2.4 Adam optimizer
optimizer = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

# 2.5 Monitoring buffers: ELBO per step, and (step, val_acc) pairs recorded
#     whenever validation is run.
elbo_history, val_acc_history = [], []


In [None]:
# 2.6 Main training loop
#     range() is the first argument to zip(), so exactly MAX_STEPS batches
#     are pulled from the endless iterator.
for step, (Xb, Yb) in zip(range(1, MAX_STEPS + 1), train_iter):
    # One Adam update on the training loss (the negative ELBO) of this batch.
    with tf.GradientTape() as tape:
        loss = model.training_loss((Xb, Yb))
    params = model.trainable_variables
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))

    # Keep the (positive) ELBO for monitoring. It is the value computed
    # before this step's parameter update was applied.
    elbo = -loss.numpy()
    elbo_history.append(elbo)

    # Validation runs on the very first step and then every CHECK_EVERY steps.
    if step != 1 and step % CHECK_EVERY != 0:
        continue

    # Predicted class = argmax over the class axis of the predictive output
    # (expected shape (n_val, cls_number) — TODO confirm).
    val_probs, _ = model.predict_y(X_val)
    val_preds = val_probs.numpy().argmax(axis=1)
    val_acc = accuracy_score(Y_val, val_preds)

    val_acc_history.append((step, val_acc))
    print(f"Step {step:4d}  | ELBO = {elbo:.4e}  | Val Acc = {val_acc*100:.2f}%")

# 2.7 End of training loop
print("\nTraining loop finished.")

In [None]:
# Number of optimizer steps for the final fit; picked by hand from the
# validation-accuracy curve recorded during the training loop.
BEST_STEP = 800

# 4.1 Fresh PoFM kernel, likelihood and SVGP model, configured exactly like
#     the validation-time model, so the final fit starts from scratch.
final_kernel = GraphGeneralPoFMKernel(
    adjacency_matrix, max_walk_length=MAX_WALK_LENGTH, normalize_laplacian=True
)
final_likelihood = gpflow.likelihoods.MultiClass(num_classes=cls_number)
final_model = gpflow.models.SVGP(
    kernel=final_kernel,
    likelihood=final_likelihood,
    inducing_variable=Z,
    num_latent_gps=cls_number,
    whiten=True,
)

# 4.2 Endless stream of full batches over all labeled nodes
#     (x_train / y_train) instead of only the X_fit subset.
full_train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
full_train_ds = full_train_ds.shuffle(train_num, seed=42)
full_train_ds = full_train_ds.batch(train_num)
full_train_ds = full_train_ds.repeat()

final_train_iter = iter(full_train_ds)

# 4.3 Same optimizer settings as before
final_optimizer = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

# 4.4 Run exactly BEST_STEP Adam updates. range() is the first argument to
#     zip(), so no extra batch is drawn once the step budget is used up.
for step, (Xb, Yb) in zip(range(1, BEST_STEP + 1), final_train_iter):
    with tf.GradientTape() as tape:
        loss = final_model.training_loss((Xb, Yb))
    final_params = final_model.trainable_variables
    grads = tape.gradient(loss, final_params)
    final_optimizer.apply_gradients(zip(grads, final_params))

# 4.5 Evaluate on the held-out test nodes: predicted class is the argmax
#     over the class axis of the predictive output.
probs_test, _ = final_model.predict_y(x_test)
y_pred_test = probs_test.numpy().argmax(axis=1)
test_acc = accuracy_score(y_test, y_pred_test)

print(f"Final Test Accuracy (after {BEST_STEP} steps): {test_acc*100:.2f}%")