## Import Packages

In [67]:
import tensorflow as tf
import numpy as np
import gpflow
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import networkx as nx
from gpflow.utilities import print_summary
import tensorflow_probability as tfp
import seaborn as sns
import math
from tqdm import tqdm

In [68]:
import sys
import os
# Put the directory two levels above the working directory on the import path
# (presumably where the `efficient_graph_gp` package lives — verify).
project_root = os.path.abspath("../..")
# Guard the append so that re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [69]:
from efficient_graph_gp.random_walk_samplers import Graph, RandomWalk
from efficient_graph_gp.modulation_functions import diffusion_modulator
from efficient_graph_gp.graph_kernels.utils import get_normalized_laplacian
from utils import compute_fro

In [70]:
# Second argument handed to `diffusion_modulator` when building the modulation vector.
BETA = 1e-2
# Number of walk lengths considered (steps 0 .. MAX_WALK_LENGTH - 1).
MAX_WALK_LENGTH = 8

# Sampling settings passed to `get_random_walk_matrices`.
P_HALT = 0.5
WALKS_PER_NODE = 100

## Initialize a Random Graph

In [71]:
# Graph configuration and NumPy seeding
np.random.seed(0)  # fixes NumPy's global random state
graph_type = "random"  # the graph-construction cell handles 'line' and 'random'
num_nodes = 30

In [72]:
# Build the adjacency matrix for the configured graph type.
# networkx draws from Python's `random` module unless it is given a seed, so
# np.random.seed() alone does not make the random graph reproducible.
GRAPH_SEED = 0
if graph_type == 'line':
    # Path graph: node i is linked to i-1 and i+1; the two end nodes are NOT joined.
    adjacency_matrix = np.eye(num_nodes, k=1) + np.eye(num_nodes, k=-1)
elif graph_type == 'random':
    probability = 0.3  # Probability of edge creation
    # Undirected Erdos-Renyi graph, seeded so a fresh kernel reproduces the same graph.
    G = nx.erdos_renyi_graph(num_nodes, probability, seed=GRAPH_SEED, directed=False)
    adjacency_matrix = nx.to_numpy_array(G)  # Convert to a dense adjacency matrix
else:
    # Fail loudly instead of leaving `adjacency_matrix` undefined (or stale from a previous run).
    raise ValueError(f"Unknown graph_type {graph_type!r}; expected 'line' or 'random'")

## GRF Estimator

In [73]:
# One modulation coefficient per walk length 0 .. MAX_WALK_LENGTH - 1.
modulator_vector = np.array(
    [diffusion_modulator(walk_length, BETA) for walk_length in range(MAX_WALK_LENGTH)]
)

In [74]:
# Inspect the modulation coefficients (one entry per walk length).
modulator_vector

array([ 1.00000000e+00, -5.00000000e-03,  1.25000000e-05, -2.08333333e-08,
        2.60416667e-11, -2.60416667e-14,  2.17013889e-17, -1.55009921e-20])

In [75]:
# The matrix returned by `get_normalized_laplacian` is what the walker is built on.
laplacian = get_normalized_laplacian(adjacency_matrix)
graph = Graph(laplacian)
random_walk = RandomWalk(graph)

# One stochastic draw of the feature matrices (random sampling).
# presumably one slice per walk length along the last axis — verify in the sampler
feature_matrices = random_walk.get_random_walk_matrices(
    WALKS_PER_NODE,
    P_HALT,
    MAX_WALK_LENGTH,
)

In [76]:
# Combine the sampled feature matrices with the modulation coefficients.
# assumes the last axis of `feature_matrices` has length MAX_WALK_LENGTH — TODO confirm
Phi = feature_matrices @ modulator_vector

## Measure $E[X]$, $E[X^2]$ and $Var(X)$ of the GRF Estimator

In [77]:
# Monte Carlo study: redraw the random-walk features repeatedly and keep every Phi.
num_samples = 1000
sample_list = []

for _ in tqdm(range(num_samples)):
    new_feature_matrices = random_walk.get_random_walk_matrices(WALKS_PER_NODE, P_HALT, MAX_WALK_LENGTH)
    new_Phi = new_feature_matrices @ modulator_vector
    sample_list.append(new_Phi)

# Stack the draws so that axis 0 indexes the independent samples.
all_samples = np.array(sample_list)

# Element-wise statistics across the draws
Phi_avg = all_samples.mean(axis=0)                 # E[X]
E_X_squared = np.square(all_samples).mean(axis=0)  # E[X^2]
variances = all_samples.var(axis=0)                # Var(X)

# Scale WALKS_PER_NODE by min(1/P_HALT, MAX_WALK_LENGTH); used later to normalise the theoretical variance.
effective_num_walks_per_node = WALKS_PER_NODE * min(1/P_HALT, MAX_WALK_LENGTH)
print(f'Effective number of walks_per_node = {effective_num_walks_per_node}')

100%|██████████| 1000/1000 [00:19<00:00, 51.50it/s]

Effective number of walks_per_node = 200.0





## Theoretical Estimation of the Mean of the GRF Estimator

In [78]:
# Powers of the walk matrix used for the closed-form MEAN of the estimator.
W = laplacian

I = np.eye(W.shape[0])
# W_power_series[k] holds W^k for k = 0 .. MAX_WALK_LENGTH - 1
W_power_series = [I]
if MAX_WALK_LENGTH > 1:
    W_power_series.append(W)

# Extend from the previously stored power; nothing beyond W^(MAX_WALK_LENGTH - 1)
# is computed, so no matrix product is thrown away.
for _ in range(2, MAX_WALK_LENGTH):
    W_power_series.append(W_power_series[-1] @ W)

W_power_series = np.array(W_power_series)

In [79]:
# Truncated power series: sum over k of modulator_vector[k] * W^k
Estimated_mean = np.zeros_like(W_power_series[0])

for coefficient, matrix_power in zip(modulator_vector, W_power_series):
    Estimated_mean += coefficient * matrix_power

In [80]:
# Unbiased estimate: compare the empirical mean of the sampled Phi with the
# truncated power series computed from W.
# NOTE(review): presumably `compute_fro` returns a Frobenius-norm discrepancy — confirm in utils.

compute_fro(Phi_avg, Estimated_mean)

7.442066361689419e-05

## Theoretical Estimation of the Variance of the GRF Estimator

In [81]:
# Build the matrix M used for the second-moment estimate from graph properties
W = laplacian
# Number of non-zero entries in each row of W
# NOTE(review): this count includes the diagonal entry whenever W[i, i] != 0 — confirm that is intended
non_zero_counts = np.count_nonzero(W, axis=1)
# N x N matrix whose row i is filled with the non-zero count of row i
D = np.repeat(non_zero_counts[:, np.newaxis], W.shape[0], axis=1)

M = W**2 * D / (1 - P_HALT)

In [82]:
# Inspect D: every row repeats that row's non-zero count of W.
D

array([[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
       [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
        12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12],
       [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16],
       [ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9],
       [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14],
       [11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
        11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11],
       [ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9],
       [13, 13, 13, 13, 13, 13, 13, 13, 1

In [83]:
# Element-wise squares of the modulation coefficients
modulator_vector_squared = np.square(modulator_vector)

In [84]:
# Collect M^0, M^1, ..., M^(MAX_WALK_LENGTH - 1)
I = np.eye(M.shape[0])
M_power_series = [I]
current_power = M

# Each pass stores the current power and then advances it by one factor of M.
while len(M_power_series) < MAX_WALK_LENGTH:
    M_power_series.append(current_power)
    current_power = current_power @ M

M_power_series = np.array(M_power_series)

In [85]:
# Second-moment series: sum over k of modulator_vector_squared[k] * M^k
Estimated_X_squared = np.zeros_like(M_power_series[0])

for squared_coefficient, matrix_power in zip(modulator_vector_squared, M_power_series):
    Estimated_X_squared += squared_coefficient * matrix_power

In [86]:
# Var(X) = E[X^2] - (E[X])^2, applied element-wise
Estimated_Variance = Estimated_X_squared - np.square(Estimated_mean)

In [87]:
# Scale the estimated variance down by the effective number of walks per node.
effective_theoretical_variance = Estimated_Variance / effective_num_walks_per_node

In [88]:
# Compare the theoretical variance with the empirical element-wise variance.
# NOTE(review): presumably a Frobenius-norm discrepancy — confirm `compute_fro` in utils.
compute_fro(effective_theoretical_variance, variances)

0.8986032942631392

In [89]:
# Approximation made here: the sub-walks are treated as independent.
effective_theoretical_variance

array([[5.22389006e-05, 1.30046160e-13, 1.76054911e-08, 3.30100721e-08,
        2.03147564e-08, 1.16297400e-13, 6.08765434e-14, 2.20073984e-08,
        1.34106859e-13, 2.40080610e-08, 8.78038188e-14, 3.08225486e-14,
        1.59927239e-13, 4.27679933e-14, 1.82997545e-13, 1.17958152e-13,
        2.93429707e-08, 6.66096792e-14, 1.22064416e-13, 2.18518752e-13,
        1.09130987e-13, 1.87431068e-13, 6.42785187e-19, 3.30098023e-08,
        2.93430173e-08, 6.29674786e-14, 9.47583813e-14, 1.67482233e-13,
        6.35239695e-14, 2.64087519e-08],
       [1.56280382e-13, 5.27387470e-05, 1.98227048e-13, 1.20799944e-13,
        2.01180006e-08, 1.23106179e-13, 1.10545339e-13, 1.24777234e-13,
        1.31810297e-13, 2.37761303e-08, 1.74358919e-08, 6.85889937e-14,
        2.90601994e-08, 1.38244738e-13, 9.77260847e-14, 2.37758792e-08,
        3.41486208e-14, 3.26909020e-08, 2.17961343e-08, 4.35877689e-08,
        8.95833006e-14, 1.35293236e-13, 9.14439129e-14, 2.22710443e-13,
        2.90596688e-08,

In [90]:
# Empirical element-wise variance of Phi across the Monte Carlo draws.
variances

array([[4.59565938e-06, 2.70371426e-13, 3.51893396e-08, 6.68428975e-08,
        4.23620602e-08, 2.08013535e-13, 1.11647219e-13, 3.82170226e-08,
        2.83953988e-13, 4.56095317e-08, 1.69329248e-13, 6.34915540e-14,
        3.26617592e-13, 8.42259757e-14, 3.52012530e-13, 2.16580947e-13,
        6.15519050e-08, 1.35624872e-13, 2.55511590e-13, 4.21534420e-13,
        2.31113365e-13, 3.93979303e-13, 1.21919921e-18, 6.07653950e-08,
        5.67165530e-08, 1.07491632e-13, 1.91091766e-13, 3.45058759e-13,
        1.27981791e-13, 5.56071373e-08],
       [2.76809642e-13, 5.77509057e-06, 3.86195948e-13, 2.65275778e-13,
        4.04520914e-08, 2.43616459e-13, 2.17989655e-13, 2.72541818e-13,
        2.65646030e-13, 4.78534331e-08, 3.46309619e-08, 1.48575310e-13,
        6.02683332e-08, 2.75649170e-13, 2.04588653e-13, 5.10996032e-08,
        8.11890495e-14, 6.91280592e-08, 4.21032267e-08, 9.74854580e-08,
        1.88144242e-13, 2.49307965e-13, 1.80924470e-13, 4.70551234e-13,
        5.37211271e-08,

In [91]:
# Element-wise ratio of theoretical to empirical variance (1.0 would mean perfect agreement).
# NOTE(review): the recorded output is ~0.5 off the diagonal and far larger on the diagonal —
# check the normalisation by effective_num_walks_per_node and the treatment of diagonal entries.
effective_theoretical_variance / variances

array([[11.36700879,  0.48099077,  0.50030752,  0.49384562,  0.47955072,
         0.55908573,  0.54525803,  0.5758533 ,  0.47228377,  0.52638254,
         0.518539  ,  0.48545904,  0.48964674,  0.50777676,  0.51986088,
         0.54463771,  0.47671913,  0.49113174,  0.47772555,  0.51838887,
         0.47219678,  0.47573836,  0.52721916,  0.54323357,  0.51736249,
         0.58578959,  0.49587893,  0.48537308,  0.49635162,  0.47491659],
       [ 0.56457709,  9.13210736,  0.51328102,  0.45537495,  0.49732906,
         0.50532784,  0.50711278,  0.45782785,  0.49618772,  0.49685318,
         0.50347697,  0.46164463,  0.48218024,  0.50152423,  0.47767109,
         0.46528501,  0.42060624,  0.47290351,  0.51768323,  0.44712073,
         0.4761416 ,  0.54267515,  0.5054259 ,  0.47329691,  0.54093558,
         0.52095828,  0.49129361,  0.51303851,  0.54232872,  0.54628559],
       [ 0.53216163,  0.50679993,  6.68296676,  0.48584012,  0.51478656,
         0.49516758,  0.57336735,  0.50654063,  0

## Variance Bound with Graph Assumptions

In [92]:
# Spectral radius of W: the largest eigenvalue magnitude
rho = np.abs(np.linalg.eigvals(W)).max()

# Mean entry of D, used here as the average degree
# NOTE(review): D stores per-row non-zero counts of W, which may include the diagonal — confirm
d = D.mean()

print(f'rho = {rho}, d = {d}')

rho = 1.4667889255423314, d = 11.2


In [93]:
# Per-walk-length terms of the bound for k = 1 .. MAX_WALK_LENGTH - 1 (the k = 0 term is added later)
terms = [
    modulator_vector_squared[k] * rho**(2*k) * d**(2*k - 1) / (1 - P_HALT)**k
    for k in range(1, MAX_WALK_LENGTH)
]
terms

[0.001204823061172431,
 4.0644761044521415e-06,
 6.094012763661796e-09,
 5.139544978917207e-12,
 2.7741245569105278e-15,
 1.0398357637828398e-18,
 2.863584246942661e-22]

In [94]:
# Add the k = 0 coefficient to the summed higher-order terms, then scale by 1 / P_HALT.
regular_variance = (1 / P_HALT) * (
    modulator_vector_squared[0] + np.sum(terms)
)

In [95]:
# Variance bound divided by the effective number of walks per node.
regular_variance/effective_num_walks_per_node 

0.01001208893636432