In [4]:
import numpy as np
import time

In [5]:
import sys
import os
# Make the repository root (two directories above the working directory)
# importable so the project package can be loaded by the import cell below.
project_root = os.path.abspath("../..")
# Guard the append so re-running this cell does not pile up duplicate
# entries in sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [6]:
from efficient_graph_gp.random_walk_samplers import Graph, RandomWalk

In [7]:
# Random-walk sampling configuration shared by every cell below.
num_walks = 100_000    # value passed as `num_walks` to the sampler calls
max_walk_length = 4    # the feature matrices printed below have a last axis of this length
p_halt = 0.5           # presumably the per-step halting probability -- confirm against RandomWalk

In [8]:
# Symmetric 0/1 adjacency matrix of an example undirected graph on 8 nodes.
# Row i lists the neighbours of node i.
full_adjacency_matrix = np.asarray(
    [
        [0, 1, 0, 1, 0, 0, 0, 1],  # node 0: neighbours 1, 3, 7
        [1, 0, 1, 0, 0, 0, 1, 0],  # node 1: neighbours 0, 2, 6
        [0, 1, 0, 1, 0, 0, 0, 1],  # node 2: neighbours 1, 3, 7
        [1, 0, 1, 0, 1, 0, 0, 0],  # node 3: neighbours 0, 2, 4
        [0, 0, 0, 1, 0, 1, 0, 1],  # node 4: neighbours 3, 5, 7
        [0, 0, 0, 0, 1, 0, 1, 0],  # node 5: neighbours 4, 6
        [0, 1, 0, 0, 0, 1, 0, 1],  # node 6: neighbours 1, 5, 7
        [1, 0, 1, 0, 1, 0, 1, 0],  # node 7: neighbours 0, 2, 4, 6
    ],
    dtype=float,
)

# The "main" graph is the same graph with the last node (node 7) and all of
# its edges removed. This is a view onto the full matrix, not a copy.
main_adjacency_matrix = full_adjacency_matrix[0:-1, 0:-1]

In [9]:
# Wrap the 7-node "main" adjacency matrix in the project's Graph type and
# attach a seeded random-walk sampler to it.
main_graph = Graph(adjacency_matrix=main_adjacency_matrix)
main_random_walk = RandomWalk(main_graph, seed=42)

# Sample the walks; the result is indexed as [start_node, node, step]
# (shape (7, 7, 4) in the recorded output below).
main_feature_matrices = main_random_walk.get_random_walk_matrices(num_walks, p_halt, max_walk_length)

# Report the overall shape, then show the matrix of the first start node only.
print("Feature matrices shape:", main_feature_matrices.shape)
for start_node in range(1):
    print(
        f"\nFeature matrix for start node {start_node}:",
        main_feature_matrices[start_node],
        sep="\n",
    )


Feature matrices shape: (7, 7, 4)

Feature matrix for start node 0:
[[1.      0.      1.99008 0.     ]
 [0.      1.00068 0.      4.95936]
 [0.      0.      1.95984 0.     ]
 [0.      1.00256 0.      5.04864]
 [0.      0.      1.02984 0.     ]
 [0.      0.      0.      2.0016 ]
 [0.      0.      1.00776 0.     ]]


In [10]:
# Baseline ("naive") approach: build the sampler on the full 8-node graph and
# re-sample the walks for every start node from scratch.

# Create Graph instance
full_graph = Graph(adjacency_matrix=full_adjacency_matrix)

# Create RandomWalk instance
full_random_walk = RandomWalk(full_graph, seed=42)

# Time only the sampling call. perf_counter() is the monotonic,
# high-resolution clock intended for measuring elapsed time; time.time() is
# wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
# Perform the random walks and get the feature matrices as a NumPy array
full_feature_matrices = full_random_walk.get_random_walk_matrices(num_walks, p_halt, max_walk_length)
end_time = time.perf_counter()

# The variable name and the printed label keep their original spelling
# because a later cell reads `naiive_update_time`.
naiive_update_time = end_time - start_time
print("Time for naiive update:", naiive_update_time)

# Output the feature matrices (overall shape, then the first start node only)
print("Feature matrices shape:", full_feature_matrices.shape)
for start_node in range(1):
    print(f"\nFeature matrix for start node {start_node}:")
    print(full_feature_matrices[start_node])


Time for naiive update: 4.956399917602539
Feature matrices shape: (8, 8, 4)

Feature matrix for start node 0:
[[1.      0.      2.98728 0.     ]
 [0.      1.0059  0.      7.98624]
 [0.      0.      2.94    0.     ]
 [0.      0.99492 0.      8.00712]
 [0.      0.      2.01636 0.     ]
 [0.      0.      0.      4.08744]
 [0.      0.      2.03964 0.     ]
 [0.      1.00404 0.      9.93672]]


In [11]:
def compute_C_naive(M):
    """
    Computes the pairwise truncated convolutions of the rows of M:

        C[i, j, k] = sum_{p=0..k} M[i, p] * M[j, k - p]

    When M[i, p] is the number of p-step paths between node i and a fixed
    node X, C[i, j, k] is intended to be the number of k-step paths from
    node i to node j that pass through node X. That reading assumes
    M[j, q] also counts q-step paths from X to j (e.g. an undirected
    graph) -- confirm against the caller.

    Parameters:
    M (array-like): An (N x P) matrix where M[i, p] is the number of paths
                    from node i to node X in p steps (p from 0 to P - 1).

    Returns:
    C (ndarray): An (N x N x P) float array, symmetric in its first two
                 axes. k ranges from 0 to P - 1. If N or P is 0 the
                 (empty) array of that shape is returned.

    Raises:
    ValueError: If M is not two-dimensional.
    """
    M = np.asarray(M)
    if M.ndim != 2:
        raise ValueError(f"M must be 2-dimensional, got {M.ndim} dimension(s)")

    N, P = M.shape
    C = np.zeros((N, N, P))
    if P == 0:
        # np.convolve rejects empty operands, and there is no step to fill.
        return C

    for i in range(N):
        # Convolution is commutative, so C[i, j] == C[j, i]: compute each
        # unordered pair once and mirror the result.
        for j in range(i, N):
            # Full convolution has 2P - 1 terms; keep the first P steps only.
            C_ij = np.convolve(M[i, :], M[j, :])[:P]
            C[i, j, :] = C_ij
            C[j, i, :] = C_ij
    return C

def compute_C_fft(M):
    """
    Computes the pairwise truncated convolutions of the rows of M using the
    FFT:

        C[i, j, k] = sum_{p=0..k} M[i, p] * M[j, k - p]

    When M[i, p] is the number of p-step paths between node i and a fixed
    node X, C[i, j, k] is intended to be the number of k-step paths from
    node i to node j that pass through node X. That reading assumes
    M[j, q] also counts q-step paths from X to j (e.g. an undirected
    graph) -- confirm against the caller.

    The result equals the direct convolution only up to floating-point
    round-off, so integer-valued inputs may yield values such as
    5.999999999 instead of 6.

    Parameters:
    M (array-like): An (N x P) real matrix where M[i, p] is the number of
                    paths from node i to node X in p steps (p from 0 to
                    P - 1).

    Returns:
    C (ndarray): An (N x N x P) real array. k ranges from 0 to P - 1.
                 If N or P is 0 the (empty) array of that shape is returned.

    Raises:
    ValueError: If M is not two-dimensional.
    """
    M = np.asarray(M, dtype=float)
    if M.ndim != 2:
        raise ValueError(f"M must be 2-dimensional, got {M.ndim} dimension(s)")

    N, P = M.shape
    if N == 0 or P == 0:
        # Nothing to transform; also avoids log/bit tricks on 2 * P - 1 == -1.
        return np.zeros((N, N, P))

    # A linear convolution of two length-P sequences has 2P - 1 samples.
    # Pad to the next power of two >= 2P - 1 so the circular convolution
    # computed by the FFT does not wrap around. Integer bit arithmetic avoids
    # the rounding risk of float log2; the floor of 2 keeps the transform
    # length even for the rfft/irfft round trip.
    L = max(2, 1 << (2 * P - 2).bit_length())

    # The input is real, so the half-spectrum real FFT is sufficient;
    # `n=L` zero-pads every row to length L.
    M_fft = np.fft.rfft(M, n=L, axis=1)

    # Broadcast to all pairwise spectrum products, shape (N, N, L // 2 + 1).
    C_fft = M_fft[:, np.newaxis, :] * M_fft[np.newaxis, :, :]

    # Back to the step domain; irfft returns a real array of length L.
    C_padded = np.fft.irfft(C_fft, n=L, axis=2)

    # Extract the relevant part of the convolution result (first P steps)
    return C_padded[:, :, :P]

In [12]:
# Compute the full p-step walk matrices (feature matrices) via lazy update:
# reuse the matrices already sampled on the 7-node main graph, sample walks
# only from the newly added node, and patch in the paths that go through it.

# Time the computation. perf_counter() is the monotonic, high-resolution
# clock intended for measuring elapsed time; time.time() is wall-clock time
# and can jump if the system clock is adjusted.
start_time = time.perf_counter()

num_nodes = full_adjacency_matrix.shape[0]
# Perform multiple walks for the new node (the last node of the full graph).
# NOTE(review): this calls a private RandomWalk method; a public entry point
# would be preferable if the class offers one.
walk_matrix_new_node = full_random_walk._perform_multiple_walks(
    start_node=num_nodes - 1, num_walks=num_walks, p_halt=p_halt, max_walk_length=max_walk_length
)
# Pad the main feature matrices with one zero row and one zero column along
# the two node axes; np.pad returns a new array, so main_feature_matrices
# itself is left unmodified.
main_feature_matrices_padded = np.pad(main_feature_matrices, ((0, 1), (0, 1), (0, 0)), mode='constant')
# Add the new node's walk matrix to the main feature matrices, both as the
# new start-node slice and as the new target-node slice. Using the same
# matrix for both presumably relies on the graph being undirected -- confirm.
main_feature_matrices_padded[:, -1, :] = walk_matrix_new_node
main_feature_matrices_padded[-1, :, :] = walk_matrix_new_node
# Compute additional path count due to the new node
additional_path_counts = compute_C_naive(walk_matrix_new_node)
# Don't need to update the walk matrix for the new node: its row and column
# were filled directly from the sampled walks above.
additional_path_counts[:, -1, :] = 0
additional_path_counts[-1, :, :] = 0
main_feature_matrices_padded += additional_path_counts
lazy_update_full_feature_matrices = main_feature_matrices_padded

end_time = time.perf_counter()

lazy_time = end_time - start_time
print(f"Lazy update time: {lazy_time:.4f} seconds")


Lazy update time: 0.8741 seconds


In [13]:
# Timing summary of the two approaches, one per line.
print(
    f"Naiive update time: {naiive_update_time:.4f} seconds",
    f"Lazy update time: {lazy_time:.4f} seconds",
    sep="\n",
)

Naiive update time: 4.9564 seconds
Lazy update time: 0.8741 seconds


In [14]:
# Relative error of the lazy update against the full re-sampling:
# ||lazy - full|| / ||full||. With no `ord`/`axis` arguments, np.linalg.norm
# returns the 2-norm of the flattened array.
residual_norm = np.linalg.norm(lazy_update_full_feature_matrices - full_feature_matrices)
reference_norm = np.linalg.norm(full_feature_matrices)
diff = residual_norm / reference_norm
print(f"Relative Frobenius norm of the difference: {diff:.4f}")

Relative Frobenius norm of the difference: 0.0177
