In [1]:
import numpy as np
from scipy.sparse import random, coo_matrix, csr_matrix
import time

In [2]:
import sys
import os
# Make the project package importable from this notebook's location.
project_root = os.path.abspath("../..")
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from efficient_graph_gp.random_walk_samplers import Graph, RandomWalk
from efficient_graph_gp.modulation_functions import diffusion_modulator
from efficient_graph_gp.graph_kernels import get_normalized_laplacian

In [4]:
# --- Synthetic graph ---------------------------------------------------
seed = 42            # Random seed for reproducibility
num_nodes = 10_000   # Number of nodes
density = 0.01       # Sparsity level (fraction of possible edges)

# --- Random-walk features ----------------------------------------------
# These three are handed to RandomWalk.get_random_walk_matrices below.
walks_per_node = 10
max_walk_length = 10
p_halt = 0.2         # presumably the per-step halting probability -- TODO confirm

# Second argument of diffusion_modulator(step, beta).
beta = 0.1

In [5]:
def generate_large_sparse_adjacency_matrix(num_nodes, density=0.01, seed=None):
    """
    Generate a large, sparse, undirected (symmetric) weighted adjacency matrix.

    Entries are sampled with ``scipy.sparse.random`` over the full
    ``num_nodes x num_nodes`` grid. Only the entries strictly above the
    diagonal are kept and then mirrored below it, so that:

    * the fraction of node pairs joined by an edge is approximately
      ``density`` (mirroring the full sample would roughly double it),
    * each edge carries exactly one sampled weight (no summing of an
      (i, j) sample with an independent (j, i) sample),
    * there are no self-loops and no explicitly stored zeros.

    Parameters:
        num_nodes (int): Number of nodes in the graph.
        density (float): Fraction of the possible edges that exist (0 < density <= 1).
        seed (int or None): Random seed for reproducibility.

    Returns:
        adj_matrix (scipy.sparse.csr_matrix): Symmetric sparse adjacency matrix
            with an empty diagonal. Edge weights are the values drawn by
            ``scipy.sparse.random``.
    """
    rng = np.random.default_rng(seed)
    sampled = random(num_nodes, num_nodes, density=density, format='coo', random_state=rng)

    # Keep only the strictly upper-triangular samples (row < col); this drops
    # the diagonal (self-loops) and one of the two orientations of each pair.
    above_diagonal = sampled.row < sampled.col
    upper_triangular = coo_matrix(
        (
            sampled.data[above_diagonal],
            (sampled.row[above_diagonal], sampled.col[above_diagonal]),
        ),
        shape=sampled.shape,
    ).tocsr()

    # Mirror across the diagonal to obtain an undirected graph. The two halves
    # have disjoint sparsity patterns, so no weights are added together.
    adjacency_matrix = (upper_triangular + upper_triangular.T).tocsr()

    return adjacency_matrix

In [6]:
# Sample the synthetic graph using the configuration values defined above.
adj_matrix = generate_large_sparse_adjacency_matrix(
    num_nodes=num_nodes,
    density=density,
    seed=seed,
)
# Materialise a dense ndarray copy; this is what gets passed to
# get_normalized_laplacian in the next cell.
dense_adj_matrix = adj_matrix.toarray()

In [7]:
# Build the graph object from the normalized Laplacian of the dense adjacency matrix.
laplacian = get_normalized_laplacian(dense_adj_matrix)
graph = Graph(laplacian)

# Seed the walker from the notebook-wide `seed` constant rather than a second
# hard-coded literal, so changing the config cell changes the whole experiment.
random_walk = RandomWalk(graph, seed=seed)
feature_matrices = random_walk.get_random_walk_matrices(walks_per_node, p_halt, max_walk_length)

# One modulation coefficient per walk step 0 .. max_walk_length - 1.
modulator_vector = np.array([diffusion_modulator(step, beta) for step in range(max_walk_length)])

In [8]:
# Check the sparsity of the feature matrices.
total_entries = feature_matrices.size
# Count non-zeros directly and subtract: this gives the same number as
# counting `feature_matrices == 0`, but without allocating a boolean
# temporary with as many elements as feature_matrices itself.
zero_count = total_entries - np.count_nonzero(feature_matrices)
percentage_zeros = (zero_count / total_entries) * 100
print(f"Percentage of zeros: {percentage_zeros:.2f}%")

Percentage of zeros: 99.96%


In [9]:
# Time the dense product. perf_counter() is a monotonic, high-resolution
# clock intended for measuring short durations; time.time() can jump if the
# system clock is adjusted.
start_time = time.perf_counter()
# Use broadcasting for 3D matrix-vector multiplication
Phi = feature_matrices @ modulator_vector  # Shape: (num_nodes, num_nodes)
end_time = time.perf_counter()
print(f"Dense Timing: {end_time - start_time:.5f} seconds")

Dense Timing: 5.36412 seconds


In [10]:
# Convert the feature matrix to sparse representation
flattened_matrix = feature_matrices.reshape(-1, max_walk_length)
sparse_feature_matrices = csr_matrix(flattened_matrix)

In [11]:
# Multiplication for sparse matrix
start_time = time.time()
result = sparse_feature_matrices @ modulator_vector
end_time = time.time()
print(f"Sparse Timing: {end_time - start_time:.5f} seconds")

Sparse Timing: 0.14852 seconds


In [12]:
# Reshape the flat result back to 2D, (num_nodes, num_nodes), to match the dense Phi
# (not necessary for timing experiments)
# Phi_sparse = result.reshape(num_nodes, num_nodes)