In [1]:
import networkx as nx
import numpy as np
import time
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

In [2]:
# --- 1. Graph Generation and Ground Truth Setup (Stochastic Block Model) ---

# Community sizes are the single source of truth: the node count and the
# number of clusters are derived from them so the three can never disagree.
sizes = [40, 40, 40]
num_clusters = len(sizes)  # The known number of communities
n_nodes = sum(sizes)

# Edge probabilities between blocks: diagonal entries are intra-block,
# off-diagonal entries are inter-block.
# The inter-block probability was raised from 0.001 to 0.01 to make a
# connected graph more likely. This is not a guarantee: connectivity is not
# verified here.
p_matrix = [
    [0.1, 0.01, 0.01],
    [0.01, 0.1, 0.01],
    [0.01, 0.01, 0.1]
]

# Generate the graph (fixed seed for reproducibility)
G = nx.stochastic_block_model(sizes, p_matrix, seed=42)

# Ground-truth community of every node, read from the 'block' node attribute
# in the graph's node iteration order.
true_labels = np.array([G.nodes[node]['block'] for node in G.nodes])

print(f"SBM Graph generated with {n_nodes} nodes and {num_clusters} communities.")

SBM Graph generated with 120 nodes and 3 communities.


In [3]:
# --- 2. Spectral Clustering Core Function (FIXED to use only np.linalg.eigh) ---
def spectral_cluster_and_evaluate(L_matrix, true_labels, num_clusters):
    """
    Run spectral clustering on a dense Laplacian and score it against ground truth.

    The matrix is eigen-decomposed with ``np.linalg.eigh``; the eigenvectors of
    the 2nd through (num_clusters + 1)-th smallest eigenvalues become per-node
    features, which k-means then partitions into ``num_clusters`` groups.

    Parameters
    ----------
    L_matrix : 2D square array
        Dense graph Laplacian, assumed symmetric (required by ``eigh``).
    true_labels : array-like
        Ground-truth label per node, in the same order as the rows of L_matrix.
    num_clusters : int
        Number of clusters for k-means and number of eigenvectors kept.

    Returns
    -------
    (runtime, ari_score, predicted_labels)
        runtime          -- seconds spent on eigen-decomposition plus k-means
                            (the ARI computation is not timed).
        ari_score        -- adjusted Rand index of the predicted vs. true labels.
        predicted_labels -- cluster index per node from k-means.
        If ``eigh`` raises ``LinAlgError`` the error is printed and
        ``(None, None, None)`` is returned instead.

    Notes
    -----
    Dropping only the first eigenvector assumes a connected graph. With c
    connected components the Laplacian has eigenvalue 0 with multiplicity c,
    so some of the retained columns then merely separate components.
    """
    # perf_counter is monotonic and high-resolution, which matters for the
    # millisecond-scale durations measured here; time.time() can jump.
    start_time = time.perf_counter()

    # Eigen-decomposition with the dense symmetric solver.
    # L_matrix MUST be a 2D NumPy array for this to work.
    try:
        # eigh returns eigenvalues in ascending order with the eigenvectors
        # as matching columns, so no additional sorting is required.
        _, eigenvectors = np.linalg.eigh(L_matrix)
    except np.linalg.LinAlgError as e:
        print(f"LinAlgError during eigh: {e}")
        return None, None, None  # Return None on failure

    # Select the k feature vectors (discarding the first/trivial one):
    # columns 1 .. num_clusters inclusive.
    embedding = eigenvectors[:, 1:num_clusters + 1]

    # k-means on the spectral embedding (fixed seed for reproducibility)
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init='auto')
    predicted_labels = kmeans.fit_predict(embedding)

    runtime = time.perf_counter() - start_time
    ari_score = adjusted_rand_score(true_labels, predicted_labels)

    return runtime, ari_score, predicted_labels

In [4]:
# --- 3. Construct Laplacians (dense NumPy arrays, as np.linalg.eigh expects) ---

# Unnormalized Laplacian (L): networkx returns it sparse, so densify it.
L_unnorm_dense = nx.laplacian_matrix(G).toarray()

# Symmetrically normalized Laplacian (L_sym), densified the same way.
L_sym_norm_dense = nx.normalized_laplacian_matrix(G).toarray()

In [5]:
# --- 4. Run Comparisons ---

def _print_result(heading, runtime, ari):
    """Print the ARI and runtime of one run; print nothing if the run failed."""
    if runtime is None:
        return
    print(heading)
    print(f"  > ARI (Clustering Quality): {ari:.4f}")
    print(f"  > Runtime (Spectral + KMeans): {runtime:.4f} seconds")


print("\n--- Running Spectral Clustering Comparison (SBM) ---")

# Run 1: Unnormalized Laplacian (L) -- evaluated and reported before run 2 starts.
runtime_L, ari_L, labels_L = spectral_cluster_and_evaluate(
    L_unnorm_dense, true_labels, num_clusters
)
_print_result("\n**Unnormalized Laplacian (L):**", runtime_L, ari_L)

# Run 2: Symmetrically Normalized Laplacian (L_sym)
runtime_L_sym, ari_L_sym, labels_L_sym = spectral_cluster_and_evaluate(
    L_sym_norm_dense, true_labels, num_clusters
)
_print_result(
    "\n**Symmetrically Normalized Laplacian (L_sym):**", runtime_L_sym, ari_L_sym
)

print("\n--- End of Single Run ---")


--- Running Spectral Clustering Comparison (SBM) ---

**Unnormalized Laplacian (L):**
  > ARI (Clustering Quality): 0.0015
  > Runtime (Spectral + KMeans): 0.0288 seconds

**Symmetrically Normalized Laplacian (L_sym):**
  > ARI (Clustering Quality): 0.6789
  > Runtime (Spectral + KMeans): 0.0041 seconds

--- End of Single Run ---
