In [1]:
from torch_geometric.datasets import WebKB
import torch
from torch_geometric.data import Data
from sklearn.cluster import KMeans
from torch_geometric.utils import to_dense_adj
from torch_geometric.utils import to_undirected



# Data Preprocessing

In [2]:
dataset = WebKB(root='./', name='Cornell')
data = dataset[0]
data.edge_index = to_undirected(data.edge_index)

# Spectral Clustering

In [3]:
def count_degrees(graph):
    num_nodes = graph.num_nodes
    edge_index = graph.edge_index
    degrees = torch.zeros(num_nodes, dtype=torch.long)
    for e in range(edge_index.shape[1]):
        src, dst = edge_index[:, e]
        degrees[dst] += 1
    return degrees

In [4]:
# Calculate Laplacian
deg = count_degrees(data)
adj = to_dense_adj(data.edge_index)[0]
laplacian = torch.diag(deg) - adj

In [5]:
# Calculate eigen vectors
num_eigenvectors = 3
eigenvalues, eigenvectors = torch.linalg.eigh(laplacian)
eigenvectors = eigenvectors[:, :num_eigenvectors]

In [6]:
# Do Kmeans
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=1000)
kmeans.fit(eigenvectors.detach().numpy())
labels_ratio_cut = kmeans.labels_
labels_ratio_cut

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0])

## Modularity Maximization

In [7]:
# Calculate Modularity matrix
deg1 = deg[:, None]
modularity = adj - deg1 @ deg1.T / (2 * data.num_nodes)
modularity.shape

torch.Size([183, 183])

In [8]:
# Calculate eigen vectors
num_eigenvectors = 3
eigenvalues, eigenvectors = torch.linalg.eigh(modularity)
eigenvectors = eigenvectors[:, :num_eigenvectors]

In [9]:
# Do Kmeans
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=1000)
kmeans.fit(eigenvectors.detach().numpy())
labels_modularity = kmeans.labels_
labels_modularity

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
       0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
       0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 0])