# Entropy Vector for Hypergraphs (DHG)

Implementation of the entropy vector $SE(H)$ from *"A New Entropy for Hypergraphs"*, using the [DeepHypergraph](https://github.com/iMoonLab/DeepHypergraph) library.

The entropy vector is a $(2^m - 1)$-dimensional descriptor, where $m$ is the number of hyperedges, built by computing the Shannon entropy of every partial hypergraph (one per non-empty subset of hyperedges).

In [None]:
import torch
import numpy as np
from itertools import combinations
from math import comb
from dhg import Hypergraph

## 1. Core Entropy Function $S(H)$

Given a `dhg.Hypergraph`:

1. Extract the incidence matrix $I = H^T$ (shape $m \times n$) from `hg.H_T`
2. Compute $L(H) = I \cdot I^T$ where $L_{i,j} = |e_i \cap e_j|$
3. Eigendecompose, normalize: $\mu_i = \lambda_i / \mathrm{Tr}(L)$
4. Shannon entropy: $S(H) = -\sum \mu_i \log_2(\mu_i)$

In [None]:
def compute_entropy(hg):
    """Compute Shannon entropy S(H) for a dhg.Hypergraph.

    The incidence matrix I is read from ``hg.H_T``, the intersection matrix
    ``L = I @ I.T`` is formed, and the entropy of its normalised positive
    eigenvalue spectrum is returned.

    Args:
        hg: dhg.Hypergraph instance (anything exposing a ``H_T`` tensor with
            a ``to_dense()`` method works).

    Returns:
        entropy: float, Shannon entropy in bits. ``0.0`` when no eigenvalue
        exceeds the noise threshold (e.g. a hypergraph with no incidences).
    """
    # H_T is (num_e, num_v) — the incidence matrix I.
    # Promote to float64 before the eigen-decomposition: in single precision
    # the round-off on a "zero" eigenvalue is far larger than the 1e-12
    # threshold below, so noise would leak into the entropy and the result
    # would drift from the float64 NumPy reference implementation.
    # NOTE(review): dhg appears to store H_T as float32 — confirm.
    incidence = hg.H_T.to_dense().to(torch.float64)

    # L(H) = I * I^T, entry (i,j) = |e_i ∩ e_j|
    intersection = incidence @ incidence.T

    # L is symmetric, so the symmetric solver applies (real eigenvalues).
    eigenvalues = torch.linalg.eigvalsh(intersection)

    # Filter out zero/negative eigenvalues (numerical noise); they would
    # otherwise produce log2(0) or log2 of a negative number.
    eigenvalues = eigenvalues[eigenvalues > 1e-12]

    # Nothing left means the spectrum is entirely zero: entropy is 0.
    if eigenvalues.numel() == 0:
        return 0.0

    # The sum of the kept eigenvalues plays the role of Tr(L).
    mu = eigenvalues / eigenvalues.sum()

    # NOTE: the paper shows log2 in the equation but their results imply
    # they used log10; log2 (bits) is kept here to match the formula.
    return torch.sum(-mu * torch.log2(mu)).item()

## 2. Entropy Vector $SE(H)$

Iterate over all $2^m - 1$ non-empty subsets of hyperedges, grouped by subset size.
For each subset, build a partial `dhg.Hypergraph` and compute its entropy.
Sort entropies in increasing order within each scale, then concatenate.

In [None]:
def entropy_vector(hg):
    """Build the entropy vector SE(H) of a dhg.Hypergraph.

    For every subset size k = 1..m, each k-subset of hyperedges is turned
    into a partial hypergraph on the same vertex count and scored with
    ``compute_entropy``. Scores are sorted ascending within each size and
    the per-size groups are concatenated in order of increasing k.

    Args:
        hg: dhg.Hypergraph instance.

    Returns:
        se: np.ndarray of length 2^m - 1, where m is ``hg.num_e``.
    """
    edges = hg.e[0]  # edge list as exposed by dhg
    num_edges = hg.num_e
    num_vertices = hg.num_v

    collected = []
    for k in range(1, num_edges + 1):
        # One partial hypergraph per k-subset of edge indices.
        scale = sorted(
            compute_entropy(
                Hypergraph(num_vertices, [list(edges[j]) for j in chosen])
            )
            for chosen in combinations(range(num_edges), k)
        )
        collected += scale

    return np.array(collected)

## 3. Example

A small hypergraph with 6 vertices and 3 hyperedges:
- $e_0 = \{0, 1, 2\}$
- $e_1 = \{3, 4\}$
- $e_2 = \{4, 5\}$

With $m=3$ we get $2^3 - 1 = 7$ entries in the entropy vector.

In [None]:
# Alternative example hypergraphs; uncomment one (and comment out the active
# definition below) to try it instead.
#hg = Hypergraph(6, [[0, 1, 2], [1, 2, 3], [3, 4, 5]])
# hg = Hypergraph(6, [[0, 1, 2], [0,3], [0,4,5]])

# Active example: 6 vertices with hyperedges {0,1,2}, {3,4}, {4,5}.
hg = Hypergraph(6, [[0,1,2], [3,4], [4,5]])

# Basic summary as reported by the dhg object itself.
print(f"Vertices: {hg.num_v}, Hyperedges: {hg.num_e}")
print(f"Edge list: {hg.e[0]}")
print(f"Vertex degrees: {hg.deg_v}")
print(f"Edge sizes: {hg.deg_e}")


# Incidence matrix I = H_T (m x n), densified so it can be printed and multiplied
I = hg.H_T.to_dense()
print(f"\nIncidence matrix I (H_T), shape {list(I.shape)}:")
print(I)

# Intersection matrix L = I * I^T; entry (i, j) is |e_i ∩ e_j|
L = I @ I.T
print(f"\nIntersection matrix L = I * I^T:")
print(L)
# Spot-check the three pairwise overlaps read off the off-diagonal of L.
print(f"|e0 ∩ e1| = {int(L[0,1])}, |e0 ∩ e2| = {int(L[0,2])}, |e1 ∩ e2| = {int(L[1,2])}")

# Visualise the hypergraph using the drawing method provided by dhg.
hg.draw()

In [None]:
# Entropy of the complete hypergraph (all hyperedges at once)
S_full = compute_entropy(hg)
print(f"S(H) for full hypergraph: {S_full:.4f} bits")

# Entropy vector over every non-empty subset of hyperedges
se = entropy_vector(hg)
print(f"\nEntropy vector SE(H) ({len(se)} entries):")

# The vector is laid out scale by scale: C(m, 1) entries for single edges,
# then C(m, 2) for pairs, and so on. Walk it with a running offset.
idx = 0
for size, count in ((k, comb(hg.num_e, k)) for k in range(1, hg.num_e + 1)):
    block = se[idx:idx + count]
    print(f"  Size {size} ({count} subsets): {np.round(block, 4)}")
    idx += count

## 4. Verify Against NumPy Implementation

Cross-check with the raw numpy implementation from `entropy_vector.ipynb`.

In [None]:
def compute_entropy_numpy(incidence_matrix):
    """Reference NumPy entropy of an incidence matrix.

    Forms the intersection matrix ``I @ I.T``, keeps eigenvalues above
    1e-12, normalises them by their sum and returns the base-2 Shannon
    entropy of the result. Returns 0.0 when no eigenvalue survives the
    filter.
    """
    gram = incidence_matrix @ incidence_matrix.T
    spectrum = np.linalg.eigvalsh(gram)

    # Discard zero/negative values, which are numerical noise.
    positive = spectrum[spectrum > 1e-12]
    total = positive.sum()
    if total < 1e-12:
        return 0.0

    weights = positive / total
    return -np.sum(weights * np.log2(weights))

def entropy_vector_numpy(hyperedges, num_vertices):
    """Reference NumPy entropy vector.

    Builds a dense float64 incidence matrix (one row per hyperedge, one
    column per vertex), then scores every non-empty subset of rows with
    ``compute_entropy_numpy``. Scores are sorted ascending within each
    subset size and concatenated by increasing size.
    """
    num_edges = len(hyperedges)

    incidence = np.zeros((num_edges, num_vertices), dtype=np.float64)
    for row, members in enumerate(hyperedges):
        for vertex in members:
            incidence[row, vertex] = 1.0

    collected = []
    for k in range(1, num_edges + 1):
        # Row-slicing the incidence matrix yields the partial hypergraph.
        collected.extend(
            sorted(
                compute_entropy_numpy(incidence[list(rows), :])
                for rows in combinations(range(num_edges), k)
            )
        )
    return np.array(collected)

# Feed the reference implementation the edges and vertex count of the very
# same `hg` that produced `se`, rather than a hard-coded copy of the example,
# so changing the example hypergraph cannot make this check compare two
# different hypergraphs.
se_np = entropy_vector_numpy([list(edge) for edge in hg.e[0]], hg.num_v)
print(f"DHG:   {np.round(se, 4)}")
print(f"NumPy: {np.round(se_np, 4)}")
print(f"Match: {np.allclose(se, se_np)}")