# Toy topology and signals dataset

In [1]:
import numpy as np 
from itertools import combinations 
from tqdm import tqdm 
from sklearn.linear_model import OrthogonalMatchingPursuit
from matplotlib import pyplot as plt 

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Fourier basis + Canonical basis

def FB(
    D:int,
): 

    k = np.arange(D).reshape((D, 1))
    n = np.arange(D).reshape((1, D))

    return np.asmatrix((1/np.sqrt(D)*np.exp(-2j * np.pi * k * n / D)))

In [3]:
B = np.eye(5)

In [4]:
def GrassmanDistance(
        U_A,
        U_B
):
    assert U_A.shape == U_B.shape
    
    _, S, _ = np.linalg.svd(U_A.T @ U_B)
    S = np.clip(S, -1.0, 1.0)
    
    return np.linalg.norm(np.arccos(S))

In [5]:
def subSpaceAssignment(N:int,
                       D:int,
                       K:int):
    
    combo = list(combinations(range(D), K))
    patterns = np.random.choice(len(combo), N, replace=True)
    return {n: np.array(combo[patterns[n]], dtype = 'int32') for n in range(N)}

In [6]:
def premultiplier(Xu, Xv):
    uu = np.linalg.pinv(Xu @ Xu.T)
    uv = Xu @ Xv.T
    vv = np.linalg.pinv(Xv @ Xv.T)
    vu = Xv @ Xu.T

    return (uu, uv, vv, vu)

def chi_u(uu, uv, vv, vu):

    return ((uu @ uv - np.eye(uu.shape[0])) @ vv @ np.linalg.pinv(vu @ uu @ uv @ vv - np.eye(uu.shape[0])) @ vu - np.eye(uu.shape[0])) @ uu

def chi_v(uu, uv, vv, vu):

    return (uu @ uv - np.eye(uu.shape[0])) @ vv @ np.linalg.pinv(vu @ uu @ uv @ vv - np.eye(uu.shape[0]))

________________

In [7]:
V = 4
edges = [
    (0,1),
    (1,2),
    (1,3),
    (0,2)
]

nodes = [i for i in range(V)]
E = len(edges)

In [8]:
d = 20
k = 5
N = 100000

In [9]:
D = FB(d)

In [10]:
subspaces = {0:np.array([0,1,2,3]),
             1:np.array([3,4,5,6]),
             2:np.array([1,2,3,4]),
             3:np.array([6,7,8,9])}

In [11]:
B = np.eye(d)

In [12]:
list(combinations(range(V), 2))

[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]

In [13]:
pair_wise_subspaces_distance = {
    edge: GrassmanDistance(B[:,subspaces[edge[0]]], B[:,subspaces[edge[1]]])
    for edge in list(combinations(range(V), 2))
}

In [14]:
edges

[(0, 1), (1, 2), (1, 3), (0, 2)]

In [15]:
sorted(pair_wise_subspaces_distance.items(), key = lambda x: x[1])

[((0, 2), 1.5707963267948966),
 ((1, 2), 2.221441469079183),
 ((0, 1), 2.7206990463513265),
 ((1, 3), 2.7206990463513265),
 ((0, 3), 3.141592653589793),
 ((2, 3), 3.141592653589793)]

In [16]:
signals = {node: B[:,subspaces[node]] @ np.random.randn(4, N) + np.random.randn(d,N)*0.01
           for node in nodes}

In [17]:
signals[0].shape

(20, 100000)

In [18]:
T = 0

H = {
    edge : {
        edge[0] : None,
        edge[1] : None
    }
for edge in combinations(nodes, 2)
}

In [19]:
for e in tqdm(combinations(nodes,2)):
    u = e[0]
    v = e[1]

    X_u = signals[u]
    X_v = signals[v]
    uu, uv, vv, vu = premultiplier(X_u, X_v)

    H[e][u] = chi_u(uu, uv, vv, vu)
    H[e][v] = chi_u(uu, uv, vv, vu)
    
    T += np.trace(H[e][u]) + np.trace(H[e][v])

6it [00:00, 36.84it/s]


In [20]:
mu = T

In [21]:
H = {
    edge : {
        edge[0] : mu/T * (H[edge][edge[0]]),
        edge[1] : mu/T * (H[edge][edge[1]])
    }
for edge in combinations(nodes, 2)
}

In [22]:
all_edges = list(combinations(range(V), 2))

energies = {
    e : 0
    for e in all_edges
    }

for e in (all_edges):
    u = e[0]
    v = e[1]
    
    '''
    X_ = np.zeros_like(X)
    X_[u*d:(u+1)*d,:] = X[u*d:(u+1)*d,:]
    X_[v*d:(v+1)*d,:] = X[v*d:(v+1)*d,:]
    energies[e] = np.linalg.norm(BB @ X_)
    '''

    energies[e] = np.linalg.norm(H[e][e[0]] @ signals[e[0]] - H[e][e[1]] @ signals[e[1]])

In [23]:
retrieved = sorted(energies.items(), key=lambda x:x[1])[:E]

In [24]:
print(f'Accuracy in retrieving underlying graph {len(set(list(map(lambda x: x[0], retrieved))).intersection(set(edges))) / E}')

Accuracy in retrieving underlying graph 1.0


In [25]:
retrieved

[((0, 2), 31.56525172391306),
 ((1, 2), 44.71663006862969),
 ((0, 1), 54.703836937560396),
 ((1, 3), 54.90510423994355)]

_______________

# Randomly generated signals with $\ell_0$ group penalty on canonical basis

In [26]:
# Let's generate a toy topology for our example

nodes = [i for i in range(7)]
edges = [
    (0,1),
    (0,3),
    (0,6),
    (1,2),
    (1,5),
    (2,4),
    (4,6),
    (5,6)
]

V = 7
E = len(edges)

d = 10                                          # Node and edges stalks dimension

F = {
    e:{
        e[0]:np.random.randn(d,d),
        e[1]:np.random.randn(d,d)
        } 
        for e in edges
    }                                           # Incidency linear maps


In [27]:
# Sheaf representation 

# Coboundary map

B = np.zeros((d*E, d*V))

for i in range(len(edges)):
    edge = edges[i]

    u = edge[0] 
    v = edge[1] 

    B_u = F[edge][u]
    B_v = F[edge][v]

    B[i*d:(i+1)*d, u*d:(u+1)*d] = B_u
    B[i*d:(i+1)*d, v*d:(v+1)*d] = - B_v

# (Normalized) Sheaf Laplacian

L_f = B.T @ B
L_f /= np.max(np.linalg.eigvals(L_f))

In [47]:
# Data generation 
np.random.seed(42)
N = 100
X = np.random.randn(V*d,N)

# TODO Do we need here a proper rotation matrix? 

# Tikhonov inspired filtering

Lambda, U = np.linalg.eig(L_f)
H = 1/(1 + 10*Lambda)

# Filter the signal accordingly to the true sheaf laplacian

Y = U @ np.diag(H) @ U.T @ X

# Normalize data
Y = np.apply_along_axis(lambda d: d/np.linalg.norm(d), axis=0, arr=Y)

# Add noise
#Y += np.random.normal(0, 10e-2, size=Y.shape)

# Remap the signals back on each node

signals = {
    node: Y[node*d:(node + 1)*d,:]
    for node in nodes
    }

In [48]:
def block_ortho_match_pursuit(Y, D, K):

    S = []

    X = np.zeros((D.shape[1], Y.shape[1]))
    iters = 0
    R = Y

    while iters < K:

        # Retrieve the maximum correlation between atoms and residuals of the previous iteration
        S.append(np.argmax(np.linalg.norm(D.T @ R, axis = 1)))

        # Expand the dictionary for the representation
        dic = D[:,S]

        # Solve subproblems and update x
        X[S] = np.linalg.pinv(dic) @ Y
        
        # Update the residuals
        R = Y - D @ X
        iters += 1

    return X

In [30]:
def OMP(Y, D, T0):
    
    batch_size, _ = Y.shape
    dictionary_dim = D.shape[1]

    # Initialize the coefficient matrix
    X = np.zeros((batch_size, dictionary_dim))

    # Initialize the OMP model
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=T0)

    # Loop through each sample in the batch
    for i in range(batch_size):
        # Fit the model to each sample
        omp.fit(D, Y[i])

        # Get the estimated coefficients for the current sample
        X[i, :] = omp.coef_
    
    return X.T

In [49]:
sparse_coded = {
    node: block_ortho_match_pursuit(signals[node], np.eye(d), 4)
    for node in nodes
    }

In [50]:
support = {node: np.nonzero(sparse_coded[node][:,0]) for node in nodes}

In [51]:
edges

[(0, 1), (0, 3), (0, 6), (1, 2), (1, 5), (2, 4), (4, 6), (5, 6)]

In [52]:
support

{0: (array([0, 4, 6, 8], dtype=int64),),
 1: (array([4, 5, 8, 9], dtype=int64),),
 2: (array([4, 5, 8, 9], dtype=int64),),
 3: (array([0, 3, 4, 9], dtype=int64),),
 4: (array([0, 2, 4, 9], dtype=int64),),
 5: (array([2, 3, 5, 8], dtype=int64),),
 6: (array([2, 3, 4, 9], dtype=int64),)}

In [53]:
reconstructed = {node: np.eye(d) @ sparse_coded[node] for node in nodes}

In [54]:
pair_wise_subspaces_distance = {
    edge: GrassmanDistance(np.eye(d)[:,np.nonzero(sparse_coded[edge[0]][:,0])[0]], np.eye(d)[:,np.nonzero(sparse_coded[edge[1]][:,0])[0]])
    for edge in list(combinations(range(V), 2))
}

In [55]:
sorted(pair_wise_subspaces_distance.items(), key = lambda x: x[1])

[((1, 2), 0.0),
 ((3, 4), 1.5707963267948966),
 ((3, 6), 1.5707963267948966),
 ((4, 6), 1.5707963267948966),
 ((0, 1), 2.221441469079183),
 ((0, 2), 2.221441469079183),
 ((0, 3), 2.221441469079183),
 ((0, 4), 2.221441469079183),
 ((1, 3), 2.221441469079183),
 ((1, 4), 2.221441469079183),
 ((1, 5), 2.221441469079183),
 ((1, 6), 2.221441469079183),
 ((2, 3), 2.221441469079183),
 ((2, 4), 2.221441469079183),
 ((2, 5), 2.221441469079183),
 ((2, 6), 2.221441469079183),
 ((5, 6), 2.221441469079183),
 ((0, 5), 2.7206990463513265),
 ((0, 6), 2.7206990463513265),
 ((3, 5), 2.7206990463513265),
 ((4, 5), 2.7206990463513265)]

In [56]:
T = 0

H = {
    edge : {
        edge[0] : None,
        edge[1] : None
    }
for edge in combinations(nodes, 2)
}

In [57]:
for e in tqdm(combinations(nodes,2)):
    u = e[0]
    v = e[1]

    X_u = signals[u]
    X_v = signals[v]
    uu, uv, vv, vu = premultiplier(X_u, X_v)

    H[e][u] = chi_u(uu, uv, vv, vu)
    H[e][v] = chi_u(uu, uv, vv, vu)
    
    T += np.trace(H[e][u]) + np.trace(H[e][v])

21it [00:00, 4082.14it/s]


In [58]:
mu = T

In [59]:
H = {
    edge : {
        edge[0] : mu/T * (H[edge][edge[0]]),
        edge[1] : mu/T * (H[edge][edge[1]])
    }
for edge in combinations(nodes, 2)
}

In [60]:
all_edges = list(combinations(range(V), 2))

energies = {
    e : 0
    for e in all_edges
    }

for e in (all_edges):
    u = e[0]
    v = e[1]
    
    '''
    X_ = np.zeros_like(X)
    X_[u*d:(u+1)*d,:] = X[u*d:(u+1)*d,:]
    X_[v*d:(v+1)*d,:] = X[v*d:(v+1)*d,:]
    energies[e] = np.linalg.norm(BB @ X_)
    '''

    energies[e] = np.linalg.norm(H[e][e[0]] @ reconstructed[e[0]] - H[e][e[1]] @ reconstructed[e[1]])

In [64]:
retrieved = sorted(pair_wise_subspaces_distance.items(), key=lambda x:x[1])[:E]

In [65]:
print(f'Accuracy in retrieving underlying graph {len(set(list(map(lambda x: x[0], retrieved))).intersection(set(edges))) / E}')

Accuracy in retrieving underlying graph 0.5


In [66]:
retrieved

[((1, 2), 0.0),
 ((3, 4), 1.5707963267948966),
 ((3, 6), 1.5707963267948966),
 ((4, 6), 1.5707963267948966),
 ((0, 1), 2.221441469079183),
 ((0, 2), 2.221441469079183),
 ((0, 3), 2.221441469079183),
 ((0, 4), 2.221441469079183)]

In [46]:
edges

[(0, 1), (0, 3), (0, 6), (1, 2), (1, 5), (2, 4), (4, 6), (5, 6)]