In [1]:
import numpy as np 
from scipy.fft import dct

from itertools import combinations  
from tqdm import tqdm 

import warnings
warnings.filterwarnings("ignore")

In [2]:
class SyntheticGrassmanGraph:
    def __init__(
            self, 
            V, 
            d, 
            basis,
            seed,
            mode,
            E0 = None,
            tau = None
    ):
        '''
        A class implementing our graph model based on the alignment of linear vector subspaces
        Attributes:
            V: int ->           Number of nodes
            d: int ->           Nodes stalks dimension
            basis: np.array ->  A complete dictionary (we usually use DCT) for the stalk on the nodes (we consider R^d without loss of generality)
            seed: int ->        Random seed for reproducibility
            mode: str ->        Mode for the generation of the graph, 
                                either "boolean" if based on cardinality constrained programming 
                                or "geometric" if based on thresholding the post-alignemnt distance
            E0: int ->          Prior assumption on the number of edges
            tau: float ->       Threshold for the geometric graph
        Methods:
            subpaceAssignment   Randomly assign to each node a linear subspace of R^d with dimension sampled uniformly at random between 1 and d
            Procrustes          Compute the orthogonal Procrustes solution to the alignment problem ||F_uD_u - F_vD_v||_F^2
            alignment           Perform the alignment between each pair of subspaces
            inference           Carries out a graph based on one of the two mode as a set of edges
        '''

        assert V >= d
        assert mode in ['Geometric','Boolean']

        self.V = V
        self.d = d
        self.E0 = E0
        self.tau = tau
        self.basis = basis
        self.seed = seed
        self.mode = mode

        self.subSpaceAssignment()
        self.alignment()

    def subSpaceAssignment(self):
        
        # Randomly generating subspaces spanned by subsets of the shared basis
        self.SS = {v: np.random.choice(self.d, np.random.choice(np.arange(1,self.d), 1), replace = False) for v in range(self.V)}

        # Preallocating basis for such subspaces
        self.DS = {
            v: np.hstack([self.basis[:,self.SS[v]], np.zeros((self.d, self.d - len(self.SS[v])))])
            for v in range(self.V)
        }
        
    def Procrustes(self, u, v):
        
        D_u = self.DS[u]
        D_v = self.DS[v]

        # Procrustes solution to the alignment problem is based on SVD of the cross product between the basis of the two spaces to be aligned
        X, _, Y = np.linalg.svd(D_v @ D_u.T)
        F_u = X @ Y
        F_v = np.eye(self.d)

        return F_u, F_v, D_u, D_v
    
    def alignment(self):

        # Perform the alignment and store the distance after this operation
        self.dists = {}
        for (u,v) in list(combinations(range(self.V), 2)):
            F_u, F_v, D_u, D_v = self.Procrustes(u,v)
            self.dists[(u,v)] = np.linalg.norm(F_u @ D_u - F_v @ D_v)
    
    def inference(self):
        
        # Edge-set-cardinality based graph -> Sort the Dirichlet energies and keep the first E0
        if self.mode == 'Boolean':
            sorted_edges = sorted(self.dists.items(), key=lambda x:x[1])
            return list(map(lambda x: x[0], sorted_edges[:self.E0]))

        # Alignment-distance-thresholding based graph -> Keep the subset of edges whose Dirichlet energy is below the threshold tau
        if self.mode == 'Geometric':
            filtered_edges = filter(lambda x: x[1] <= self.tau, self.dists.items())
            return list(map(lambda x: x[0], filtered_edges))


In [3]:
# Example utilization

d = 64
B = dct(np.eye(d), axis=0, norm='ortho').T
G = SyntheticGrassmanGraph(64, 64, B, 42, 'Boolean', 100, 0)

In [19]:
class SyntheticSignals:
    def __init__(self, basis, V, d, SNR, seed, snaps):
        '''
        A class for generating possibly noisy signals with a covariance structure lying on different linear subspaces
        Attributes:
            V: int ->               Number of nodes
            d: int ->               Nodes stalks dimension
            SNR: float ->           Signal-to-noise ratio (db)
            seed: int ->            Random seed for reproducibility
            snaps: int ->           Number of signals observed in each subspace
        Methods:
            subpaceAssignment       Randomly assign to each node a linear subspace of R^d with dimension sampled uniformly at random between 1 and d
            random_crosscov         Generates a random crosscovariance matrix with a specific block-sparsity pattern
            random_coefficients     Generates gaussian distributed coefficients for the sparse construction of signals 
            noise                   Generates noise with a given SNR
            normalize               Performs L2 normalization 
            signals_generation      Generates signals accordingly to precomputed basis and spectrum 
        '''

        self.basis = basis
        self.V = V
        self.d = d
        self.SNR = SNR
        self.seed = seed
        self.snaps = snaps

        self.subspace_assignment()
        self.random_crosscov()
        self.random_coefficients()
        self.signals_generation()
        
    def subspace_assignment(self):

        self.SS = {v: np.random.choice(self.d, np.random.choice(np.arange(1,self.d), 1), replace = False) for v in range(self.V)}
        self.dims = {v:len(self.SS[v]) for v in range(self.V)}

    def random_crosscov(self, scale = 1):
        
        self.dim = np.sum(list(self.dims.values()))

        # Generate a random positive spectrum
        eigenvalues = np.random.exponential(scale, self.dim)

        # Use QR decomposition on a matrix with random entries to get an orthonormal basis
        Q, _ = np.linalg.qr(np.random.randn(self.dim, self.dim))

        # Build a covariance matrix using the computed spectrum and orthonormal basis
        self.C = Q @ np.diag(eigenvalues) @ Q.T
        '''
        self.Cs = {(u,v): None for (u,v) in list(combinations(range(self.V),2))}

        # Map all the blocks of the covariance matrix for further operations
        start_i = 0
        for i, di in list(self.dims.items()):
            start_j = start_i + di  # Upper triangular starts after the diagonal block
            for j, dj in list(self.dims.items())[i+1:]:
                end_i = start_i + di
                end_j = start_j + dj
                self.Cs[(i,j)] = self.C[start_i:end_i, start_j:end_j]
                start_j = end_j
            start_i += di
        '''

    def random_coefficients(self):
        
        # Generate a stack of all the coefficients leveraging the imposed covariance structure
        self.S = np.random.randn(self.dim, self.snaps)

        # Map back all the coefficients ot each node
        self.coeffs = {v: None for v in range(self.V)}
        start = 0
        for v in range(self.V):
            end = start + self.dims[v]
            self.coeffs[v] = self.S[start:end, :]
            start = end
    
    def noise(self, signal):

        if self.SNR is None:
            return np.zeros_like(signal)

        signal_power = np.mean(np.square(signal), axis=0, keepdims=True)
        snr_linear = 10 ** (self.SNR / 10)
        noise_power = signal_power / snr_linear

        noise = np.sqrt(noise_power) * np.random.randn(*signal.shape)
        return noise
    '''
    def normalize(self, signal):

        return signal / np.linalg.norm(signal, axis=0)
    '''
    def signals_generation(self):
        
        # Finally we leverage the structure we impose to generate signals
        np.random.seed(self.seed)
        self.signals = {
            node: (
                signal := self.basis[:,self.SS[node]] @ self.coeffs[node]) 
             + self.noise(signal)
            for node in range(self.V)
        }

In [20]:
# Example utilization

d = 64
B = dct(np.eye(d), axis=0, norm='ortho').T
X = SyntheticSignals(B, 64, 64, None, 42, 100)

In [21]:
class DataDrivenGrassmanGraph:
    def __init__(
            self, 
            V, 
            d,
            X,
            basis,
            seed,
            mode,
            E0 = None,
            tau = None
    ):
        '''
        This class should implement the same idea of subspace alignment 
        but through a data-driven apoproach, i.e. using observed signals and not basis for subspaces (still under modeling phase)
        Attributes:
            V: int ->               Number of nodes
            d: int ->               Nodes stalks dimension
            X: SyntheticSignals ->  An instantiated object of the SyntheticSignals class
            seed: int ->            Random seed for reproducibility
            mode: str ->            Mode for the generation of the graph, 
                                    either "boolean" if based on cardinality constrained programming 
                                    or "geometric" if based on thresholding the post-alignemnt distance
            E0: int ->              Prior assumption on the number of edges
            tau: float ->           Threshold for the geometric graph
        Methods:
            Procrustes              Compute the orthogonal Procrustes solution to the data-driven alignment problem ||F_u X_u - F_v X_v||_F^2
            alignment               Perform the alignment between each pair of subspaces
            inference               Carries out a graph based on one of the two mode as a set of edges
        '''
        assert V >= d
        assert mode in ['Geometric','Boolean']

        self.V = V
        self.d = d

        self.E0 = E0
        self.tau = tau

        self.X = X
        self.basis = basis
  
        self.seed = seed
        self.mode = mode

        self.alignment()

    def Procrustes(self, u, v):
        
        S_u = self.X.coeffs[u]
        S_v = self.X.coeffs[v]

        # We use the empirical cross covariance
        C_uv = S_u @ S_v.T / self.X.snaps
        
        #C_uv = self.X.Cs[(u,v)]
        D_u = self.basis[:,self.X.SS[u]]
        D_v = self.basis[:,self.X.SS[v]]

        X, _, Y = np.linalg.svd(D_u @ C_uv @ D_v.T, full_matrices=False)
        F_u = Y.T @ X.T
        F_v = np.eye(self.d)

        return F_u, F_v, D_u @ S_u, D_v @ S_v
    
    def alignment(self):
        
        self.pre_dists = {}
        self.post_dists = {}

        self.dims_diff = {}
        
        for (u,v) in list(combinations(range(self.V), 2)):
            F_u, F_v, S_u, S_v = self.Procrustes(u,v)
            self.pre_dists[(u,v)] = np.linalg.norm(S_u - S_v)**2
            self.post_dists[(u,v)] = np.linalg.norm(F_u @ S_u - F_v @ S_v)**2
            self.dims_diff[(u,v)] = np.abs(len(self.X.SS[u]) - len(self.X.SS[v]))
            
    def inference(self):
        
        # Edge-set-cardinality based graph
        if self.mode == 'Boolean':
            sorted_edges = sorted(self.dists.items(), key=lambda x:x[1])
            return list(map(lambda x: x[0], sorted_edges[:self.E0]))

        # Alignment-distance-thresholding based graph
        if self.mode == 'Geometric':
            filtered_edges = filter(lambda x: x[1] <= self.tau, self.dists.items())
            return list(map(lambda x: x[0], filtered_edges))

In [22]:
G = DataDrivenGrassmanGraph(64, 64, X, B, 42, 'Boolean', 100)

In [23]:
G.pre_dists

{(0, 1): 8457.367980550931,
 (0, 2): 10271.764357738646,
 (0, 3): 8230.66769238455,
 (0, 4): 7650.02846756155,
 (0, 5): 5163.599480919715,
 (0, 6): 7816.867238740123,
 (0, 7): 7735.225372344636,
 (0, 8): 8611.96596829466,
 (0, 9): 5163.300389507472,
 (0, 10): 6897.092562517464,
 (0, 11): 5489.624975859768,
 (0, 12): 8693.01467412826,
 (0, 13): 5408.594076324941,
 (0, 14): 9588.74207317872,
 (0, 15): 4414.740090356656,
 (0, 16): 7540.286860567413,
 (0, 17): 8106.893674055504,
 (0, 18): 7147.222275515466,
 (0, 19): 9066.609582938632,
 (0, 20): 6866.497643445576,
 (0, 21): 4044.179514622872,
 (0, 22): 8335.943387212781,
 (0, 23): 7659.120508893857,
 (0, 24): 7280.377065115735,
 (0, 25): 6332.856428244337,
 (0, 26): 6992.919443022762,
 (0, 27): 6802.4859367499175,
 (0, 28): 7188.798444979652,
 (0, 29): 5823.459135363801,
 (0, 30): 5354.728815997444,
 (0, 31): 6044.004230467285,
 (0, 32): 7313.142021065792,
 (0, 33): 8924.689670295998,
 (0, 34): 7033.958574874957,
 (0, 35): 8824.85228901594

In [24]:
G.post_dists

{(0, 1): 3931.2561142449617,
 (0, 2): 4767.422163181853,
 (0, 3): 4054.9752419617403,
 (0, 4): 3827.211168137089,
 (0, 5): 3788.525585648062,
 (0, 6): 3837.978447499406,
 (0, 7): 3813.655723655313,
 (0, 8): 4039.0102072852123,
 (0, 9): 3594.701071463929,
 (0, 10): 3877.328979008057,
 (0, 11): 3788.0395989854887,
 (0, 12): 4132.488932998011,
 (0, 13): 3626.4980569119357,
 (0, 14): 4536.67916147558,
 (0, 15): 3790.5268330087033,
 (0, 16): 3647.7510015443345,
 (0, 17): 3785.0914635825143,
 (0, 18): 3546.8810125545533,
 (0, 19): 4178.004200145377,
 (0, 20): 3684.9332783011305,
 (0, 21): 3816.9634870641535,
 (0, 22): 3818.192419815997,
 (0, 23): 3751.2683191689075,
 (0, 24): 3705.3162927301278,
 (0, 25): 3633.338798476985,
 (0, 26): 3619.632601757708,
 (0, 27): 3750.923165013759,
 (0, 28): 3807.4618971662767,
 (0, 29): 3655.9325542819615,
 (0, 30): 3651.945804238711,
 (0, 31): 3618.2718204472076,
 (0, 32): 3745.595779402411,
 (0, 33): 4261.312182780708,
 (0, 34): 3680.370630703323,
 (0, 35)