In [1]:
# Import specific functions
import sys
import numpy as np
from numpy.linalg import eig, inv, pinv, eigvals
from scipy.spatial.distance import squareform, pdist
from scipy.sparse import csr_matrix, random as sparse_random, find, issparse
# from scipy.stats import multivariate_normal as mvnpdf

import time
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import inv
import numpy as np
import scipy.sparse as sp
from scipy.spatial.distance import squareform
from scipy.sparse import triu, coo_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, normalized_mutual_info_score
from scipy.sparse.linalg import norm as sparse_norm
from scipy.spatial.distance import squareform
from math import sqrt
import networkx as nx


sys.path.append('/Users/paul_reitz/Documents/repos/PyAWGLMM/Smooth_AWGLMM')
from scripts.utils import (
    visualize_glmm,
    graph_learning_perf_eval,
    identify_and_compare,
    generate_connected_graph,
    normest,
    lin_map,
    squareform_sp,
    sum_squareform,
    prox_sum_log,
    gsp_distanz,
)
from scripts.utils_deep import mvnpdf

np.random.seed(17307946)

In [2]:
def normalize_data(y):
    # Standardize each feature to zero mean, unit variance
    mean = np.mean(y, axis=0)
    std = np.std(y, axis=0) + 1e-12
    y_norm = (y - mean)/std
    return y_norm


In [3]:
def generate_connected_graph(n, p, zero_thresh, maxit=10, verbose=1):
    """
    Generate a connected Erdos-Renyi graph using networkx and ensure 
    its second smallest eigenvalue of Laplacian is > zero_thresh.

    Parameters
    ----------
    n : int
        Number of nodes.
    p : float
        Probability of connection between nodes.
    zero_thresh : float
        Threshold for the second smallest eigenvalue to ensure connectivity.
    maxit : int, optional
        Maximum number of tries to get a connected graph (default 10).
    verbose : int, optional
        Verbosity level (default 1).

    Returns
    -------
    G_dict : dict
        A dictionary mimicking the G structure with keys:
        - 'W': adjacency/weight matrix
        - 'L': Laplacian matrix
        - 'N': number of nodes
        - 'type': 'erdos_renyi'
    
    Raises
    ------
    ValueError
        If after maxit attempts no connected graph is found.
    """
    for iteration in range(1, maxit+1):
        # Generate an Erdos-Renyi graph
        Gnx = nx.erdos_renyi_graph(n, p)
        W = nx.to_numpy_array(Gnx, dtype=np.float64)

        # Remove any self loops by zeroing diagonal
        np.fill_diagonal(W, 0.0)

        # Compute Laplacian
        d = np.sum(W, axis=1)
        D = np.diag(d)
        L = D - W

        # Compute eigenvalues
        e = np.linalg.eigvalsh((L + L.T)*0.5)
        e = np.sort(e)

        # Check connectivity condition
        if len(e) > 1 and e[1] > zero_thresh:
            if verbose > 1:
                print(f"A connected graph has been created in {iteration} iteration(s)")

            G_dict = {}
            G_dict['W'] = W
            G_dict['L'] = L
            G_dict['N'] = n
            G_dict['type'] = 'erdos_renyi'
            return G_dict
        else:
            if verbose > 1:
                print(f"Iteration {iteration} failed. Trying again.")

    if verbose:
        print("Warning: The graph is not strongly connected after maxit attempts.")
    raise ValueError("Could not generate a connected graph after maxit attempts.")







In [4]:

# def gsp_learn_graph_log_degrees(Z, a, b, params=None):
#     if params is None:
#         params = {}
#     verbosity = params.get('verbosity', 1)
#     maxit = params.get('maxit', 1000)
#     tol = params.get('tol', 1e-5)
#     step_size = params.get('step_size', 0.5)
#     fix_zeros = params.get('fix_zeros', False)
#     max_w = params.get('max_w', np.inf)

#     w_0 = params.get('w_0', 0)
#     if w_0 != 0 and 'c' not in params:
#         raise ValueError("When w_0 is specified, c must also be specified")
#     c = params.get('c', 0.0 if w_0 == 0 else None)

#     # Convert Z to vector form
#     if Z.ndim == 2 and Z.shape[0] == Z.shape[1]:
#         z = squareform_sp(Z)
#     else:
#         z = Z
#     z = z.ravel()
#     l = len(z)
#     n = int(round((1 + sqrt(1+8*l))/2))

#     if not np.isscalar(w_0):
#         # Convert w_0 to vector form if needed
#         if w_0.ndim == 2 and w_0.shape[0] == w_0.shape[1]:
#             w_0 = squareform_sp(w_0)
#         w_0 = w_0.ravel()
#     else:
#         w_0 = float(w_0)

#     if fix_zeros:
#         edge_mask = params.get('edge_mask', None)
#         if edge_mask is None:
#             raise ValueError("edge_mask must be provided when fix_zeros is True")
#         if edge_mask.ndim == 2 and edge_mask.shape[0] == edge_mask.shape[1]:
#             edge_mask = squareform_sp(edge_mask)
#         edge_mask = edge_mask.ravel()
#         ind = np.flatnonzero(edge_mask)
#         z = z[ind].astype(float)
#         if not np.isscalar(w_0):
#             w_0 = w_0[ind].astype(float)
#     else:
#         z = z.astype(float)
#         if not np.isscalar(w_0):
#             w_0 = w_0.astype(float)

#     w = params.get('W_init', np.zeros_like(z, dtype=float))

#     # Construct S, St
#     if fix_zeros:
#         S, St = sum_squareform(n, edge_mask)
#     else:
#         S, St = sum_squareform(n)

#     K_op = lambda w_: S.dot(w_)
#     Kt_op = lambda z_: St.dot(z_)

#     if fix_zeros:
#         # Estimate norm_K
#         x_test = np.random.randn(S.shape[1])
#         for _ in range(5):
#             x_test = St.dot(S.dot(x_test))
#             norm_est = np.linalg.norm(x_test)
#             if norm_est == 0:
#                 break
#             x_test = x_test / norm_est
#         norm_K = sqrt(norm_est)
#     else:
#         norm_K = sqrt(2*(n-1))

#     def f_eval(w_):
#         return 2*np.dot(w_, z)
#     def f_prox(w_, c_):
#         return np.minimum(max_w, np.maximum(0, w_ - 2*c_*z))

#     def g_eval(x):
#         # Check if x has non-positive values
#         if np.any(x <= 0):
#             return np.inf
#         return -a * np.sum(np.log(x))

#     def g_prox(z_, c_):
#         sol, _ = prox_sum_log(z_, c_*a, param={'verbose':0})
#         return sol

#     def g_star_prox(z_, c_):
#         z_ = np.asarray(z_, dtype=float)
#         # If z_/(c_*a) not positive, prox_sum_log will report inf and handle gracefully
#         sol, _ = prox_sum_log(z_/(c_*a), 1/(c_*a), param={'verbose':0})
#         return z_ - c_*a * sol

#     if w_0 == 0:
#         def h_eval(w_):
#             return b*np.sum(w_**2)
#         def h_grad(w_):
#             return 2*b*w_
#         h_beta = 2*b
#     else:
#         def h_eval(w_):
#             return b*np.sum(w_**2) + c*np.sum((w_-w_0)**2)
#         def h_grad(w_):
#             return 2*((b+c)*w_ - c*w_0)
#         h_beta = 2*(b+c)

#     mu = h_beta + norm_K
#     epsilon = lin_map(0.0, [0, 1/(1+mu)], [0,1])
#     gn = lin_map(step_size, [epsilon, (1-epsilon)/mu], [0,1])

#     v_n = K_op(w)

#     stat = {}
#     if verbosity > 1:
#         stat['f_eval'] = np.full(maxit, np.nan)
#         stat['g_eval'] = np.full(maxit, np.nan)
#         stat['h_eval'] = np.full(maxit, np.nan)
#         stat['fgh_eval'] = np.full(maxit, np.nan)
#         stat['pos_violation'] = np.full(maxit, np.nan)
#         print('Relative change of primal, dual variables, and objective fun')

#     t0 = time.time()
#     for i in range(maxit):
#         Y_n = w - gn*(h_grad(w) + Kt_op(v_n))
#         y_n = v_n + gn*(K_op(w))

#         P_n = f_prox(Y_n, gn)
#         p_n = g_star_prox(y_n, gn)
#         Q_n = P_n - gn*(h_grad(P_n) + Kt_op(p_n))
#         q_n = p_n + gn*(K_op(P_n))

#         if verbosity > 2:
#             stat['f_eval'][i] = f_eval(w)
#             val_g = g_eval(K_op(w))
#             stat['g_eval'][i] = val_g
#             stat['h_eval'][i] = h_eval(w)
#             # If val_g is inf, fgh_eval might be inf as well, which is acceptable
#             stat['fgh_eval'][i] = stat['f_eval'][i] + stat['g_eval'][i] + stat['h_eval'][i]
#             stat['pos_violation'][i] = -np.sum(np.minimum(0,w))

#         denom_w = np.linalg.norm(w)
#         if denom_w < 1e-15:
#             denom_w = 1e-15
#         denom_v = np.linalg.norm(v_n)
#         if denom_v < 1e-15:
#             denom_v = 1e-15

#         rel_norm_primal = np.linalg.norm(-Y_n + Q_n)/denom_w
#         rel_norm_dual = np.linalg.norm(-y_n + q_n)/denom_v

#         if verbosity > 1 and verbosity <= 3:
#             print(f'iter {i+1:4d}: {rel_norm_primal:6.4e}   {rel_norm_dual:6.4e}')

#         w = w - Y_n + Q_n
#         v_n = v_n - y_n + q_n

#         if rel_norm_primal < tol and rel_norm_dual < tol:
#             break

#     stat['time'] = time.time() - t0
#     if verbosity > 0:
#         obj_val = f_eval(w) + g_eval(K_op(w)) + h_eval(w)
#         print(f'# iters: {i+1:4d}. Rel primal: {rel_norm_primal:6.4e} Rel dual: {rel_norm_dual:6.4e}  OBJ {obj_val:6.3e}')
#         print(f'Time needed is {stat["time"]} seconds')

#     if fix_zeros:
#         full_w = np.zeros(l, dtype=float)
#         full_w[ind] = w
#         w = full_w

#     if Z.ndim == 2 and Z.shape[0] == Z.shape[1]:
#         W = squareform_sp(w)
#     else:
#         W = w

#     return W, stat

In [5]:
def gsp_learn_graph_log_degrees(Z, a, b, params=None):
    if params is None:
        params = {}
    verbosity = params.get('verbosity', 1)
    maxit = params.get('maxit', 1000)
    tol = params.get('tol', 1e-5)
    step_size = params.get('step_size', 0.5)
    fix_zeros = params.get('fix_zeros', False)
    max_w = params.get('max_w', np.inf)

    w_0 = params.get('w_0', 0)
    if w_0 != 0 and 'c' not in params:
        raise ValueError("When params.w_0 is specified, c must also be specified")
    c = params.get('c', 0.0 if w_0 == 0 else None)

    from scipy.spatial.distance import squareform as sp_squareform
    # Convert Z to vector form
    if Z.ndim == 2 and Z.shape[0] == Z.shape[1]:
        z = squareform_sp(Z)
    else:
        z = Z
    z = z.ravel()
    l = len(z)
    n = int(round((1 + sqrt(1+8*l))/2))

    if not np.isscalar(w_0):
        # Convert w_0 to vector form if needed
        if w_0.ndim == 2 and w_0.shape[0] == w_0.shape[1]:
            w_0 = squareform_sp(w_0)
        w_0 = w_0.ravel()
    else:
        w_0 = float(w_0)

    if fix_zeros:
        edge_mask = params.get('edge_mask', None)
        if edge_mask is None:
            raise ValueError("edge_mask must be provided when fix_zeros is True")
        if edge_mask.ndim == 2 and edge_mask.shape[0] == edge_mask.shape[1]:
            edge_mask = squareform_sp(edge_mask)
        edge_mask = edge_mask.ravel()
        ind = np.flatnonzero(edge_mask)
        z = z[ind].astype(float)
        if not np.isscalar(w_0):
            w_0 = w_0[ind].astype(float)
    else:
        z = z.astype(float)
        if not np.isscalar(w_0):
            w_0 = w_0.astype(float)

    w = params.get('W_init', np.zeros_like(z, dtype=float))

    # Construct S, St
    if fix_zeros:
        S, St = sum_squareform(n, edge_mask)
    else:
        S, St = sum_squareform(n)

    K_op = lambda w_: S.dot(w_)
    Kt_op = lambda z_: St.dot(z_)

    if fix_zeros:
        norm_K = normest(S)
    else:
        norm_K = sqrt(2*(n-1))

    def f_eval(w_):
        return 2*np.dot(w_, z)
    def f_prox(w_, c_):
        return np.minimum(max_w, np.maximum(0, w_ - 2*c_*z))

    def g_eval(x):
        # if x<=0, log not defined => infinite
        if np.any(x <= 0):
            return np.inf
        return -a * np.sum(np.log(x))
    def g_prox(z_, c_):
        sol, _ = prox_sum_log(z_, c_*a, param={'verbose':-3})
        return sol

    def g_star_prox(z_, c_):
        z_ = np.asarray(z_, dtype=float)
        sol, _ = prox_sum_log(z_/(c_*a), 1/(c_*a), param={'verbose':-3})
        return z_ - c_*a * sol

    if w_0 == 0:
        def h_eval(w_):
            return b*np.sum(w_**2)
        def h_grad(w_):
            return 2*b*w_
        h_beta = 2*b
    else:
        def h_eval(w_):
            return b*np.sum(w_**2) + c*np.sum((w_-w_0)**2)
        def h_grad(w_):
            return 2*((b+c)*w_ - c*w_0)
        h_beta = 2*(b+c)

    mu = h_beta + norm_K
    epsilon = lin_map(0.0, [0, 1/(1+mu)], [0,1])
    gn = lin_map(step_size, [epsilon, (1-epsilon)/mu], [0,1])

    v_n = K_op(w)

    stat = {}
    if verbosity > 1:
        stat['f_eval'] = np.full(maxit, np.nan)
        stat['g_eval'] = np.full(maxit, np.nan)
        stat['h_eval'] = np.full(maxit, np.nan)
        stat['fgh_eval'] = np.full(maxit, np.nan)
        stat['pos_violation'] = np.full(maxit, np.nan)
        if verbosity > 1:
            print('Relative change of primal, dual variables, and objective fun')

    t0 = time.time()
    for i in range(maxit):
        Y_n = w - gn*(h_grad(w) + Kt_op(v_n))
        y_n = v_n + gn*(K_op(w))

        P_n = f_prox(Y_n, gn)
        p_n = g_star_prox(y_n, gn) 
        Q_n = P_n - gn*(h_grad(P_n) + Kt_op(p_n))
        q_n = p_n + gn*(K_op(P_n))

        if verbosity > 2:
            stat['f_eval'][i] = f_eval(w)
            val_g = g_eval(K_op(w))
            stat['g_eval'][i] = val_g
            stat['h_eval'][i] = h_eval(w)
            stat['fgh_eval'][i] = stat['f_eval'][i] + stat['g_eval'][i] + stat['h_eval'][i]
            stat['pos_violation'][i] = -np.sum(np.minimum(0,w))

        denom_w = np.linalg.norm(w)
        if denom_w < 1e-15:
            denom_w = 1e-15
        denom_v = np.linalg.norm(v_n)
        if denom_v < 1e-15:
            denom_v = 1e-15

        rel_norm_primal = np.linalg.norm(-Y_n + Q_n)/denom_w
        rel_norm_dual = np.linalg.norm(-y_n + q_n)/denom_v

        if verbosity > 1 and verbosity <= 3:
            print(f'iter {i+1:4d}: {rel_norm_primal:6.4e}   {rel_norm_dual:6.4e}')

        w = w - Y_n + Q_n
        v_n = v_n - y_n + q_n

        if rel_norm_primal < tol and rel_norm_dual < tol:
            break

    stat['time'] = time.time() - t0
    if verbosity > 0:
        obj_val = f_eval(w) + g_eval(K_op(w)) + h_eval(w)
        print(f'# iters: {i+1:4d}. Rel primal: {rel_norm_primal:6.4e} Rel dual: {rel_norm_dual:6.4e}  OBJ {obj_val:6.3e}')
        print(f'Time needed is {stat["time"]} seconds')

    if fix_zeros:
        # reconstruct full w
        full_w = np.zeros(l, dtype=float)
        full_w[ind] = w
        w = full_w

    # Convert back to matrix if Z was a matrix
    if Z.ndim == 2 and Z.shape[0] == Z.shape[1]:
        W = squareform_sp(w)
    else:
        W = w

    return W, stat

In [6]:


# def glmm(y, iterations, classes, spread=0.1, regul=0.15, norm_par=1.5):
#     """
#     Python version of the MATLAB function glmm_matlab.

#     Parameters
#     ----------
#     y : ndarray (m x n)
#         Data matrix with m samples and n features.
#     iterations : int
#         Number of iterations.
#     classes : int
#         Number of classes (clusters).
#     spread : float, optional
#         Default 0.1
#     regul : float, optional
#         Default 0.15
#     norm_par : float, optional
#         Default 1.5

#     Returns
#     -------
#     L : ndarray (n x n x classes)
#         Graph Laplacians for each class.
#     gamma_hat : ndarray (m x classes)
#         Cluster posterior probabilities.
#     mu : ndarray (n x classes)
#         Cluster means.
#     log_likelihood : ndarray (iterations,)
#         Log-likelihood at each iteration.
#     """
#     y = np.asarray(y, dtype=np.float64)
#     # Helper constants
#     delta = 2
#     m, n = y.shape  # m samples, n features

#     # Initialize arrays
#     L = np.zeros((n, n, classes))
#     W = np.zeros((n, n, classes))
#     sigma = np.zeros((n-1, n-1, classes))
#     mu = np.zeros((n, classes))
#     gamma_hat = np.zeros((m, classes))
#     p = np.zeros(classes)
#     vecl = np.zeros((n, n, classes))
#     vall = np.zeros((n, n, classes))
#     yl = np.zeros((m, n-1, classes))

#     # Initialization
#     for class_idx in range(classes):
#         L[..., class_idx] = spread * np.eye(n) - (spread / n) * np.ones((n, n))
#         mu_curr = np.mean(y, axis=0) + np.random.randn(n) * np.std(y, axis=0)
#         mu_curr = mu_curr - np.mean(mu_curr)
#         mu[:, class_idx] = mu_curr
#         p[class_idx] = 1.0 / classes

#     log_likelihood = np.zeros(iterations)

#     # Main loop
#     for it in range(iterations):
#         # Expectation step
#         # putting everything in eigenvector space of dim-1
#         pall = np.zeros(m)

#         for class_idx in range(classes):
#             # Eigen decomposition
#             eigvals, eigvecs = np.linalg.eig(L[..., class_idx])
#             # In MATLAB, [V,D] = eig(L) returns V as eigenvectors and D diag eigenvalues.
#             # np.linalg.eig returns eigenvalues as a vector and eigenvectors as columns of eigvecs
#             # We should ensure sorting if needed, but we'll assume identical behavior as MATLAB.
#             # Let's store directly:
#             vecl[..., class_idx] = eigvecs
#             vall[..., class_idx] = np.diag(eigvals)

#             # sigma = inv(vall(2:n,2:n,class) + regul*I)
#             # We take the sub-block from index 1 to end (2:n in MATLAB means skipping the first eigenvalue)
#             sub_eigvals = eigvals[1:]
#             Sigma_inv = np.diag(sub_eigvals) + regul * np.eye(n-1)
#             # inverse
#             Sigma = np.linalg.inv(Sigma_inv)
#             # Make symmetric
#             Sigma = (Sigma + Sigma.T) / 2
#             sigma[..., class_idx] = Sigma

#             # yl = (y - mu(:,class)') * vecl(:,2:n,class)
#             # mu[:,class_idx] is shape (n,)
#             # y is (m,n), mu is (n,), we want (y - mu') => (m,n)
#             # vecl(:,2:n,class_idx) => vecl[:,: ,class_idx], take from index 1 to n-1 for columns
#             Y_centered = y - mu[:, class_idx]
#             YL = Y_centered @ vecl[:, 1:, class_idx]
#             yl[..., class_idx] = YL

#             # pall = pall + p(class) * mvnpdf(yl(:,:,class), zeros(1,n-1), sigma(:,:,class))
#             # We assume mvnpdf(yl, mean=0, cov=sigma) returns a vector of length m
#             mvn_val = mvnpdf(YL, np.zeros(n-1), Sigma)
#             pall += p[class_idx] * mvn_val

#         # Avoid division by zero
#         pall[pall == 0] = 0.1

#         # compute gamma_hat
#         for class_idx in range(classes):
#             mvn_val = mvnpdf(yl[..., class_idx], np.zeros(n-1), sigma[..., class_idx])
#             gamma_hat[:, class_idx] = (p[class_idx] * mvn_val) / pall

#         # log-likelihood
#         log_likelihood[it] = np.sum(np.log(pall))

#         # Maximization step
#         for class_idx in range(classes):
#             # mu(:,class) = (gamma_hat(:,class)' * y) / sum(gamma_hat(:,class))
#             wght = gamma_hat[:, class_idx]
#             mu[:, class_idx] = (wght @ y) / np.sum(wght)

#             # yc = sqrt(gamma_hat(:,class)) .* (y - mu(:,class)')
#             # sqrt(gamma_hat(:,class)) is shape (m,)
#             # (y - mu[:,class_idx]) is (m,n)
#             # elementwise: we can do broadcast:
#             yc = (y - mu[:, class_idx]) * np.sqrt(wght)[:, None]

#             # Z = gsp_distanz(yc).^2
#             Z = gsp_distanz(yc)
#             Z = Z**2

#             # theta = mean(Z(:))/norm_par
#             theta = np.mean(Z) / norm_par

#             # W_curr = delta*gsp_learn_graph_log_degrees(Z./theta, 1, 1)
#             W_curr, _ = gsp_learn_graph_log_degrees(Z / theta, 1, 1, params={})
#             W_curr = delta * W_curr

#             p[class_idx] = np.sum(wght) / m

#             # Compute L(:,:,class)
#             # L = diag(sum(W)) - W
#             W_sum = np.sum(W_curr, axis=1)
#             L[..., class_idx] = np.diag(W_sum) - W_curr

#             # W_curr(W_curr<1e-3)=0
#             W_curr[W_curr < 1e-3] = 0
#             W[..., class_idx] = W_curr

#     return L, gamma_hat, mu, log_likelihood


In [7]:
def glmm(y, iterations, classes, spread=0.1, regul=0.15, norm_par=1.5):
    """
    Python version of glmm_matlab.

    Parameters
    ----------
    y : ndarray (m x n)
        Data matrix with m samples and n features.
    iterations : int
        Number of iterations.
    classes : int
        Number of classes (clusters).
    spread : float, optional
        Default 0.1
    regul : float, optional
        Default 0.15
    norm_par : float, optional
        Default 1.5

    Returns
    -------
    L : ndarray (n x n x classes)
        Graph Laplacians for each class.
    gamma_hat : ndarray (m x classes)
        Cluster posterior probabilities.
    mu : ndarray (n x classes)
        Cluster means.
    log_likelihood : ndarray (iterations,)
        Log-likelihood at each iteration.
    """
    y = np.asarray(y, dtype=np.float64)
    n = y.shape[1]
    m = y.shape[0]

    L = np.zeros((n,n,classes))
    W = np.zeros((n,n,classes))
    sigma = np.zeros((n-1,n-1,classes))
    mu = np.zeros((n, classes))
    gamma_hat = np.zeros((m, classes))
    p = np.zeros(classes)
    vecl = np.zeros((n,n,classes))
    vall = np.zeros((n,n,classes))
    yl = np.zeros((m, n-1, classes))

    # Initialization
    # mu_curr = mean(y) + randn(1,n).*std(y), then shift by mean again
    # p(class) = 1/classes
    for class_idx in range(classes):
        L[:,:,class_idx] = spread*np.eye(n) - (spread/n)*np.ones((n,n))
        mu_curr = np.mean(y, axis=0) + np.random.randn(n)*np.std(y,axis=0)
        mu_curr = mu_curr - np.mean(mu_curr)
        mu[:,class_idx] = mu_curr
        p[class_idx] = 1.0/classes

    log_likelihood = np.zeros(iterations)

    for it in range(iterations):
        # E-step
        pall = np.zeros(m, dtype=np.float64)
        for class_idx in range(classes):
            # eigen decomposition
            eigvals, eigvecs = np.linalg.eig(L[:,:,class_idx])
            # Store eigenvectors and diag eigenvalues
            vecl[:,:,class_idx] = eigvecs
            vall[:,:,class_idx] = np.diag(eigvals)

            sub_eigvals = eigvals[1:]
            Sigma_inv = np.diag(sub_eigvals) + regul*np.eye(n-1)
            Sigma = np.linalg.inv(Sigma_inv)
            Sigma = (Sigma+Sigma.T)/2
            sigma[:,:,class_idx] = Sigma

            Y_centered = y - mu[:,class_idx]
            YL = Y_centered @ vecl[:,1:,class_idx]
            yl[:,:,class_idx] = YL

            # mvnpdf(yl(:,:,class), zeros(1,n-1), sigma)
            mvn_val = mvnpdf(YL, np.zeros(n-1), Sigma)
            pall += p[class_idx]*mvn_val

        pall[pall==0] = 0.1

        for class_idx in range(classes):
            mvn_val = mvnpdf(yl[:,:,class_idx], np.zeros(n-1), sigma[:,:,class_idx])
            gamma_hat[:,class_idx] = (p[class_idx]*mvn_val)/pall

        log_likelihood[it] = np.sum(np.log(pall))

        # M-step
        for class_idx in range(classes):
            wght = gamma_hat[:,class_idx]
            mu[:,class_idx] = (wght @ y)/np.sum(wght)

            yc = (y - mu[:,class_idx])*np.sqrt(wght)[:,None]
            Z = gsp_distanz(yc)**2
            theta = np.mean(Z)/norm_par

            # W_curr = delta*gsp_learn_graph_log_degrees(Z./theta, 1,1)
            # delta = 2 from code 
            delta = 2
            W_curr, _ = gsp_learn_graph_log_degrees(Z/theta, 1, 1, params={})
            W_curr = delta*W_curr

            p[class_idx] = np.sum(wght)/m
            # L(:,:,class) = diag(sum(W)) - W
            W_sum = np.sum(W_curr, axis=1)
            L[:,:,class_idx] = np.diag(W_sum)-W_curr
            W_curr[W_curr<1e-3] = 0
            W[:,:,class_idx] = W_curr

    return L, gamma_hat, mu, log_likelihood

In [8]:



# Example parameters
n = 15  # graph size
m = 150  # number of signals
k = 2    # number of clusters
zero_thresh = 10e-4
p = np.linspace(0, 1, k+1)  # p = 0:1/k:1 in MATLAB
print(p)

# Generate graphs as MATLAB does
g = []
for i in range(k):
    g.append(generate_connected_graph(n, 0.7, zero_thresh, maxit=10, verbose=2))

# Generate gamma and gamma_cut
gamma = np.random.rand(m, 1)  # gamma = rand([m,1]) in MATLAB
gamma_cut = np.zeros((m, k))

dist = 0.5
y = np.zeros((m, n))
true_y = np.zeros((m, n, k))
center = np.zeros((n, k))
gauss = np.zeros((n, n, k))
Lap = np.zeros((n, n, k))

for i in range(k):
    # In MATLAB: gc = pinv(full(g(i).L));
    # We have g[i] as a dict, use g[i]['L']
    L_mat = g[i]['L']
    gc = np.linalg.pinv(L_mat)
    gauss[:, :, i] = (gc + gc.T)/2
    Lap[:, :, i] = L_mat

    c = dist * np.random.randn(n)
    c = c - np.mean(c)
    center[:, i] = c

    # gamma_cut(p(i)<gamma & gamma<=p(i+1), i) = 1;
    mask = (gamma[:, 0] > p[i]) & (gamma[:, 0] <= p[i+1])
    gamma_cut[mask, i] = 1

    samples = np.random.multivariate_normal(center[:, i], gauss[:, :, i], m)
    samples = gamma_cut[:, i][:, np.newaxis] * samples
    true_y[:, :, i] = samples
    y += samples

# Now we train glmm on data y
# Assuming glmm function is defined and returns (Ls, gamma_hats, mus, log_likelihood)
iterations = 200
Ls, gamma_hats, mus, log_likelihood = glmm(y, iterations, k)
print('Training done')

print("sum(gamma_hats,1):", np.sum(gamma_hats, axis=0))

# If identify_and_compare returns (identify, precision, recall, f, cl_errors)
identify, precision, recall, f, cl_errors ,  NMI_scores, num_of_edges_arr = identify_and_compare(Ls, Lap, gamma_hats, gamma_cut, k)

print("Identify:", identify)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f)
print("Cluster Errors:", cl_errors)
print("NMI Scores:", NMI_scores)
print("Number of edges:", num_of_edges_arr)

summed_gamma_hats = np.sum(gamma_hats, axis=1)
are_all_elements_one = np.allclose(summed_gamma_hats, 1.0, atol=1e-8)
print("\nAre all elements in each row of gamma_hats summing to 1:", are_all_elements_one)


[0.  0.5 1. ]
A connected graph has been created in 1 iteration(s)
A connected graph has been created in 1 iteration(s)
# iters:  180. Rel primal: 9.5094e-06 Rel dual: 3.1564e-06  OBJ 1.570e+01
Time needed is 0.01863408088684082 seconds
# iters:  180. Rel primal: 9.9550e-06 Rel dual: 3.6097e-06  OBJ 1.587e+01
Time needed is 0.006992816925048828 seconds
# iters:  184. Rel primal: 9.9536e-06 Rel dual: 3.7043e-06  OBJ 1.573e+01
Time needed is 0.007194042205810547 seconds
# iters:  183. Rel primal: 9.6488e-06 Rel dual: 3.5393e-06  OBJ 1.578e+01
Time needed is 0.007764101028442383 seconds
# iters:  185. Rel primal: 9.8626e-06 Rel dual: 3.6676e-06  OBJ 1.574e+01
Time needed is 0.007103919982910156 seconds
# iters:  184. Rel primal: 9.8959e-06 Rel dual: 3.6474e-06  OBJ 1.575e+01
Time needed is 0.007091999053955078 seconds
# iters:  185. Rel primal: 9.7238e-06 Rel dual: 3.6132e-06  OBJ 1.574e+01
Time needed is 0.007013797760009766 seconds
# iters:  185. Rel primal: 9.8694e-06 Rel dual: 3.6445e

In [9]:


# Parameters
n = 15  # graph size
m = 150 # number of signals
k = 2   # number of clusters
zero_thresh = 1e-4  

g = []
for i in range(k):
    g.append(generate_connected_graph(n, 0.7, zero_thresh, maxit=10, verbose=1))

gamma = np.random.rand(m, 1) 
gamma_cut = np.zeros((m, k))
dist = 0.5
p = np.linspace(0, 1, k+1)
y = np.zeros((m, n))
true_y = np.zeros((m, n, k))
center = np.zeros((n, k))
gauss = np.zeros((n, n, k))
Lap = np.zeros((n, n, k))

for i in range(k):
    L_mat = g[i]['L']
    gc = np.linalg.pinv(L_mat)
    gauss[:, :, i] = (gc + gc.T) / 2
    Lap[:, :, i] = L_mat
    c = dist * np.random.randn(n)
    c = c - np.mean(c)
    center[:, i] = c
    mask = (gamma[:, 0] > p[i]) & (gamma[:, 0] <= p[i+1])
    gamma_cut[mask, i] = 1
    samples = np.random.multivariate_normal(center[:, i], gauss[:, :, i], m)
    # Multiply each row by gamma_cut[:,i]
    samples = gamma_cut[:, i][:, np.newaxis] * samples
    true_y[:, :, i] = samples
    y += samples

# Now train glmm on data y
iterations = 200
Ls, gamma_hats, mus, log_likelihood = glmm(y, iterations, k)
print('Training done')
print("sum(gamma_hats,1):", np.sum(gamma_hats, axis=0))

identify, precision, recall, f, cl_errors, NMI_scores, num_of_edges_arr = identify_and_compare(Ls, Lap, gamma_hats, gamma_cut, k)

print("Identify:", identify)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f)
print("Cluster Errors:", cl_errors)
print("NMI Scores:", NMI_scores)
print("Number of edges:", num_of_edges_arr)

summed_gamma_hats = np.sum(gamma_hats, axis=1)
are_all_elements_one = np.allclose(summed_gamma_hats, 1.0, atol=1e-8)
print("\nAre all elements in each row of gamma_hats summing to 1:", are_all_elements_one)


# iters:  115. Rel primal: 9.8354e-06 Rel dual: 5.4727e-07  OBJ 1.292e+01
Time needed is 0.007689237594604492 seconds
# iters:  116. Rel primal: 9.0047e-06 Rel dual: 5.2771e-07  OBJ 1.280e+01
Time needed is 0.010355949401855469 seconds
# iters:  114. Rel primal: 9.9668e-06 Rel dual: 6.2763e-07  OBJ 1.293e+01
Time needed is 0.0052258968353271484 seconds
# iters:  116. Rel primal: 9.8362e-06 Rel dual: 7.0056e-07  OBJ 1.279e+01
Time needed is 0.008489847183227539 seconds
# iters:  114. Rel primal: 9.3138e-06 Rel dual: 5.0919e-07  OBJ 1.295e+01
Time needed is 0.006794929504394531 seconds
# iters:  117. Rel primal: 9.1821e-06 Rel dual: 5.7509e-07  OBJ 1.279e+01
Time needed is 0.004303932189941406 seconds
# iters:  114. Rel primal: 8.8878e-06 Rel dual: 4.4300e-07  OBJ 1.297e+01
Time needed is 0.004200935363769531 seconds
# iters:  117. Rel primal: 9.4469e-06 Rel dual: 6.2902e-07  OBJ 1.280e+01
Time needed is 0.004299163818359375 seconds
# iters:  113. Rel primal: 9.6729e-06 Rel dual: 5.4650e

In [10]:


# Set parameters according to the theoretical setup:
n = 15   # graph size
m = 150  # number of signals
k = 2    # number of clusters
zero_thresh = 1e-4

# Generate k connected Erdos-Renyi graphs as in the MATLAB code
g = []
for i in range(k):
    g.append(generate_connected_graph(n, 0.7, zero_thresh, maxit=10, verbose=1))

# Generate cluster memberships and signals according to the setup:
# gamma = rand(m,1) ~ uniform random in [0,1)
# p = [0, 1/k, 2/k, ..., 1] to define cluster assignments
gamma = np.random.rand(m, 1)
gamma_cut = np.zeros((m, k))
dist = 0.5
p = np.linspace(0, 1, k+1)  # for k=2, p=[0,0.5,1]; for balanced clusters {0.5,0.5}

y = np.zeros((m, n))
true_y = np.zeros((m, n, k))
center = np.zeros((n, k))
gauss = np.zeros((n, n, k))
Lap = np.zeros((n, n, k))

# According to the theory, each cluster k has:
#  - A mean vector µ_k ~ N(0, σ^2 I) with σ=0.5, done by dist * randn and then centering.
#  - Signals from cluster k are: x_m ∼ N(µ_k, L_k^\dagger)
# The code:
for i in range(k):
    L_mat = g[i]['L']          # Retrieve the Laplacian from the generated graph
    gc = np.linalg.pinv(L_mat) # Pseudoinverse for covariance
    gauss[:, :, i] = (gc + gc.T) / 2
    Lap[:, :, i] = L_mat

    # Generate center vector µ_k as per σ=0.5
    c = dist * np.random.randn(n)
    c = c - np.mean(c)  # ensure mean is zero
    center[:, i] = c

    # Assign signals to cluster i according to p:
    mask = (gamma[:, 0] > p[i]) & (gamma[:, 0] <= p[i+1])
    gamma_cut[mask, i] = 1

    # Generate signals for cluster i
    # x_m ∼ N(µ_i, gauss[:,:,i]) if in cluster i, else 0
    samples = np.random.multivariate_normal(center[:, i], gauss[:, :, i], m)
    samples = gamma_cut[:, i][:, np.newaxis] * samples  # 0 out for non-members
    true_y[:, :, i] = samples
    y += samples

# Now we train the GLMM on data y:
# Using glmm as previously defined
iterations = 200
y= normalize_data(y)
Ls, gamma_hats, mus, log_likelihood = glmm(y, iterations, k)
print('Training done')

# Check sum(gamma_hats,1) as in MATLAB (sum along rows in MATLAB is axis=0 in Python)
print("sum(gamma_hats,1):", np.sum(gamma_hats, axis=0))

# Identify and compare results (assuming identify_and_compare returns identify, precision, recall, f, cl_errors, NMI_scores, num_of_edges_arr)
identify, precision, recall, f, cl_errors, NMI_scores, num_of_edges_arr = identify_and_compare(Ls, Lap, gamma_hats, gamma_cut, k)

print("Identify:", identify)
print("Precision:", precision)
print("Recall:", recall)
print("F-measure:", f)
print("Cluster Errors:", cl_errors)
print("NMI Scores:", NMI_scores)
print("Number of edges:", num_of_edges_arr)

# Check if rows of gamma_hats sum to 1
summed_gamma_hats = np.sum(gamma_hats, axis=1)
are_all_elements_one = np.allclose(summed_gamma_hats, 1.0, atol=1e-8)
print("\nAre all elements in each row of gamma_hats summing to 1:", are_all_elements_one)


# iters:  113. Rel primal: 9.3773e-06 Rel dual: 1.4604e-06  OBJ 1.559e+01
Time needed is 0.0042269229888916016 seconds
# iters:  103. Rel primal: 9.5450e-06 Rel dual: 2.2919e-06  OBJ 1.539e+01
Time needed is 0.003918886184692383 seconds
# iters:  113. Rel primal: 9.0955e-06 Rel dual: 1.6782e-06  OBJ 1.603e+01
Time needed is 0.004656314849853516 seconds
# iters:  100. Rel primal: 9.2979e-06 Rel dual: 2.8353e-06  OBJ 1.538e+01
Time needed is 0.0037620067596435547 seconds
# iters:  119. Rel primal: 9.4597e-06 Rel dual: 1.9853e-06  OBJ 1.636e+01
Time needed is 0.011002063751220703 seconds
# iters:  102. Rel primal: 9.3334e-06 Rel dual: 4.4907e-06  OBJ 1.517e+01
Time needed is 0.003980159759521484 seconds
# iters:  119. Rel primal: 9.7231e-06 Rel dual: 2.1623e-06  OBJ 1.674e+01
Time needed is 0.007118940353393555 seconds
# iters:   98. Rel primal: 9.9571e-06 Rel dual: 5.0761e-06  OBJ 1.510e+01
Time needed is 0.0037202835083007812 seconds
# iters:  110. Rel primal: 9.9420e-06 Rel dual: 5.657

In [11]:

# n = 15  
# m = 150  
# k = 2
# zero_thresh = 10e-4

# g = [generate_connected_graph(n, 0.7, zero_thresh) for _ in range(k)]

# gamma = np.random.rand(m, 1)
# gamma_cut = np.zeros((m, k))
# dist = 0.5
# p = np.linspace(0, 1, k + 1)x
# y = np.zeros((m, n))
# true_y = np.zeros((m, n, k))
# center = np.zeros((n, k))
# gauss = np.zeros((n, n, k))
# Lap = np.zeros((n, n, k))

# for i in range(k):
#     gc = pinv(g[i])
#     gauss[:, :, i] = (gc + gc.T) / 2
#     Lap[:, :, i] = g[i]
#     center[:, i] = dist * np.random.randn(n)
#     center[:, i] = center[:, i] - np.mean(center[:, i])
#     gamma_cut[(p[i] < gamma[:, 0]) & (gamma[:, 0] <= p[i + 1]), i] = 1
#     true_y[:, :, i] = gamma_cut[:, i][:, np.newaxis] * np.random.multivariate_normal(center[:, i], gauss[:, :, i], m)
#     y += true_y[:, :, i]

# iterations = 200
# Ls, gamma_hats, mus, log_likelihood = glmm(y, iterations, k)
# print('Training done')
# # identify, precision, recall, f, cl_errors = identify_and_compare(Ls, Lap, gamma_hats, gamma_cut, k)
# identify, precision, recall, f, cl_errors, NMI_score, num_of_edges = identify_and_compare(Ls, Lap, gamma_hats, gamma_cut, k)

# print("Identify:", identify)
# print("Precision:", precision)
# print("Recall:", recall)
# print("F-measure:", f)
# print("Cluster Errors:", cl_errors)
# print('Normalized mutual information', NMI_score)
# print("Number of estimated edges", num_of_edges)
# summed_gamma_hats = np.sum(gamma_hats, axis=1)
# summed_gamma_hats_column = summed_gamma_hats[:, np.newaxis]
# are_all_elements_one = np.allclose(summed_gamma_hats_column, 1, atol=1e-8)
# print("\nAre all elements in the colum wise summed gamma_hat equal to 1:", are_all_elements_one)
# visualize_glmm(Ls, gamma_hats)