In [49]:
# Standard library
import importlib

# Third-party. `plt` must alias the pyplot submodule: the top-level
# `matplotlib` package does not expose plotting functions such as
# `plt.plot` / `plt.subplots`.
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp

# Import the project-local modules by name so that each one is available as a
# module object that can be passed to importlib.reload() below.
amp = importlib.import_module("amp")
pca_pack = importlib.import_module("pca_pack")
preprocessing = importlib.import_module("preprocessing")
emp_bayes = importlib.import_module("emp_bayes")
hierarchical = importlib.import_module("hierarchical_clustering_modalities")
# Bound as `pipeline_module` rather than `pipeline`: a later cell assigns
# `pipeline` to a MultimodalPCAPipelineClustering instance, and reusing the
# name for both the module and the instance makes the meaning of `pipeline`
# depend on which cell ran last.
pipeline_module = importlib.import_module("complete_pipeline")

# Re-execute each module so edits on disk are picked up without restarting
# the kernel. The complete_pipeline module is reloaded last, after the other
# project modules.
importlib.reload(amp)
importlib.reload(pca_pack)
importlib.reload(preprocessing)
importlib.reload(emp_bayes)
importlib.reload(hierarchical)
importlib.reload(pipeline_module)

# Pull the public classes/functions out of the freshly reloaded modules so the
# rest of the notebook can refer to them by short name.
ebamp_multimodal = amp.ebamp_multimodal
MultiModalityPCA = pca_pack.MultiModalityPCA
MultiModalityPCADiagnostics = preprocessing.MultiModalityPCADiagnostics
ClusterEmpiricalBayes = emp_bayes.ClusterEmpiricalBayes
ModalityClusterer = hierarchical.ModalityClusterer
MultimodalPCAPipeline = pipeline_module.MultimodalPCAPipeline
MultimodalPCAPipelineClustering = pipeline_module.MultimodalPCAPipelineClustering

In [50]:
def generate_rademacher(shape):
    """Draw an array of i.i.d. Rademacher (+/-1) signs.

    Parameters
    ----------
    shape : int or tuple of ints
        Output shape; forwarded unchanged as ``size`` to ``np.random.choice``.

    Returns
    -------
    numpy.ndarray
        Array of the requested shape whose entries are each -1 or +1, drawn
        from NumPy's global random state (so ``np.random.seed`` controls it).
    """
    signs = [-1, 1]
    return np.random.choice(signs, size=shape)

In [51]:
# Build a synthetic three-modality data set of the form
#   X_k = (1/n) * U_k D_k V_k^T + Z_k,   k = 1, 2, 3.
# Seed NumPy's global RNG for reproducibility. All draws below come from that
# global state, so the data depend on the order of the statements
# (U1, extra U2 column, U3, V1, V2, V3, Z1, Z2, Z3).
np.random.seed(22)

# Dimensions: n rows shared by every modality; modality k has p_k columns
# and rank r_k.
n = 5000
p1, p2, p3 = 3000, 2000, 2500
r1, r2, r3 = 4, 5, 3

# --- U1: n x r1 matrix of i.i.d. Rademacher (+/-1) entries ---
U1 = generate_rademacher((n, r1))

# --- U2: all r1 = 4 columns of U1 plus one fresh Rademacher column (r2 = 5) ---
U2 = np.hstack([U1[:, :r1], generate_rademacher((n, 1))])

# --- U3: n x r3 matrix of i.i.d. Rademacher entries, drawn independently of U1/U2 ---
U3 = generate_rademacher((n, r3))

# --- Generate V_k matrices (p_k x r_k) with Rademacher entries ---

V1 = generate_rademacher((p1, r1))
V2 = generate_rademacher((p2, r2))
V3 = generate_rademacher((p3, r3))

# --- Create diagonal matrices D_k = diag(5, 10, ..., 5 * r_k) ---
D1 = np.diag([5 * (i+1) for i in range(r1)])
D2 = np.diag([5 * (i+1) for i in range(r2)])
D3 = np.diag([5 * (i+1) for i in range(r3)])

# --- Generate noise matrices Z_k with i.i.d. N(0, 1/n) entries
#     (standard normal draws divided by sqrt(n)) ---
Z1 = np.random.randn(n, p1) / np.sqrt(n)
Z2 = np.random.randn(n, p2) / np.sqrt(n)
Z3 = np.random.randn(n, p3) / np.sqrt(n)

# --- Compute X_k = (1/n) * U_k D_k V_k^T + Z_k ---
# `*` and `@` share precedence and associate left to right, so U_k is scaled
# by 1/n first and the matrix products follow.
X1 = (1/n) * U1 @ D1 @ V1.T + Z1
X2 = (1/n) * U2 @ D2 @ V2.T + Z2
X3 = (1/n) * U3 @ D3 @ V3.T + Z3

# --- Output list of modalities and their ranks (same order) ---
X_list = [X1, X2, X3]
K_list = [r1, r2, r3]
# Ground-truth grouping of the U factors: U1 and U2 share columns (cluster 0),
# U3 is independent (cluster 1).
# NOTE(review): not passed to the denoise_amp call visible in the next cell,
# which uses compute_clusters=True -- confirm whether it is used elsewhere.
cluster_labels_U = np.array([0, 0, 1])

In [52]:
# Clustering-aware multimodal PCA: build the pipeline object, then run its
# AMP denoising step on the synthetic modalities.
pipeline = MultimodalPCAPipelineClustering()

amp_results = pipeline.denoise_amp(
    # Data: one matrix and one rank per modality, in matching order.
    X_list=X_list,
    K_list=K_list,
    # Modality-clustering options.
    compute_clusters=True,   # switch to False when passing cluster_labels_U instead
    num_clusters=2,          # adjust as needed
    threshold=None,          # optional; used for threshold-based clustering
    # AMP run options.
    amp_iters=15,
    muteu=False,
    mutev=False,
    preprocess=False,        # set True if preprocessing is needed
)


=== Step 2: PCA ===
Running PCA for Modality 0: Shape (5000, 3000), K=4
Estimated s: [19.98789921 14.99769314  9.99082197  4.98339411]
Running PCA for Modality 1: Shape (5000, 2000), K=5
Estimated s: [25.04140519 19.98062568 14.99162476  9.95746296  4.98143185]
Running PCA for Modality 2: Shape (5000, 2500), K=3
Estimated s: [15.01982604 10.01436914  5.01352604]

=== Step 3: Clustering Modalities via U ===
Similarity Matrix (HSS):
 [[ 0.99597544  0.67671812 -0.00225925]
 [ 0.67671812  0.98923884  0.00236035]
 [-0.00225925  0.00236035  0.998059  ]]
Cluster Labels for U: [1 1 2]

=== Step 4: Constructing Empirical Bayes Models ===

=== Step 5: Running AMP ===

--- AMP Iteration 1 ---
Sum of entries per modality (mu_u, mu_v, sigma_u, sigma_v):
  Modality 0: mu_u=29.1485, mu_v=9.7232, sigma_u=2.3302, sigma_v=0.7996
  Modality 1: mu_u=25.6823, mu_v=15.1232, sigma_u=1.7533, sigma_v=0.9982
  Modality 2: mu_u=14.8464, mu_v=30.5865, sigma_u=1.4788, sigma_v=3.0765

--- AMP Iteration 2 ---
Sum o