In [None]:
import sys
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.impute import SimpleImputer
from sklearn.metrics import f1_score, precision_score, recall_score, silhouette_score, accuracy_score, normalized_mutual_info_score, adjusted_rand_score
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import umap
import optuna

# Try importing Lifelines for C-index
try:
    from lifelines.utils import concordance_index
    HAS_LIFELINES = True
except ImportError:
    HAS_LIFELINES = False
    # print("Warning: lifelines not found. C-index will be skipped.")
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f">>> Running on: {DEVICE}")

# Reproducibility
torch.manual_seed(42)
np.random.seed(42)

FEATURE_COUNTS = [1000, 1500, 2500, 3500, 5000]
SUBTYPES_OF_INTEREST = [
    'Leiomyosarcoma, NOS',
    'Dedifferentiated liposarcoma',
    'Undifferentiated sarcoma',
    'Fibromyxosarcoma'
]
N_TRIALS = 30 # Number of Optuna trials per feature count

>>> Running on: cuda


In [2]:
def load_raw_aligned_data():
    print(f"\n>>> LOADING RAW ALIGNED DATA")
    
    # 1. Load Phenotype/Labels
    pheno_path = "../Data/phenotype_clean.csv"
    if not os.path.exists(pheno_path):
        raise FileNotFoundError(f"{pheno_path} not found.")
    
    pheno = pd.read_csv(pheno_path, index_col=0)
    
    col_name = 'primary_diagnosis.diagnoses'
    if col_name not in pheno.columns:
        print(f"Warning: '{col_name}' not found. Available: {pheno.columns.tolist()}")
        return None
        
    mask = pheno[col_name].isin(SUBTYPES_OF_INTEREST)
    pheno = pheno[mask]
    print(f"  Phenotype Samples (filtered): {pheno.shape[0]}")

    # 2. Load Omics
    def load_omic(path, name):
        if not os.path.exists(path):
            print(f"Warning: {path} not found. Skipping {name}.")
            return None
        df = pd.read_csv(path, index_col=0)
        df = df.T # samples x features
        return df

    rna = load_omic("../Data/expression_log.csv", "RNA (Expression)")
    meth = load_omic("../Data/methylation_mvalues.csv", "Methylation")
    cnv = load_omic("../Data/cnv_log.csv", "CNV")
    
    if rna is None or meth is None or cnv is None:
        raise ValueError("One or more omics files missing.")

    # 3. Intersection
    common_samples = pheno.index.intersection(rna.index).intersection(meth.index).intersection(cnv.index)
    print(f"  Common Samples: {len(common_samples)}")
    
    if len(common_samples) == 0:
        raise ValueError("No common samples found!")

    pheno = pheno.loc[common_samples]
    rna = rna.loc[common_samples]
    meth = meth.loc[common_samples]
    cnv = cnv.loc[common_samples]
    
    # 4. Prepare Labels
    le = LabelEncoder()
    Y = le.fit_transform(pheno[col_name])
    print(f"  Classes: {le.classes_}")
    
    # 5. Survival Data
    T, E = None, None
    if 'days_to_death' in pheno.columns and 'vital_status' in pheno.columns:
        events = (pheno['vital_status'].isin(['Dead', 'Deceased'])).astype(int).values
        times = np.zeros(len(pheno))
        if 'days_to_death' in pheno.columns:
            mask_d = events == 1
            d_times = pd.to_numeric(pheno['days_to_death'], errors='coerce').fillna(0).values
            times[mask_d] = d_times[mask_d]
        col_fup = 'days_to_last_follow_up' if 'days_to_last_follow_up' in pheno.columns else 'days_to_last_followup'
        if col_fup in pheno.columns:
            mask_a = events == 0
            f_times = pd.to_numeric(pheno[col_fup], errors='coerce').fillna(0).values
            times[mask_a] = f_times[mask_a]
        T = times
        E = events

    return rna, meth, cnv, Y, T, E, le.classes_


In [None]:
class PerOmicCMAE(nn.Module):
    def __init__(self, input_dim, latent_dim=64):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LayerNorm(256), nn.GELU(),
            nn.Linear(256, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 256), nn.GELU(),
            nn.Linear(256, input_dim)
        )
        self.projector = nn.Sequential(
            nn.Linear(latent_dim, latent_dim), nn.ReLU(),
            nn.Linear(latent_dim, latent_dim)
        )

    def forward(self, x, mask_ratio=0.0):
        if mask_ratio > 0 and self.training:
            mask = (torch.rand_like(x) > mask_ratio).float()
            x_masked = x * mask
        else:
            mask = torch.ones_like(x)
            x_masked = x
        z = self.encoder(x_masked)
        return self.decoder(z), self.projector(z), z, mask

class GatedAttentionFusion(nn.Module):
    def __init__(self, latent_dim=64, num_classes=4, dropout_rate=0.3): 
        super().__init__()
        self.gate_rna = nn.Linear(latent_dim, 1)
        self.gate_meth = nn.Linear(latent_dim, 1)
        self.gate_clin = nn.Linear(latent_dim, 1) # reusing name 'clin' for 'cnv'
        self.classifier = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, num_classes)
        )
        self.drop_rate = dropout_rate

    def forward(self, z_rna, z_meth, z_clin, apply_dropout=False):
        if apply_dropout and self.training:
            if torch.rand(1).item() < self.drop_rate: z_rna = torch.zeros_like(z_rna)
            if torch.rand(1).item() < self.drop_rate: z_meth = torch.zeros_like(z_meth)
            if torch.rand(1).item() < self.drop_rate: z_clin = torch.zeros_like(z_clin)

        w_rna = torch.sigmoid(self.gate_rna(z_rna))
        w_meth = torch.sigmoid(self.gate_meth(z_meth))
        w_clin = torch.sigmoid(self.gate_clin(z_clin))

        z_fused = (w_rna * z_rna + w_meth * z_meth + w_clin * z_clin) / (w_rna + w_meth + w_clin + 1e-8)
        return self.classifier(z_fused), torch.cat([w_rna, w_meth, w_clin], dim=1), z_fused

class StabilizedUncertaintyLoss(nn.Module):
    def __init__(self, num_losses):
        super().__init__()
        self.log_vars = nn.Parameter(torch.zeros(num_losses))
    def forward(self, losses):
        total = 0
        for i, loss in enumerate(losses):
            prec = torch.clamp(0.5 * torch.exp(-self.log_vars[i]), 0.2, 3.0)
            total += prec * loss + 0.5 * self.log_vars[i]
        return total

def contrastive_loss(q, k, queue, temp=0.1):
    q = F.normalize(q, dim=1); k = F.normalize(k, dim=1); queue = queue.detach()
    l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1)
    l_neg = torch.einsum('nc,ck->nk', [q, queue])
    logits = torch.cat([l_pos, l_neg], dim=1) / temp
    return F.cross_entropy(logits, torch.zeros(logits.shape[0], dtype=torch.long).to(q.device))


In [4]:
def run_cv_evaluation(params, n_features, rna_df, meth_df, cnv_df, Y, class_names, is_optuna=True):
    """
    params: dict of hyperparameters
    is_optuna: if True, returns only primary metric (F1 Macro). If False, returns full dict result.
    """
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    
    fold_metrics = {
        'accuracy': [], 'f1_macro': [], 'f1_micro': [],
        'precision': [], 'recall': [], 'silhouette': [], 'nmi': [], 'ari': []
    }
    
    # Hyperparams
    latent_dim = params.get('latent_dim', 128)
    lr_pre = params.get('lr_pre', 1e-3)
    lr_fine = params.get('lr_fine', 1e-3)
    dropout_rate = params.get('dropout_rate', 0.2)
    mask_ratio = params.get('mask_ratio', 0.5)
    epochs_fine = params.get('epochs_fine', 500)
    patience = params.get('patience', 15)
    
    if not is_optuna:
        print(f"    Params: {params}")

    for fold, (train_idx, val_idx) in enumerate(kf.split(rna_df, Y)):
        # --- A. Data Lease Safe Processing ---
        tr_rna_raw, val_rna_raw = rna_df.iloc[train_idx], rna_df.iloc[val_idx]
        tr_meth_raw, val_meth_raw = meth_df.iloc[train_idx], meth_df.iloc[val_idx]
        tr_cnv_raw, val_cnv_raw = cnv_df.iloc[train_idx], cnv_df.iloc[val_idx]
        
        # Impute
        imp_r = SimpleImputer(strategy='mean'); imp_m = SimpleImputer(strategy='mean'); imp_c = SimpleImputer(strategy='mean')
        tr_rna_imp = imp_r.fit_transform(tr_rna_raw); val_rna_imp = imp_r.transform(val_rna_raw)
        tr_meth_imp = imp_m.fit_transform(tr_meth_raw); val_meth_imp = imp_m.transform(val_meth_raw)
        tr_cnv_imp = imp_c.fit_transform(tr_cnv_raw); val_cnv_imp = imp_c.transform(val_cnv_raw)
        
        # Variance Filter
        def get_top_k_indices(data, k):
            vars = np.var(data, axis=0)
            return np.argpartition(vars, -k)[-k:] if data.shape[1] > k else np.arange(data.shape[1])
            
        r_idx = get_top_k_indices(tr_rna_imp, n_features)
        m_idx = get_top_k_indices(tr_meth_imp, n_features)
        c_idx = get_top_k_indices(tr_cnv_imp, n_features)
        
        tr_rna_sel = tr_rna_imp[:, r_idx]; val_rna_sel = val_rna_imp[:, r_idx]
        tr_meth_sel = tr_meth_imp[:, m_idx]; val_meth_sel = val_meth_imp[:, m_idx]
        tr_cnv_sel = tr_cnv_imp[:, c_idx]; val_cnv_sel = val_cnv_imp[:, c_idx]
        
        # Scale
        sc_r = StandardScaler(); sc_m = StandardScaler(); sc_c = StandardScaler()
        tr_rna = sc_r.fit_transform(tr_rna_sel); val_rna = sc_r.transform(val_rna_sel)
        tr_meth = sc_m.fit_transform(tr_meth_sel); val_meth = sc_m.transform(val_meth_sel)
        tr_cnv = sc_c.fit_transform(tr_cnv_sel); val_cnv = sc_c.transform(val_cnv_sel)
        
        dims = (tr_rna.shape[1], tr_meth.shape[1], tr_cnv.shape[1])
        
        # Tensor
        t_tr_r = torch.FloatTensor(tr_rna).to(DEVICE)
        t_tr_m = torch.FloatTensor(tr_meth).to(DEVICE)
        t_tr_c = torch.FloatTensor(tr_cnv).to(DEVICE)
        t_tr_y = torch.LongTensor(Y[train_idx]).to(DEVICE)
        
        t_val_r = torch.FloatTensor(val_rna).to(DEVICE)
        t_val_m = torch.FloatTensor(val_meth).to(DEVICE)
        t_val_c = torch.FloatTensor(val_cnv).to(DEVICE)
        t_val_y = torch.LongTensor(Y[val_idx]).to(DEVICE)

        # --- B. Model Init ---
        cmae_r = PerOmicCMAE(dims[0], latent_dim).to(DEVICE)
        cmae_m = PerOmicCMAE(dims[1], latent_dim).to(DEVICE)
        cmae_c = PerOmicCMAE(dims[2], latent_dim).to(DEVICE)
        mem_bank = nn.Parameter(F.normalize(torch.randn(latent_dim, 128), dim=0), requires_grad=False).to(DEVICE)
        loss_fn = StabilizedUncertaintyLoss(4).to(DEVICE)
        opt_pre = optim.AdamW(list(cmae_r.parameters())+list(cmae_m.parameters())+list(cmae_c.parameters())+list(loss_fn.parameters()), lr=lr_pre)

        # Pretraining
        cmae_r.train(); cmae_m.train(); cmae_c.train()
        for epoch in range(50):
            rec_r1, proj_r1, _, _ = cmae_r(t_tr_r, mask_ratio=mask_ratio)
            rec_m1, proj_m1, _, _ = cmae_m(t_tr_m, mask_ratio=mask_ratio)
            rec_c1, proj_c1, _, _ = cmae_c(t_tr_c, mask_ratio=mask_ratio)
            with torch.no_grad():
                _, proj_r2, _, _ = cmae_r(t_tr_r, mask_ratio=mask_ratio)
                _, proj_m2, _, _ = cmae_m(t_tr_m, mask_ratio=mask_ratio)
                _, proj_c2, _, _ = cmae_c(t_tr_c, mask_ratio=mask_ratio)
            loss = loss_fn([
                F.mse_loss(rec_r1, t_tr_r), F.mse_loss(rec_m1, t_tr_m), F.mse_loss(rec_c1, t_tr_c),
                (contrastive_loss(proj_r1, proj_r2, mem_bank) + contrastive_loss(proj_m1, proj_m2, mem_bank) + contrastive_loss(proj_c1, proj_c2, mem_bank))/3
            ])
            opt_pre.zero_grad(); loss.backward(); opt_pre.step()
            with torch.no_grad():
                avg_proj = (proj_r1 + proj_m1 + proj_c1) / 3
                mem_bank.data = torch.cat([mem_bank[:, avg_proj.shape[0]:], avg_proj.T], dim=1)

        # Fine-tuning
        cmae_r.eval(); cmae_m.eval(); cmae_c.eval()
        fusion = GatedAttentionFusion(latent_dim, num_classes=4, dropout_rate=dropout_rate).to(DEVICE)
        opt_fine = optim.AdamW(fusion.parameters(), lr=lr_fine)

        best_val_loss = float('inf')
        patience_counter = 0
        best_state = None
        
        with torch.no_grad():
            _, _, zr_tr, _ = cmae_r(t_tr_r); _, _, zm_tr, _ = cmae_m(t_tr_m); _, _, zc_tr, _ = cmae_c(t_tr_c)
            _, _, zr_val, _ = cmae_r(t_val_r); _, _, zm_val, _ = cmae_m(t_val_m); _, _, zc_val, _ = cmae_c(t_val_c)
        
        for epoch in range(epochs_fine):
            fusion.train()
            logits, weights, _ = fusion(zr_tr, zm_tr, zc_tr, apply_dropout=True)
            loss_cls = F.cross_entropy(logits, t_tr_y)
            opt_fine.zero_grad(); loss_cls.backward(); opt_fine.step()
            
            fusion.eval()
            with torch.no_grad():
                v_logits, _, v_fused = fusion(zr_val, zm_val, zc_val, apply_dropout=False)
                v_loss = F.cross_entropy(v_logits, t_val_y)
            
            if v_loss < best_val_loss:
                best_val_loss = v_loss
                best_state = fusion.state_dict()
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience: break
        
        if best_state: fusion.load_state_dict(best_state)
        fusion.eval()
        with torch.no_grad():
            logits, _, z_fused_val = fusion(zr_val, zm_val, zc_val)
            preds = logits.argmax(dim=1).cpu().numpy()
            targets = t_val_y.cpu().numpy()
            z_emb = z_fused_val.cpu().numpy()

        acc = accuracy_score(targets, preds)
        f1_mac = f1_score(targets, preds, average='macro')
        f1_mic = f1_score(targets, preds, average='micro')
        prec = precision_score(targets, preds, average='macro', zero_division=0)
        rec = recall_score(targets, preds, average='macro', zero_division=0)
        
        sil = -1
        if len(np.unique(targets)) > 1:
            try:
                sil = silhouette_score(z_emb, targets)
            except: pass
            
        fold_metrics['f1_macro'].append(f1_mac)
        
        if not is_optuna:
            # Full metrics
            kmeans = KMeans(n_clusters=len(np.unique(Y)), random_state=42, n_init=10).fit(z_emb)
            nmi = normalized_mutual_info_score(targets, kmeans.labels_)
            ari = adjusted_rand_score(targets, kmeans.labels_)
            
            fold_metrics['accuracy'].append(acc)
            fold_metrics['f1_micro'].append(f1_mic)
            fold_metrics['precision'].append(prec)
            fold_metrics['recall'].append(rec)
            fold_metrics['silhouette'].append(sil)
            fold_metrics['nmi'].append(nmi)
            fold_metrics['ari'].append(ari)
            
            if fold == 4: # Viz for last fold of best model
                try:
                    reducer = umap.UMAP(random_state=42)
                    z_umap = reducer.fit_transform(z_emb)
                    plt.figure(figsize=(10, 8))
                    scatter = plt.scatter(z_umap[:, 0], z_umap[:, 1], c=targets, cmap='viridis', s=50, alpha=0.8)
                    plt.title(f'UMAP Projection (Features={n_features})\nAcc={acc:.3f}, F1={f1_mac:.3f}')
                    plt.colorbar(scatter, ticks=range(len(class_names)), label='Class')
                    plt.xlabel('UMAP 1'); plt.ylabel('UMAP 2')
                    plt.tight_layout()
                    plt.savefig(f"cluster_viz_{n_features}_fold5_best.png")
                    plt.close()
                except Exception as ex: print(f"Viz error: {ex}")

    if is_optuna:
        return np.mean(fold_metrics['f1_macro'])
    else:
        return {k: np.mean(v) for k, v in fold_metrics.items() if v}


In [None]:
if __name__ == "__main__":
    # Load Data Once
    rna_df, meth_df, cnv_df, Y, T_surv, E_surv, class_names = load_raw_aligned_data()
    
    # Results file
    param_file = "best_hyperparams_all_features.txt"
    with open(param_file, 'w') as f:
        f.write("Feature_Count | Best_Validation_F1_Macro | Best_Params\n")
    
    all_final_results = []

    print("\n" + "="*60)
    print(f"STARTING OPTUNA OPTIMIZATION ({N_TRIALS} trials per feature set)")
    print("="*60)

    for n_feat in FEATURE_COUNTS:
        print(f"\n>>> OPTIMIZING FOR {n_feat} FEATURES...")
        
        def objective(trial):
            params = {
                'latent_dim': trial.suggest_categorical("latent_dim", [16, 32, 64, 128]),
                'lr_pre': trial.suggest_float("lr_pre", 1e-5, 1e-3, log=True),
                'lr_fine': trial.suggest_float("lr_fine", 1e-5, 1e-3, log=True),
                'dropout_rate': trial.suggest_float("dropout_rate", 0.1, 0.5),
                'mask_ratio': trial.suggest_float("mask_ratio", 0.1, 0.6),
                'epochs_fine': 500,
                'patience': 15
            }
            return run_cv_evaluation(params, n_feat, rna_df, meth_df, cnv_df, Y, class_names, is_optuna=True)

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=N_TRIALS)
        
        best_p = study.best_params
        # Add fixed params for saving
        best_p['epochs_fine'] = 500
        best_p['patience'] = 15
        
        print(f"  Best F1: {study.best_value:.4f}")
        print(f"  Best Params: {best_p}")
        
        # Save to text file
        with open(param_file, 'a') as f:
            f.write(f"{n_feat} | {study.best_value:.4f} | {best_p}\n")
            
        # Run Final Eval with Best Params
        print(f"  Running Final Evaluation for {n_feat} with Best Params...")
        res = run_cv_evaluation(best_p, n_feat, rna_df, meth_df, cnv_df, Y, class_names, is_optuna=False)
        res['n_features'] = n_feat
        all_final_results.append(res)

    print("\n" + "="*60)
    print("FINAL OPTIMIZED RESULTS SUMMARY")
    print("="*60)
    df_res = pd.DataFrame(all_final_results)
    if not df_res.empty:
        cols = ['n_features', 'f1_macro', 'f1_micro', 'precision', 'recall', 'silhouette', 'nmi', 'ari', 'accuracy']
        cols = [c for c in cols if c in df_res.columns]
        print(df_res[cols].round(4).to_string(index=False))
        df_res.to_csv("PerOmicsCMAE_optimized_results.csv", index=False)
        print("\nFull results saved to 'PerOmicsCMAE_optimized_results.csv'")
        print(f"Best hyperparameters saved to '{param_file}'")


>>> LOADING RAW ALIGNED DATA
  Phenotype Samples (filtered): 229


[I 2026-01-20 11:10:49,957] A new study created in memory with name: no-name-1d719864-b968-4796-9101-a13a168608d1


  Common Samples: 205
  Classes: ['Dedifferentiated liposarcoma' 'Fibromyxosarcoma' 'Leiomyosarcoma, NOS'
 'Undifferentiated sarcoma']

STARTING OPTUNA OPTIMIZATION (30 trials per feature set)

>>> OPTIMIZING FOR 2500 FEATURES...


[I 2026-01-20 11:11:25,801] Trial 0 finished with value: 0.41985036895740685 and parameters: {'latent_dim': 64, 'lr_pre': 1.0045863975052194e-05, 'lr_fine': 0.00011396327929068832, 'dropout_rate': 0.31171385000111396, 'mask_ratio': 0.13952458654027902}. Best is trial 0 with value: 0.41985036895740685.
[I 2026-01-20 11:11:56,702] Trial 1 finished with value: 0.6544546207677673 and parameters: {'latent_dim': 128, 'lr_pre': 9.504106003741516e-05, 'lr_fine': 0.000827274690835053, 'dropout_rate': 0.4257932995862319, 'mask_ratio': 0.42446053487037927}. Best is trial 1 with value: 0.6544546207677673.
[I 2026-01-20 11:12:30,603] Trial 2 finished with value: 0.5882442764331308 and parameters: {'latent_dim': 64, 'lr_pre': 0.0009817597732984535, 'lr_fine': 0.000119509047648193, 'dropout_rate': 0.3698265972297132, 'mask_ratio': 0.3702108715504958}. Best is trial 1 with value: 0.6544546207677673.
[I 2026-01-20 11:13:01,531] Trial 3 finished with value: 0.5805052208999577 and parameters: {'latent_di

  Best F1: 0.6565
  Best Params: {'latent_dim': 128, 'lr_pre': 0.0002893561677094744, 'lr_fine': 0.0006003656376963125, 'dropout_rate': 0.42503590114103296, 'mask_ratio': 0.43522810503954523, 'epochs_fine': 500, 'patience': 15}
  Running Final Evaluation for 2500 with Best Params...
    Params: {'latent_dim': 128, 'lr_pre': 0.0002893561677094744, 'lr_fine': 0.0006003656376963125, 'dropout_rate': 0.42503590114103296, 'mask_ratio': 0.43522810503954523, 'epochs_fine': 500, 'patience': 15}


  warn(
[I 2026-01-20 11:27:53,839] A new study created in memory with name: no-name-7a8bd4ca-3b23-4922-97bb-236d62178558



>>> OPTIMIZING FOR 5000 FEATURES...


[I 2026-01-20 11:28:28,535] Trial 0 finished with value: 0.5460429623151061 and parameters: {'latent_dim': 64, 'lr_pre': 5.2301472895546425e-05, 'lr_fine': 0.0002060676434584362, 'dropout_rate': 0.4468146085634642, 'mask_ratio': 0.528921788416767}. Best is trial 0 with value: 0.5460429623151061.
[I 2026-01-20 11:29:03,619] Trial 1 finished with value: 0.528662192199344 and parameters: {'latent_dim': 64, 'lr_pre': 3.229042571378479e-05, 'lr_fine': 0.0001546121458485761, 'dropout_rate': 0.2135568724750558, 'mask_ratio': 0.5043230145202772}. Best is trial 0 with value: 0.5460429623151061.
[I 2026-01-20 11:29:38,614] Trial 2 finished with value: 0.3742118373617229 and parameters: {'latent_dim': 32, 'lr_pre': 8.29825503133397e-05, 'lr_fine': 4.799152770412432e-05, 'dropout_rate': 0.4945181911213702, 'mask_ratio': 0.17874672118914}. Best is trial 0 with value: 0.5460429623151061.
[I 2026-01-20 11:30:13,641] Trial 3 finished with value: 0.6076778703752388 and parameters: {'latent_dim': 128, '

  Best F1: 0.7015
  Best Params: {'latent_dim': 128, 'lr_pre': 0.00021584102172935444, 'lr_fine': 0.0005707043882331389, 'dropout_rate': 0.1544009235029705, 'mask_ratio': 0.2774944218422873, 'epochs_fine': 500, 'patience': 15}
  Running Final Evaluation for 5000 with Best Params...
    Params: {'latent_dim': 128, 'lr_pre': 0.00021584102172935444, 'lr_fine': 0.0005707043882331389, 'dropout_rate': 0.1544009235029705, 'mask_ratio': 0.2774944218422873, 'epochs_fine': 500, 'patience': 15}


  warn(
[I 2026-01-20 11:44:54,824] A new study created in memory with name: no-name-0d675344-0611-48d0-8736-dfd373429d0d



>>> OPTIMIZING FOR 7500 FEATURES...


[I 2026-01-20 11:45:31,283] Trial 0 finished with value: 0.6497708935208936 and parameters: {'latent_dim': 128, 'lr_pre': 0.00015769462409929942, 'lr_fine': 7.517673900444637e-05, 'dropout_rate': 0.3053592576600094, 'mask_ratio': 0.18470729599414906}. Best is trial 0 with value: 0.6497708935208936.
[I 2026-01-20 11:46:07,263] Trial 1 finished with value: 0.6182723754341402 and parameters: {'latent_dim': 128, 'lr_pre': 9.644996742079527e-05, 'lr_fine': 0.00011176992634157218, 'dropout_rate': 0.275065627703872, 'mask_ratio': 0.4729414856086108}. Best is trial 0 with value: 0.6497708935208936.
[I 2026-01-20 11:46:43,192] Trial 2 finished with value: 0.5312763736764767 and parameters: {'latent_dim': 64, 'lr_pre': 2.144369413618792e-05, 'lr_fine': 0.0002537748011886686, 'dropout_rate': 0.14441588560606258, 'mask_ratio': 0.11035295855452495}. Best is trial 0 with value: 0.6497708935208936.
[I 2026-01-20 11:47:16,695] Trial 3 finished with value: 0.6339241315959583 and parameters: {'latent_di

  Best F1: 0.6858
  Best Params: {'latent_dim': 256, 'lr_pre': 0.00015014231996436972, 'lr_fine': 0.0002029699286802775, 'dropout_rate': 0.14730563909082575, 'mask_ratio': 0.12805344738585886, 'epochs_fine': 500, 'patience': 15}
  Running Final Evaluation for 7500 with Best Params...
    Params: {'latent_dim': 256, 'lr_pre': 0.00015014231996436972, 'lr_fine': 0.0002029699286802775, 'dropout_rate': 0.14730563909082575, 'mask_ratio': 0.12805344738585886, 'epochs_fine': 500, 'patience': 15}


  warn(
[I 2026-01-20 12:03:50,440] A new study created in memory with name: no-name-b1bf2b11-f56d-4973-bcd5-91dcae16b7d8



>>> OPTIMIZING FOR 10000 FEATURES...


[I 2026-01-20 12:04:32,123] Trial 0 finished with value: 0.5764919478942228 and parameters: {'latent_dim': 64, 'lr_pre': 0.0009822612302440008, 'lr_fine': 8.857076847870617e-05, 'dropout_rate': 0.10830878838070142, 'mask_ratio': 0.36051339629888124}. Best is trial 0 with value: 0.5764919478942228.
[I 2026-01-20 12:05:12,890] Trial 1 finished with value: 0.540207930042026 and parameters: {'latent_dim': 32, 'lr_pre': 1.5700850476691922e-05, 'lr_fine': 0.000304612501634715, 'dropout_rate': 0.29573194583678586, 'mask_ratio': 0.10579133623702106}. Best is trial 0 with value: 0.5764919478942228.
[I 2026-01-20 12:05:55,299] Trial 2 finished with value: 0.40773691262202155 and parameters: {'latent_dim': 64, 'lr_pre': 3.5714613898221406e-05, 'lr_fine': 6.693134838139036e-05, 'dropout_rate': 0.44316114621316904, 'mask_ratio': 0.5530082192012064}. Best is trial 0 with value: 0.5764919478942228.
[I 2026-01-20 12:06:36,321] Trial 3 finished with value: 0.4850689716279552 and parameters: {'latent_di

  Best F1: 0.6963
  Best Params: {'latent_dim': 256, 'lr_pre': 0.00025967397799518585, 'lr_fine': 0.00046108507289050057, 'dropout_rate': 0.13276906217271206, 'mask_ratio': 0.3279234272082725, 'epochs_fine': 500, 'patience': 15}
  Running Final Evaluation for 10000 with Best Params...
    Params: {'latent_dim': 256, 'lr_pre': 0.00025967397799518585, 'lr_fine': 0.00046108507289050057, 'dropout_rate': 0.13276906217271206, 'mask_ratio': 0.3279234272082725, 'epochs_fine': 500, 'patience': 15}


  warn(
[I 2026-01-20 12:23:35,100] A new study created in memory with name: no-name-f36e9d22-7d68-431b-95af-2652abaf2ff6



>>> OPTIMIZING FOR 15000 FEATURES...


[I 2026-01-20 12:24:19,152] Trial 0 finished with value: 0.5986345185157764 and parameters: {'latent_dim': 256, 'lr_pre': 0.0006019720389987982, 'lr_fine': 6.235268199078091e-05, 'dropout_rate': 0.11407533800587344, 'mask_ratio': 0.39366908205922835}. Best is trial 0 with value: 0.5986345185157764.
[I 2026-01-20 12:25:02,504] Trial 1 finished with value: 0.4325090536766786 and parameters: {'latent_dim': 256, 'lr_pre': 2.2222748172925413e-05, 'lr_fine': 1.6087976458739034e-05, 'dropout_rate': 0.39393907230801606, 'mask_ratio': 0.31565196616130287}. Best is trial 0 with value: 0.5986345185157764.
[I 2026-01-20 12:25:43,678] Trial 2 finished with value: 0.6257223328628977 and parameters: {'latent_dim': 32, 'lr_pre': 0.00013117609804235946, 'lr_fine': 0.0007108992978645359, 'dropout_rate': 0.4259788244250864, 'mask_ratio': 0.42198248605956634}. Best is trial 2 with value: 0.6257223328628977.
[I 2026-01-20 12:26:25,677] Trial 3 finished with value: 0.5421697223539329 and parameters: {'laten

  Best F1: 0.6973
  Best Params: {'latent_dim': 128, 'lr_pre': 0.00042598383553247107, 'lr_fine': 0.0006651507262003653, 'dropout_rate': 0.3718278457325569, 'mask_ratio': 0.10708908436497266, 'epochs_fine': 500, 'patience': 15}
  Running Final Evaluation for 15000 with Best Params...
    Params: {'latent_dim': 128, 'lr_pre': 0.00042598383553247107, 'lr_fine': 0.0006651507262003653, 'dropout_rate': 0.3718278457325569, 'mask_ratio': 0.10708908436497266, 'epochs_fine': 500, 'patience': 15}


  warn(



FINAL OPTIMIZED RESULTS SUMMARY
 n_features  f1_macro  f1_micro  precision  recall  silhouette    nmi    ari  accuracy
       2500    0.6456    0.7659     0.7064  0.6573      0.0805 0.4531 0.3334    0.7659
       5000    0.6383    0.7415     0.7111  0.6380      0.0830 0.3805 0.2731    0.7415
       7500    0.6680    0.7512     0.7073  0.6633      0.0810 0.3974 0.2843    0.7512
      10000    0.6977    0.7707     0.7360  0.6889      0.0860 0.4211 0.3210    0.7707
      15000    0.6863    0.7854     0.7285  0.6979      0.1005 0.4326 0.4089    0.7854

Full results saved to 'PerOmicsCMAE_optimized_results.csv'
Best hyperparameters saved to 'best_hyperparams_all_features.txt'
