In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lifelines.utils import concordance_index
from mofapy2.run.entry_point import entry_point

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

from torch_geometric.nn import GATConv, GCNConv
from torch_geometric.data import Data
from torch_geometric.nn import knn_graph


torch.manual_seed(42)
np.random.seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim, latent_dim=64):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 512),
            nn.ReLU(),
            nn.Linear(512, input_dim),
        )
    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon, z


In [3]:
class MLPClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLPClassifier, self).__init__()
        self.embedding = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU()
        )
        self.classifier = nn.Linear(128, num_classes)
        
    def forward(self, x):
        embeddings = self.embedding(x)
        output = self.classifier(embeddings)
        return output
    
    def get_embeddings(self, x):
        """Extract embeddings from the hidden layer"""
        return self.embedding(x)

In [4]:
print("🔄 Loading preprocessed datasets...")
expression_scaled2 = pd.read_csv("../Updated_model_nd_dataset/processed_expression_FXS_OG.csv", index_col=0)
methylation_scaled2 = pd.read_csv("../Updated_model_nd_dataset/processed_methylation_FXS_OG.csv", index_col=0)
copy_number_scaled2 = pd.read_csv("../Updated_model_nd_dataset/processed_cnv_FXS_OG.csv", index_col=0)
phenotype_data_clean2 = pd.read_csv("../Updated_model_nd_dataset/processed_phenotype_FXS_OG.csv", index_col=0)
labels = pd.read_csv("../Updated_model_nd_dataset/processed_labels_3Omics_FXS_OG.csv", index_col=0)

🔄 Loading preprocessed datasets...


In [5]:
# Mixed Integration System - Hierarchical Multi-Modal Fusion
print("MIXED INTEGRATION SYSTEM - HIERARCHICAL MULTI-MODAL FUSION")
print("=" * 80)

# Transpose data for ML models (samples as rows)
print("🔄 Preparing individual omics datasets...")
expression_data = expression_scaled2.T  # (samples x genes)
methylation_data = methylation_scaled2.T  # (samples x probes)
cnv_data = copy_number_scaled2.T  # (samples x regions)
phenotype_data = phenotype_data_clean2  # (samples x clinical features)

print("📊 Individual Dataset Shapes:")
print(f"   Expression: {expression_data.shape}")
print(f"   Methylation: {methylation_data.shape}")
print(f"   Copy Number: {cnv_data.shape}")
print(f"   Phenotype: {phenotype_data.shape}")
print(f"   Labels: {labels.shape}")

# Verify sample alignment
sample_sets = [
    set(expression_data.index),
    set(methylation_data.index),
    set(cnv_data.index),
    set(phenotype_data.index),
    set(labels.index)
]

all_aligned = all(s == sample_sets[0] for s in sample_sets)
print(f"\n✅ Sample alignment: {'Perfect' if all_aligned else 'Misaligned'}")

# Data quality check
print(f"\n🔍 Data Quality Check:")
print(f"   Expression missing: {expression_data.isnull().sum().sum()}")
print(f"   Methylation missing: {methylation_data.isnull().sum().sum()}")
print(f"   CNV missing: {cnv_data.isnull().sum().sum()}")
print(f"   Phenotype missing: {phenotype_data.isnull().sum().sum()}")

# Prepare individual datasets
omics_datasets = {
    'expression': expression_data.values,
    'methylation': methylation_data.values,
    'cnv': cnv_data.values,
    'phenotype': phenotype_data.values
}

print(f"\n📋 Ready for Mixed Integration Analysis:")
print(f"   Total samples: {len(expression_data)}")
print(f"   Number of modalities: {len(omics_datasets)}")
print(f"   Number of classes: {labels.nunique()}")

print("\n✅ Data preparation completed for hierarchical fusion!")

MIXED INTEGRATION SYSTEM - HIERARCHICAL MULTI-MODAL FUSION
🔄 Preparing individual omics datasets...
📊 Individual Dataset Shapes:
   Expression: (205, 60660)
   Methylation: (205, 220147)
   Copy Number: (205, 56756)
   Phenotype: (205, 78)
   Labels: (205, 1)

✅ Sample alignment: Perfect

🔍 Data Quality Check:
   Expression missing: 0
   Methylation missing: 0
   CNV missing: 0
   Phenotype missing: 3318

📋 Ready for Mixed Integration Analysis:
   Total samples: 205
   Number of modalities: 4
   Number of classes: subtype_encoded    4
dtype: int64

✅ Data preparation completed for hierarchical fusion!


In [6]:
# Mixed Integration Architecture Classes
print("Defining Mixed Integration Architecture...")
print("=" * 50)

class ModalitySpecificAutoencoder(nn.Module):
    """Autoencoder for individual omics modalities"""
    def __init__(self, input_dim, latent_dim=64, hidden_dim=256):
        super(ModalitySpecificAutoencoder, self).__init__()
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, latent_dim),
            nn.ReLU()
        )
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, input_dim)
        )
    
    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon, z
    
    def encode(self, x):
        """Get only the latent representation"""
        return self.encoder(x)

class ModalitySpecificMLP(nn.Module):
    """MLP for individual omics modalities"""
    def __init__(self, input_dim, embedding_dim=64, hidden_dim=256):
        super(ModalitySpecificMLP, self).__init__()
        self.input_dim = input_dim
        self.embedding_dim = embedding_dim
        
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, embedding_dim),
            nn.ReLU()
        )
    
    def forward(self, x):
        return self.feature_extractor(x)

class HierarchicalFusionAutoencoder(nn.Module):
    """Second-stage autoencoder for fusing modality-specific embeddings"""
    def __init__(self, total_embedding_dim, fusion_latent_dim=32, hidden_dim=128):
        super(HierarchicalFusionAutoencoder, self).__init__()
        self.total_embedding_dim = total_embedding_dim
        self.fusion_latent_dim = fusion_latent_dim
        
        # Fusion encoder
        self.fusion_encoder = nn.Sequential(
            nn.Linear(total_embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, fusion_latent_dim),
            nn.ReLU()
        )
        
        # Fusion decoder
        self.fusion_decoder = nn.Sequential(
            nn.Linear(fusion_latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, total_embedding_dim)
        )
    
    def forward(self, x):
        z_fusion = self.fusion_encoder(x)
        x_recon = self.fusion_decoder(z_fusion)
        return x_recon, z_fusion
    
    def encode(self, x):
        """Get only the fusion representation"""
        return self.fusion_encoder(x)

class HierarchicalFusionMLP(nn.Module):
    """Second-stage MLP for fusing modality-specific embeddings"""
    def __init__(self, total_embedding_dim, num_classes, fusion_dim=64, hidden_dim=128):
        super(HierarchicalFusionMLP, self).__init__()
        
        # Fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(total_embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, fusion_dim),
            nn.ReLU()
        )
        
        # Classifier
        self.classifier = nn.Linear(fusion_dim, num_classes)
    
    def forward(self, x):
        fusion_features = self.fusion_layer(x)
        output = self.classifier(fusion_features)
        return output
    
    def get_fusion_features(self, x):
        """Get fusion layer features"""
        return self.fusion_layer(x)

class MixedIntegrationSystem(nn.Module):
    """Complete Mixed Integration System"""
    def __init__(self, modality_dims, modality_latent_dims, fusion_latent_dim, num_classes):
        super(MixedIntegrationSystem, self).__init__()
        self.modality_names = list(modality_dims.keys())
        self.modality_dims = modality_dims
        self.modality_latent_dims = modality_latent_dims
        
        # Stage 1: Modality-specific autoencoders
        self.modality_autoencoders = nn.ModuleDict()
        for modality, input_dim in modality_dims.items():
            self.modality_autoencoders[modality] = ModalitySpecificAutoencoder(
                input_dim, modality_latent_dims[modality]
            )
        
        # Stage 2: Fusion autoencoder
        total_embedding_dim = sum(modality_latent_dims.values())
        self.fusion_autoencoder = HierarchicalFusionAutoencoder(
            total_embedding_dim, fusion_latent_dim
        )
        
        # Final classifier
        self.classifier = nn.Linear(fusion_latent_dim, num_classes)
    
    def forward(self, modality_inputs):
        # Stage 1: Extract modality-specific embeddings
        modality_embeddings = []
        reconstruction_losses = []
        
        for modality in self.modality_names:
            x = modality_inputs[modality]
            x_recon, z = self.modality_autoencoders[modality](x)
            modality_embeddings.append(z)
            reconstruction_losses.append(F.mse_loss(x_recon, x))
        
        # Concatenate all modality embeddings
        concatenated_embeddings = torch.cat(modality_embeddings, dim=1)
        
        # Stage 2: Fusion
        fusion_recon, fusion_embedding = self.fusion_autoencoder(concatenated_embeddings)
        fusion_loss = F.mse_loss(fusion_recon, concatenated_embeddings)
        
        # Classification
        class_output = self.classifier(fusion_embedding)
        
        return {
            'class_output': class_output,
            'fusion_embedding': fusion_embedding,
            'modality_embeddings': modality_embeddings,
            'reconstruction_losses': reconstruction_losses,
            'fusion_loss': fusion_loss
        }
    
    def get_embeddings(self, modality_inputs):
        """Extract embeddings without gradients"""
        self.eval()
        with torch.no_grad():
            return self.forward(modality_inputs)

print("✅ Mixed Integration Architecture defined!")
print("🏗️ Components:")
print("   - ModalitySpecificAutoencoder: Individual omics encoders")
print("   - HierarchicalFusionAutoencoder: Second-stage fusion")
print("   - MixedIntegrationSystem: Complete end-to-end system")

Defining Mixed Integration Architecture...
✅ Mixed Integration Architecture defined!
🏗️ Components:
   - ModalitySpecificAutoencoder: Individual omics encoders
   - HierarchicalFusionAutoencoder: Second-stage fusion
   - MixedIntegrationSystem: Complete end-to-end system


In [7]:
# Mixed Integration Training Functions
print("Defining Mixed Integration Training Functions...")
print("=" * 50)

def prepare_mixed_integration_data(omics_datasets, labels, test_size=0.2, device='cpu'):
    """Prepare data for mixed integration training"""
    # Get common samples
    n_samples = len(labels)
    indices = np.arange(n_samples)
    
    # Split indices
    train_idx, test_idx = train_test_split(
        indices, test_size=test_size, random_state=42, 
        stratify=labels.values.ravel()
    )
    
    # Split each modality
    train_data = {}
    test_data = {}
    
    for modality, data in omics_datasets.items():
        train_data[modality] = torch.FloatTensor(data[train_idx]).to(device)
        test_data[modality] = torch.FloatTensor(data[test_idx]).to(device)
    
    # Labels
    y_train = torch.LongTensor(labels.values.ravel()[train_idx]).to(device)
    y_test = torch.LongTensor(labels.values.ravel()[test_idx]).to(device)
    
    return {
        'train_data': train_data,
        'test_data': test_data,
        'y_train': y_train,
        'y_test': y_test,
        'train_idx': train_idx,
        'test_idx': test_idx
    }

def train_mixed_integration_system(data_dict, modality_dims, config, device):
    """Train the complete mixed integration system"""
    print(f"🚀 Training Mixed Integration System...")
    print(f"Configuration: {config}")
    
    # Initialize model
    model = MixedIntegrationSystem(
        modality_dims=modality_dims,
        modality_latent_dims=config['modality_latent_dims'],
        fusion_latent_dim=config['fusion_latent_dim'],
        num_classes=config['num_classes']
    ).to(device)
    
    # Optimizers and criteria
    optimizer = optim.Adam(model.parameters(), lr=config['lr'])
    classification_criterion = nn.CrossEntropyLoss()
    
    # Training history
    history = {
        'train_loss': [], 'train_acc': [],
        'recon_losses': [], 'fusion_losses': []
    }
    
    model.train()
    for epoch in range(config['epochs']):
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(data_dict['train_data'])
        
        # Classification loss
        class_loss = classification_criterion(outputs['class_output'], data_dict['y_train'])
        
        # Reconstruction losses
        total_recon_loss = sum(outputs['reconstruction_losses'])
        fusion_loss = outputs['fusion_loss']
        
        # Total loss with weighting
        total_loss = (class_loss + 
                     config['recon_weight'] * total_recon_loss + 
                     config['fusion_weight'] * fusion_loss)
        
        # Backward pass
        total_loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs['class_output'], 1)
        accuracy = (predicted == data_dict['y_train']).float().mean().item()
        
        # Store history
        history['train_loss'].append(total_loss.item())
        history['train_acc'].append(accuracy)
        history['recon_losses'].append(total_recon_loss.item())
        history['fusion_losses'].append(fusion_loss.item())
        
        if (epoch + 1) % 20 == 0:
            print(f"Epoch [{epoch+1}/{config['epochs']}], "
                  f"Total Loss: {total_loss.item():.4f}, "
                  f"Class Loss: {class_loss.item():.4f}, "
                  f"Recon Loss: {total_recon_loss.item():.4f}, "
                  f"Fusion Loss: {fusion_loss.item():.4f}, "
                  f"Accuracy: {accuracy:.4f}")
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        # Training set evaluation
        train_outputs = model(data_dict['train_data'])
        _, train_pred = torch.max(train_outputs['class_output'], 1)
        train_accuracy = (train_pred == data_dict['y_train']).float().mean().item()
        
        # Test set evaluation
        test_outputs = model(data_dict['test_data'])
        _, test_pred = torch.max(test_outputs['class_output'], 1)
        test_accuracy = (test_pred == data_dict['y_test']).float().mean().item()
        
        # Extract embeddings
        train_embeddings = train_outputs['fusion_embedding'].cpu().numpy()
        test_embeddings = test_outputs['fusion_embedding'].cpu().numpy()
        modality_train_embeddings = [emb.cpu().numpy() for emb in train_outputs['modality_embeddings']]
        modality_test_embeddings = [emb.cpu().numpy() for emb in test_outputs['modality_embeddings']]
    
    return {
        'model': model,
        'history': history,
        'train_accuracy': train_accuracy,
        'test_accuracy': test_accuracy,
        'train_embeddings': train_embeddings,
        'test_embeddings': test_embeddings,
        'modality_train_embeddings': modality_train_embeddings,
        'modality_test_embeddings': modality_test_embeddings,
        'train_pred': train_pred.cpu().numpy(),
        'test_pred': test_pred.cpu().numpy()
    }

def train_stage_wise_mixed_integration(data_dict, modality_dims, config, device):
    """Train mixed integration in two separate stages"""
    print(f"🎯 Training Stage-wise Mixed Integration...")
    
    # Stage 1: Train individual modality autoencoders
    print("📍 Stage 1: Training modality-specific autoencoders...")
    modality_models = {}
    modality_embeddings_train = {}
    modality_embeddings_test = {}
    
    for modality, input_dim in modality_dims.items():
        print(f"  Training {modality} autoencoder...")
        
        # Initialize model
        ae = ModalitySpecificAutoencoder(
            input_dim, config['modality_latent_dims'][modality]
        ).to(device)
        
        optimizer = optim.Adam(ae.parameters(), lr=config['lr'])
        criterion = nn.MSELoss()
        
        # Training
        ae.train()
        for epoch in range(config['stage1_epochs']):
            optimizer.zero_grad()
            
            x_recon, z = ae(data_dict['train_data'][modality])
            loss = criterion(x_recon, data_dict['train_data'][modality])
            
            loss.backward()
            optimizer.step()
            
            if (epoch + 1) % 50 == 0:
                print(f"    Epoch [{epoch+1}/{config['stage1_epochs']}], Loss: {loss.item():.6f}")
        
        # Extract embeddings
        ae.eval()
        with torch.no_grad():
            _, z_train = ae(data_dict['train_data'][modality])
            _, z_test = ae(data_dict['test_data'][modality])
            
            modality_embeddings_train[modality] = z_train
            modality_embeddings_test[modality] = z_test
            modality_models[modality] = ae
    
    # Stage 2: Train fusion system
    print("📍 Stage 2: Training fusion system...")
    
    # Concatenate modality embeddings
    train_concat = torch.cat(list(modality_embeddings_train.values()), dim=1)
    test_concat = torch.cat(list(modality_embeddings_test.values()), dim=1)
    
    # Fusion autoencoder + classifier
    total_embedding_dim = sum(config['modality_latent_dims'].values())
    fusion_ae = HierarchicalFusionAutoencoder(
        total_embedding_dim, config['fusion_latent_dim']
    ).to(device)
    
    classifier = nn.Linear(config['fusion_latent_dim'], config['num_classes']).to(device)
    
    # Combined training
    fusion_optimizer = optim.Adam(
        list(fusion_ae.parameters()) + list(classifier.parameters()), 
        lr=config['lr']
    )
    
    recon_criterion = nn.MSELoss()
    class_criterion = nn.CrossEntropyLoss()
    
    fusion_history = {'loss': [], 'acc': []}
    
    for epoch in range(config['stage2_epochs']):
        fusion_optimizer.zero_grad()
        
        # Fusion autoencoder
        fusion_recon, fusion_z = fusion_ae(train_concat)
        recon_loss = recon_criterion(fusion_recon, train_concat)
        
        # Classification
        class_output = classifier(fusion_z)
        class_loss = class_criterion(class_output, data_dict['y_train'])
        
        # Combined loss
        total_loss = recon_loss + class_loss
        total_loss.backward()
        fusion_optimizer.step()
        
        # Accuracy
        _, predicted = torch.max(class_output, 1)
        accuracy = (predicted == data_dict['y_train']).float().mean().item()
        
        fusion_history['loss'].append(total_loss.item())
        fusion_history['acc'].append(accuracy)
        
        if (epoch + 1) % 20 == 0:
            print(f"  Epoch [{epoch+1}/{config['stage2_epochs']}], "
                  f"Loss: {total_loss.item():.4f}, Accuracy: {accuracy:.4f}")
    
    # Final evaluation
    fusion_ae.eval()
    classifier.eval()
    
    with torch.no_grad():
        # Training evaluation
        _, train_fusion_z = fusion_ae(train_concat)
        train_class_output = classifier(train_fusion_z)
        _, train_pred = torch.max(train_class_output, 1)
        train_accuracy = (train_pred == data_dict['y_train']).float().mean().item()
        
        # Test evaluation
        _, test_fusion_z = fusion_ae(test_concat)
        test_class_output = classifier(test_fusion_z)
        _, test_pred = torch.max(test_class_output, 1)
        test_accuracy = (test_pred == data_dict['y_test']).float().mean().item()
    
    return {
        'modality_models': modality_models,
        'fusion_autoencoder': fusion_ae,
        'classifier': classifier,
        'train_accuracy': train_accuracy,
        'test_accuracy': test_accuracy,
        'train_embeddings': train_fusion_z.cpu().numpy(),
        'test_embeddings': test_fusion_z.cpu().numpy(),
        'modality_train_embeddings': {k: v.cpu().numpy() for k, v in modality_embeddings_train.items()},
        'modality_test_embeddings': {k: v.cpu().numpy() for k, v in modality_embeddings_test.items()},
        'fusion_history': fusion_history
    }

print("✅ Mixed Integration Training Functions defined!")
print("🔧 Available training modes:")
print("   - End-to-end: train_mixed_integration_system()")
print("   - Stage-wise: train_stage_wise_mixed_integration()")

Defining Mixed Integration Training Functions...
✅ Mixed Integration Training Functions defined!
🔧 Available training modes:
   - End-to-end: train_mixed_integration_system()
   - Stage-wise: train_stage_wise_mixed_integration()


In [9]:
# Mixed Integration Experimental Setup and Execution
print("MIXED INTEGRATION EXPERIMENTAL SETUP")
print("=" * 80)

# Configuration
num_classes = len(np.unique(labels.values))
modality_dims = {
    'expression': expression_data.shape[1],
    'methylation': methylation_data.shape[1],
    'cnv': cnv_data.shape[1],
    'phenotype': phenotype_data.shape[1]
}

print(f"📊 Experimental Configuration:")
print(f"   Number of classes: {num_classes}")
print(f"   Total samples: {len(labels)}")
print(f"   Device: {device}")
print(f"\n📋 Modality Dimensions:")
for modality, dim in modality_dims.items():
    print(f"   {modality.capitalize()}: {dim:,} features")

# Prepare data
print(f"\n🔄 Preparing mixed integration data...")
mixed_data = prepare_mixed_integration_data(omics_datasets, labels, device=device)

print(f"📊 Data Split Summary:")
print(f"   Training samples: {len(mixed_data['y_train'])}")
print(f"   Test samples: {len(mixed_data['y_test'])}")

# Define experimental configurations
configs = {
    'end_to_end_small': {
        'modality_latent_dims': {
            'expression': 32, 'methylation': 32, 'cnv': 16, 'phenotype': 8
        },
        'fusion_latent_dim': 32,
        'num_classes': num_classes,
        'epochs': 100,
        'lr': 0.001,
        'recon_weight': 0.5,
        'fusion_weight': 0.3
    },
    'end_to_end_medium': {
        'modality_latent_dims': {
            'expression': 64, 'methylation': 64, 'cnv': 32, 'phenotype': 16
        },
        'fusion_latent_dim': 64,
        'num_classes': num_classes,
        'epochs': 100,
        'lr': 0.001,
        'recon_weight': 0.5,
        'fusion_weight': 0.3
    },
    'stage_wise_small': {
        'modality_latent_dims': {
            'expression': 32, 'methylation': 32, 'cnv': 16, 'phenotype': 8
        },
        'fusion_latent_dim': 32,
        'num_classes': num_classes,
        'stage1_epochs': 100,
        'stage2_epochs': 100,
        'lr': 0.001
    },
    'stage_wise_medium': {
        'modality_latent_dims': {
            'expression': 64, 'methylation': 64, 'cnv': 32, 'phenotype': 16
        },
        'fusion_latent_dim': 64,
        'num_classes': num_classes,
        'stage1_epochs': 100,
        'stage2_epochs': 100,
        'lr': 0.001
    }
}

print(f"\n🧪 Experimental Configurations:")
for config_name in configs.keys():
    print(f"   - {config_name}")

print(f"\n✅ Ready to run mixed integration experiments!")

MIXED INTEGRATION EXPERIMENTAL SETUP
📊 Experimental Configuration:
   Number of classes: 4
   Total samples: 205
   Device: cuda

📋 Modality Dimensions:
   Expression: 60,660 features
   Methylation: 220,147 features
   Cnv: 56,756 features
   Phenotype: 78 features

🔄 Preparing mixed integration data...


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [None]:
# Execute Mixed Integration Experiments
print("EXECUTING MIXED INTEGRATION EXPERIMENTS")
print("=" * 80)

# Store all results
mixed_integration_results = {}

# Run End-to-End experiments
print("🚀 Running End-to-End Mixed Integration Experiments...")
print("=" * 60)

for config_name in ['end_to_end_small', 'end_to_end_medium']:
    print(f"\n🔄 Running {config_name}...")
    print("-" * 40)
    
    config = configs[config_name]
    
    try:
        result = train_mixed_integration_system(
            mixed_data, modality_dims, config, device
        )
        mixed_integration_results[config_name] = result
        
        print(f"✅ {config_name} completed:")
        print(f"   Train Accuracy: {result['train_accuracy']:.4f}")
        print(f"   Test Accuracy: {result['test_accuracy']:.4f}")
        
    except Exception as e:
        print(f"❌ Error in {config_name}: {e}")
        mixed_integration_results[config_name] = {'error': str(e)}

# Run Stage-wise experiments
print(f"\n🎯 Running Stage-wise Mixed Integration Experiments...")
print("=" * 60)

for config_name in ['stage_wise_small', 'stage_wise_medium']:
    print(f"\n🔄 Running {config_name}...")
    print("-" * 40)
    
    config = configs[config_name]
    
    try:
        result = train_stage_wise_mixed_integration(
            mixed_data, modality_dims, config, device
        )
        mixed_integration_results[config_name] = result
        
        print(f"✅ {config_name} completed:")
        print(f"   Train Accuracy: {result['train_accuracy']:.4f}")
        print(f"   Test Accuracy: {result['test_accuracy']:.4f}")
        
    except Exception as e:
        print(f"❌ Error in {config_name}: {e}")
        mixed_integration_results[config_name] = {'error': str(e)}

print(f"\n🎉 Mixed Integration Experiments Completed!")
print(f"📊 Results stored for {len(mixed_integration_results)} configurations")

In [None]:
# Mixed Integration Results Analysis and ML Evaluation
print("MIXED INTEGRATION RESULTS ANALYSIS")
print("=" * 80)

# Function to evaluate multiple ML classifiers on embeddings
def evaluate_ml_models_mixed(train_embeddings, y_train, test_embeddings, y_test):
    """Evaluate ML models on mixed integration embeddings"""
    models = {
        'LogisticRegression': LogisticRegression(max_iter=500),
        'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
        'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
        'SVM': SVC(probability=True, random_state=42),
        'KNN': KNeighborsClassifier(n_neighbors=5),
        'NaiveBayes': GaussianNB()
    }
    
    results = {}
    for name, clf in models.items():
        clf.fit(train_embeddings, y_train)
        y_pred = clf.predict(test_embeddings)
        
        # Calculate metrics
        acc = accuracy_score(y_test, y_pred)
        f1_macro = f1_score(y_test, y_pred, average='macro')
        f1_micro = f1_score(y_test, y_pred, average='micro')
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        
        # AUC
        try:
            y_prob = clf.predict_proba(test_embeddings)
            auc = roc_auc_score(y_test, y_prob, multi_class='ovr')
        except:
            auc = None
            
        results[name] = {
            'accuracy': acc,
            'f1_macro': f1_macro,
            'f1_micro': f1_micro,
            'precision': precision,
            'recall': recall,
            'auc': auc
        }
    
    return results

# Analyze results and evaluate with ML models
print("🔍 Analyzing Mixed Integration Results...")

ml_evaluation_results = {}
direct_classification_results = {}

for config_name, result in mixed_integration_results.items():
    if 'error' in result:
        print(f"❌ Skipping {config_name} due to error: {result['error']}")
        continue
    
    print(f"\n📊 Analyzing {config_name}...")
    print("-" * 50)
    
    # Direct classification results
    direct_classification_results[config_name] = {
        'train_accuracy': result['train_accuracy'],
        'test_accuracy': result['test_accuracy']
    }
    
    print(f"🎯 Direct Classification Results:")
    print(f"   Train Accuracy: {result['train_accuracy']:.4f}")
    print(f"   Test Accuracy: {result['test_accuracy']:.4f}")
    
    # ML evaluation on fusion embeddings
    if 'train_embeddings' in result and 'test_embeddings' in result:
        print(f"\n🔍 Evaluating ML models on fusion embeddings...")
        
        ml_results = evaluate_ml_models_mixed(
            result['train_embeddings'], 
            mixed_data['y_train'].cpu().numpy(),
            result['test_embeddings'], 
            mixed_data['y_test'].cpu().numpy()
        )
        
        ml_evaluation_results[config_name] = ml_results
        
        # Find best ML model
        best_ml = max(ml_results.items(), key=lambda x: x[1]['accuracy'])
        print(f"🏆 Best ML Model: {best_ml[0]} (Accuracy: {best_ml[1]['accuracy']:.4f})")
        
        # Display top 3 ML results
        sorted_ml = sorted(ml_results.items(), key=lambda x: x[1]['accuracy'], reverse=True)
        print(f"📋 Top 3 ML Models on Fusion Embeddings:")
        for i, (model_name, metrics) in enumerate(sorted_ml[:3]):
            print(f"   {i+1}. {model_name}: Acc={metrics['accuracy']:.4f}, "
                  f"F1={metrics['f1_macro']:.4f}")

# Overall results summary
print(f"\n🏆 OVERALL MIXED INTEGRATION RESULTS SUMMARY:")
print("=" * 60)

# Create comparison DataFrame
comparison_data = []

for config_name, result in mixed_integration_results.items():
    if 'error' in result:
        continue
        
    # Direct classification
    comparison_data.append({
        'Configuration': config_name,
        'Method': 'Direct_Classification',
        'Accuracy': result['test_accuracy'],
        'F1_Macro': None  # Not calculated for direct
    })
    
    # Best ML model on embeddings
    if config_name in ml_evaluation_results:
        best_ml = max(ml_evaluation_results[config_name].items(), 
                     key=lambda x: x[1]['accuracy'])
        comparison_data.append({
            'Configuration': config_name,
            'Method': f'Embedding_{best_ml[0]}',
            'Accuracy': best_ml[1]['accuracy'],
            'F1_Macro': best_ml[1]['f1_macro']
        })

# Convert to DataFrame and display
comparison_df = pd.DataFrame(comparison_data)
if not comparison_df.empty:
    print("\n📋 Performance Comparison Table:")
    print(comparison_df.round(4).to_string(index=False))
    
    # Find overall best
    best_overall = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
    print(f"\n🥇 Best Overall Performance:")
    print(f"   Configuration: {best_overall['Configuration']}")
    print(f"   Method: {best_overall['Method']}")
    print(f"   Accuracy: {best_overall['Accuracy']:.4f}")

# Visualization
print(f"\n📈 Creating Mixed Integration Visualizations...")

if len([r for r in mixed_integration_results.values() if 'error' not in r]) >= 2:
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    
    # 1. Direct Classification Comparison
    configs = [k for k in direct_classification_results.keys()]
    test_accs = [direct_classification_results[k]['test_accuracy'] for k in configs]
    
    axes[0,0].bar(configs, test_accs)
    axes[0,0].set_title('Direct Classification Accuracy')
    axes[0,0].set_ylabel('Test Accuracy')
    axes[0,0].tick_params(axis='x', rotation=45)
    
    # 2. Best ML Model Comparison
    if not comparison_df.empty:
        ml_data = comparison_df[comparison_df['Method'].str.contains('Embedding')]
        if not ml_data.empty:
            sns.barplot(data=ml_data, x='Configuration', y='Accuracy', ax=axes[0,1])
            axes[0,1].set_title('Best ML Model on Embeddings')
            axes[0,1].tick_params(axis='x', rotation=45)
    
    # 3. Method Comparison
    if not comparison_df.empty:
        sns.boxplot(data=comparison_df, x='Method', y='Accuracy', ax=axes[1,0])
        axes[1,0].set_title('Method Performance Distribution')
        axes[1,0].tick_params(axis='x', rotation=45)
    
    # 4. Configuration Comparison
    if not comparison_df.empty:
        config_means = comparison_df.groupby('Configuration')['Accuracy'].mean()
        axes[1,1].bar(config_means.index, config_means.values)
        axes[1,1].set_title('Average Performance by Configuration')
        axes[1,1].set_ylabel('Mean Accuracy')
        axes[1,1].tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    plt.show()

# Store comprehensive results
mixed_integration_comprehensive_results = {
    'experiment_results': mixed_integration_results,
    'ml_evaluation_results': ml_evaluation_results,
    'direct_classification_results': direct_classification_results,
    'comparison_df': comparison_df,
    'configurations': configs,
    'modality_dims': modality_dims
}

print(f"\n💾 Comprehensive results stored in 'mixed_integration_comprehensive_results'")
print(f"✅ Mixed Integration Analysis Complete!")
print(f"🎯 Tested {len(mixed_integration_results)} configurations with hierarchical fusion")
print(f"🏗️ Architecture: Individual Omics AE → Fusion AE → Classification")
print(f"📊 Best accuracy achieved: {comparison_df['Accuracy'].max():.4f}" if not comparison_df.empty else "")