*Note: the code has been split into notebook cells for better organization.

In [None]:
# Cell 1: Imports and Configuration
import os
import pandas as pd
import numpy as np
import warnings
import logging
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass

# ML imports
from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score,
                             accuracy_score, roc_curve, precision_recall_curve, auc)
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from scipy.stats import t

# Optional dependencies with proper error handling
# Each optional back-end is imported inside its own try/except so that a missing
# package only disables that algorithm. The *_AVAILABLE flags set here are read
# later when the algorithm registry is built and when models are trained.
try:
    from tabpfn import TabPFNClassifier
    TABPFN_AVAILABLE = True
except ImportError:
    TABPFN_AVAILABLE = False
    print("TabPFN not available. Install with: pip install tabpfn")

try:
    import xgboost as xgb
    XGBOOST_AVAILABLE = True
except ImportError:
    XGBOOST_AVAILABLE = False
    print("XGBoost not available. Install with: pip install xgboost")

# TabNet needs both pytorch_tabnet and torch; if either import fails the flag
# is set to False.
try:
    from pytorch_tabnet.tab_model import TabNetClassifier
    import torch
    TABNET_AVAILABLE = True
except ImportError:
    TABNET_AVAILABLE = False
    print("TabNet not available. Install with: pip install pytorch-tabnet torch")

# Configuration
@dataclass
class AnalysisConfig:
    """Configuration for the analysis.

    All fields have defaults, so ``AnalysisConfig()`` is a valid configuration.
    When ``datasets`` is left as ``None`` the default dataset map is derived
    from ``data_base_path`` in ``__post_init__``.
    """
    # File paths - CHANGE THESE TO YOUR PATHS
    data_base_path: str = "/Users/joi263/Documents/MultimodalTabData/data"

    # Dataset configurations: dataset name -> CSV path.
    # None means "build the default map from data_base_path".
    datasets: Optional[Dict[str, str]] = None

    # ML parameters
    test_size: float = 0.25
    cv_folds: int = 5
    random_state: int = 42
    max_features: int = 100
    missing_threshold: float = 0.5
    min_class_size: int = 3
    min_sample_size: int = 15

    # Confidence interval
    confidence_level: float = 0.95

    def __post_init__(self):
        """Fill in the default dataset map when none was supplied."""
        if self.datasets is None:
            # Every default file lives at
            # <base>/<stem>_data/<stem>_cleaned_master.csv, so only the stem
            # differs between datasets.
            default_stems = {
                'ConvNext': 'convnext',
                'ViT': 'vit_base',
                'ResNet50_Pretrained': 'pretrained_resnet50',
                'ResNet50_ImageNet': 'imagenet_resnet50',
                'EfficientNet': 'efficientnet',
            }
            self.datasets = {
                name: f'{self.data_base_path}/{stem}_data/{stem}_cleaned_master.csv'
                for name, stem in default_stems.items()
            }

# Setup logging
# Configures the root logger at INFO with "timestamp - level - message" lines;
# `logger` is the module-level logger used by the functions defined below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Initialize configuration
# Default-constructed, so the dataset map is derived from the default
# data_base_path. Functions in later cells read this global `config`.
config = AnalysisConfig()

# Report which optional model back-ends were importable in this environment.
print("Configuration initialized successfully!")
print(f"TabPFN available: {TABPFN_AVAILABLE}")
print(f"XGBoost available: {XGBOOST_AVAILABLE}")
print(f"TabNet available: {TABNET_AVAILABLE}")

In [None]:
# Cell 2: File Validation Functions

def validate_file_paths(datasets: Dict[str, str]) -> Dict[str, bool]:
    """
    Report, per dataset, whether its file is present on disk

    Prints a small status table to stdout while checking.

    Args:
        datasets: Dictionary mapping dataset names to file paths

    Returns:
        Dictionary mapping each dataset name to True (path exists) or False
    """
    separator = "=" * 50
    print("CHECKING DATA FILE PATHS:")
    print(separator)

    file_status = {}
    for name, path in datasets.items():
        found = Path(path).exists()
        file_status[name] = found
        print(f"{name:<20}: {'EXISTS' if found else 'NOT FOUND'}")

    # True counts as 1, so summing the flags gives the number of files found.
    existing_count = sum(file_status.values())
    print(f"Found {existing_count}/{len(datasets)} files")
    print(separator, "\n")

    return file_status

def load_and_validate_dataset(file_path: str, dataset_name: str) -> Optional[pd.DataFrame]:
    """
    Read a dataset CSV and run basic sanity checks on it

    Every failure (missing file, empty table, any exception while reading)
    is logged as an error and reported to the caller as None.

    Args:
        file_path: Path to the CSV file
        dataset_name: Name of the dataset for logging

    Returns:
        The loaded DataFrame, or None if loading or validation failed
    """
    try:
        csv_path = Path(file_path)
        if not csv_path.exists():
            logger.error(f"File not found: {file_path}")
            return None

        frame = pd.read_csv(file_path)
        if frame.empty:
            logger.error(f"Dataset {dataset_name} is empty")
            return None

        n_rows, n_cols = frame.shape
        logger.info(f"Loaded {dataset_name}: {n_rows} rows, {n_cols} columns")
        return frame

    except Exception as e:
        # Deliberately broad: a bad file should not abort the whole run.
        logger.error(f"Failed to load {dataset_name}: {str(e)}")
        return None

def get_dataset_summary(df: pd.DataFrame, dataset_name: str) -> Dict[str, Any]:
    """
    Generate summary statistics for a dataset

    Args:
        df: DataFrame to analyze
        dataset_name: Name of the dataset

    Returns:
        Dictionary with keys 'dataset_name', 'n_rows', 'n_columns',
        'missing_data_percent' (0-100, share of null cells),
        'numeric_columns', 'categorical_columns' (object-dtype columns)
        and 'memory_usage_mb'
    """
    n_rows = len(df)
    n_columns = len(df.columns)
    total_cells = n_rows * n_columns

    # A frame with no rows or no columns has no cells; report 0% missing
    # instead of dividing by zero.
    if total_cells > 0:
        missing_cells = float(df.isnull().sum().sum())
        missing_percent = (missing_cells / total_cells) * 100
    else:
        missing_percent = 0.0

    summary = {
        'dataset_name': dataset_name,
        'n_rows': n_rows,
        'n_columns': n_columns,
        'missing_data_percent': missing_percent,
        'numeric_columns': len(df.select_dtypes(include=[np.number]).columns),
        'categorical_columns': len(df.select_dtypes(include=['object']).columns),
        # deep=True also counts the memory held by object (e.g. string) values
        'memory_usage_mb': df.memory_usage(deep=True).sum() / 1024**2
    }

    return summary

# Run file validation
# Checks every path in the configured dataset map and keeps the per-dataset
# existence flags in the module-level `file_status` dictionary.
file_status = validate_file_paths(config.datasets)
print("File validation completed!")

In [None]:
# Cell 3: Target Creation Functions

def create_mortality_targets(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
    """
    Build binary mortality targets for several time horizons

    A row is positive for a horizon when patient_status == 2 (death) and
    survival is at most the horizon value (6, 12, 24).

    NOTE(review): survival is presumably measured in months - confirm.
    NOTE(review): every other row is labelled 0, including rows whose
    follow-up is shorter than the horizon - confirm this is intended.

    Args:
        df: Input dataframe

    Returns:
        Dictionary with keys 'data', 'targets', 'descriptions' and
        'category', or None if columns are missing or too few rows remain
    """
    needed_columns = ('survival', 'patient_status')
    if any(col not in df.columns for col in needed_columns):
        logger.warning("Missing required columns for mortality targets")
        return None

    # Keep only rows where both the survival time and the status are known
    has_outcome = df['survival'].notna() & df['patient_status'].notna()
    survival_df = df[has_outcome].copy()

    if len(survival_df) < config.min_sample_size:
        logger.warning(f"Insufficient survival data: {len(survival_df)} samples")
        return None

    # (target column, human-readable description, horizon threshold)
    horizons = [
        ('mortality_6mo', '6-month Mortality', 6),
        ('mortality_1yr', '1-year Mortality', 12),
        ('mortality_2yr', '2-year Mortality', 24),
    ]

    # patient_status == 2 means death
    for column, _, limit in horizons:
        died_within_limit = (
            (survival_df['patient_status'] == 2) &
            (survival_df['survival'] <= limit)
        )
        survival_df[column] = died_within_limit.astype(int)

    targets = [column for column, _, _ in horizons]
    descriptions = [label for _, label, _ in horizons]

    # Log class distributions
    total_count = len(survival_df)
    for target, desc in zip(targets, descriptions):
        pos_count = survival_df[target].sum()
        logger.info(f"{desc}: {pos_count}/{total_count} ({pos_count/total_count*100:.1f}% positive)")

    return {
        'data': survival_df,
        'targets': targets,
        'descriptions': descriptions,
        'category': 'mortality'
    }

def create_tumor_grade_targets(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
    """
    Build a binary high-grade vs low-grade tumor target

    The label is derived from the text in 'methylation_class': a row is
    high grade (1) when the lower-cased text contains any of the
    high-grade terms, otherwise 0.

    Args:
        df: Input dataframe

    Returns:
        Dictionary with keys 'data', 'targets', 'descriptions' and
        'category', or None if the column is missing or too few rows remain
    """
    if 'methylation_class' not in df.columns:
        logger.warning("Missing methylation_class column for tumor grade targets")
        return None

    has_class = df['methylation_class'].notna()
    tumor_df = df[has_class].copy()

    if len(tumor_df) < config.min_sample_size:
        logger.warning(f"Insufficient tumor grade data: {len(tumor_df)} samples")
        return None

    # Terms whose presence marks a tumor as high grade; joined into one
    # alternation pattern for str.contains (regex matching by default).
    high_grade_terms = [
        'glioblastoma', 'anaplastic', 'high grade',
        'grade iv', 'grade 4', 'gbm'
    ]
    pattern = '|'.join(high_grade_terms)

    lowered = tumor_df['methylation_class'].str.lower()
    matches_high_grade = lowered.str.contains(pattern, na=False)
    tumor_df['high_grade'] = matches_high_grade.astype(int)

    # Log class distribution
    total_count = len(tumor_df)
    pos_count = tumor_df['high_grade'].sum()
    logger.info(f"High-grade tumors: {pos_count}/{total_count} ({pos_count/total_count*100:.1f}%)")

    return {
        'data': tumor_df,
        'targets': ['high_grade'],
        'descriptions': ['High vs Low Grade Tumor'],
        'category': 'tumor_grade'
    }

def create_idh_targets(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
    """
    Build a binary IDH mutation target ('idh_binary')

    Labels come from two sources, applied in this order:
      1. free text in 'idh1' (if present): rows matching a mutation
         pattern are labelled 1;
      2. the coded 'idh_1_r132h' column for rows still unlabelled:
         code 2 -> 1, code 1 -> 0.
    Finally every row with code 3 is reset to unlabelled, including rows
    that step 1 labelled from text. Unlabelled rows are dropped.

    Args:
        df: Input dataframe

    Returns:
        Dictionary with keys 'data', 'targets', 'descriptions' and
        'category', or None if the column is missing or too few rows remain
    """
    if 'idh_1_r132h' not in df.columns:
        logger.warning("Missing idh_1_r132h column for IDH targets")
        return None

    idh_df = df.copy()
    idh_df['idh_binary'] = np.nan

    # Step 1: text evidence of a mutation, when the free-text column exists
    if 'idh1' in df.columns:
        mutation_patterns = ['r132h', 'r132s', 'arg132', 'missense', 'p.arg132']
        pattern = '|'.join(mutation_patterns)
        lowered_text = df['idh1'].astype(str).str.lower()
        has_mutation_text = lowered_text.str.contains(pattern, na=False)
        idh_df.loc[has_mutation_text, 'idh_binary'] = 1

    # Step 2: coded values for rows that are still unlabelled
    unresolved = idh_df['idh_binary'].isna() & idh_df['idh_1_r132h'].notna()

    # Assuming: 1=wild-type, 2=mutant, 3=unknown
    is_mutant_code = idh_df['idh_1_r132h'] == 2
    is_wildtype_code = idh_df['idh_1_r132h'] == 1
    is_unknown_code = idh_df['idh_1_r132h'] == 3
    idh_df.loc[unresolved & is_mutant_code, 'idh_binary'] = 1
    idh_df.loc[unresolved & is_wildtype_code, 'idh_binary'] = 0
    idh_df.loc[is_unknown_code, 'idh_binary'] = np.nan

    # Keep only rows that ended up with a label
    labelled = idh_df['idh_binary'].notna()
    valid_idh_df = idh_df[labelled].copy()

    if len(valid_idh_df) < config.min_sample_size:
        logger.warning(f"Insufficient IDH data: {len(valid_idh_df)} samples")
        return None

    # Log class distribution
    total_count = len(valid_idh_df)
    pos_count = valid_idh_df['idh_binary'].sum()
    logger.info(f"IDH mutations: {pos_count}/{total_count} ({pos_count/total_count*100:.1f}%)")

    return {
        'data': valid_idh_df,
        'targets': ['idh_binary'],
        'descriptions': ['IDH Mutation Status'],
        'category': 'idh'
    }

def create_mgmt_targets(df: pd.DataFrame) -> Optional[Dict[str, Any]]:
    """
    Build a binary MGMT methylation target ('mgmt_binary')

    The coded 'mgmt' column is mapped to a label (code 1 -> 1,
    code 2 -> 0); code 3 and any other value stay unlabelled and are
    dropped.

    Args:
        df: Input dataframe

    Returns:
        Dictionary with keys 'data', 'targets', 'descriptions' and
        'category', or None if the column is missing or too few rows remain
    """
    if 'mgmt' not in df.columns:
        logger.warning("Missing mgmt column for MGMT targets")
        return None

    has_mgmt = df['mgmt'].notna()
    mgmt_df = df[has_mgmt].copy()

    if len(mgmt_df) < config.min_sample_size:
        logger.warning(f"Insufficient MGMT data: {len(mgmt_df)} samples")
        return None

    mgmt_df['mgmt_binary'] = np.nan

    # Assuming: 1=methylated, 2=unmethylated, 3=unknown
    code_to_label = ((1, 1), (2, 0), (3, np.nan))
    for code, label in code_to_label:
        mgmt_df.loc[mgmt_df['mgmt'] == code, 'mgmt_binary'] = label

    # Keep only rows that ended up with a label
    labelled = mgmt_df['mgmt_binary'].notna()
    valid_mgmt_df = mgmt_df[labelled].copy()

    if len(valid_mgmt_df) < config.min_sample_size:
        logger.warning(f"Insufficient valid MGMT data: {len(valid_mgmt_df)} samples")
        return None

    # Log class distribution
    total_count = len(valid_mgmt_df)
    pos_count = valid_mgmt_df['mgmt_binary'].sum()
    logger.info(f"MGMT methylation: {pos_count}/{total_count} ({pos_count/total_count*100:.1f}%)")

    return {
        'data': valid_mgmt_df,
        'targets': ['mgmt_binary'],
        'descriptions': ['MGMT Methylation Status'],
        'category': 'mgmt'
    }

def create_all_targets(df: pd.DataFrame) -> Dict[str, Dict[str, Any]]:
    """
    Run every target builder on a dataset and collect the successful ones

    A builder that returns None or raises is logged and skipped, so one
    failing target type never prevents the others from being created.

    Args:
        df: Input dataframe

    Returns:
        Dictionary mapping target categories to target information
    """
    logger.info("Creating prediction targets...")

    # Category name -> builder; insertion order is the order they run in
    builders = {
        'mortality': create_mortality_targets,
        'tumor_grade': create_tumor_grade_targets,
        'idh': create_idh_targets,
        'mgmt': create_mgmt_targets,
    }

    all_targets = {}
    for target_name, build in builders.items():
        try:
            target_info = build(df)
        except Exception as e:
            logger.error(f"Error creating {target_name} targets: {str(e)}")
            continue

        if target_info is None:
            logger.warning(f"Failed to create {target_name} targets")
            continue

        all_targets[target_name] = target_info
        logger.info(f"Successfully created {target_name} targets")

    logger.info(f"Created {len(all_targets)} target categories")
    return all_targets

# Cell status message: confirms the target-creation functions above were defined.
print("Target creation functions defined successfully!")

In [None]:
# Cell 4: Feature Selection and Preprocessing Functions

def get_feature_categories(df: pd.DataFrame) -> Dict[str, List[str]]:
    """
    Group the dataframe's columns into clinical, molecular and imaging features

    Clinical and molecular features come from fixed name lists (only names
    actually present in the dataframe are kept); imaging features are all
    columns whose name starts with 'feature_'.

    Args:
        df: Input dataframe

    Returns:
        Dictionary with keys 'clinical', 'molecular' and 'imaging', each
        mapping to a list of column names
    """
    # Candidate column names per named group
    candidates = {
        'clinical': ['age', 'sex', 'race', 'ethnicity', 'gtr'],
        'molecular': [
            'mgmt_pyro', 'atrx', 'p53', 'braf_v600',
            'h3k27m', 'gfap', 'tumor', 'hg_glioma'
        ],
    }

    # Keep only the candidates that exist in this dataframe
    available_features = {
        group: [name for name in names if name in df.columns]
        for group, names in candidates.items()
    }

    # Imaging features are identified by their 'feature_' prefix
    available_features['imaging'] = list(
        filter(lambda col: col.startswith('feature_'), df.columns)
    )

    # Log feature counts
    for category, features in available_features.items():
        logger.info(f"{category.capitalize()} features: {len(features)}")

    return available_features

def select_features_for_target(all_features: List[str], target_name: str) -> List[str]:
    """
    Drop features that would leak information about the given target

    A feature is dropped when its name contains (case-insensitively) any
    exclusion pattern registered for the target. Targets without an entry
    get no exclusions.

    NOTE(review): no exclusions are configured for 'high_grade' or the
    mortality targets - confirm that no grade- or outcome-derived columns
    are passed in for those targets.

    Args:
        all_features: List of all available features
        target_name: Name of the target variable

    Returns:
        New list with the features considered safe for this target
    """
    # Substrings that mark a feature as unsafe, per target
    unsafe_patterns = {
        'idh_binary': ['idh'],
        'mgmt_binary': ['mgmt'],
        'high_grade': [],  # No specific exclusions for tumor grade
        'mortality_6mo': [],
        'mortality_1yr': [],
        'mortality_2yr': []
    }

    blocked = [pattern.lower() for pattern in unsafe_patterns.get(target_name, [])]

    safe_features = [
        feature for feature in all_features
        if not any(pattern in feature.lower() for pattern in blocked)
    ]

    excluded_count = len(all_features) - len(safe_features)
    if excluded_count > 0:
        logger.info(f"Excluded {excluded_count} features for {target_name} to prevent leakage")

    return safe_features

def handle_missing_values(df: pd.DataFrame, features: List[str]) -> Tuple[pd.DataFrame, List[str]]:
    """
    Handle missing values in features

    Features whose missing fraction exceeds ``config.missing_threshold``
    are dropped from the feature list (their columns stay in the returned
    frame). Remaining numeric features are imputed with the column median
    and object-dtype features with the column mode ('Unknown' if the
    column has no mode).

    NOTE(review): the imputation statistics are computed on every row
    passed in - confirm this is acceptable if the frame is later split
    into train and test sets.

    Args:
        df: Input dataframe (not modified; a copy is processed)
        features: List of feature names

    Returns:
        Tuple of (processed dataframe, final feature list)
    """
    df_processed = df.copy()

    # Calculate missing percentages
    missing_percentages = df_processed[features].isnull().mean()

    # Remove features with too much missing data
    features_to_keep = missing_percentages[missing_percentages <= config.missing_threshold].index.tolist()
    features_removed = len(features) - len(features_to_keep)

    if features_removed > 0:
        # Report the configured threshold rather than a hard-coded 50%
        logger.info(
            f"Removed {features_removed} features due to "
            f">{config.missing_threshold:.0%} missing data"
        )

    # Impute remaining missing values
    kept = df_processed[features_to_keep]
    numeric_features = kept.select_dtypes(include=[np.number]).columns
    categorical_features = kept.select_dtypes(include=['object']).columns

    # Impute numeric features with median.
    # The filled column is assigned back explicitly: an in-place fillna on
    # df_processed[feature] acts on a temporary object and is not guaranteed
    # to update the frame (it does not under pandas Copy-on-Write).
    for feature in numeric_features:
        column = df_processed[feature]
        if column.isnull().any():
            median_value = column.median()
            df_processed[feature] = column.fillna(median_value)
            logger.debug(f"Imputed {feature} with median: {median_value}")

    # Impute categorical features with mode
    for feature in categorical_features:
        column = df_processed[feature]
        if column.isnull().any():
            modes = column.mode()
            mode_value = modes.iloc[0] if not modes.empty else 'Unknown'
            df_processed[feature] = column.fillna(mode_value)
            logger.debug(f"Imputed {feature} with mode: {mode_value}")

    return df_processed, features_to_keep

def encode_categorical_features(df: pd.DataFrame, features: List[str], target: str) -> pd.DataFrame:
    """
    Label-encode the object-dtype columns among the given features

    Values are cast to str before encoding. If encoding a column fails,
    the error is logged and that feature is removed from ``features``
    IN PLACE, so the caller's list no longer contains it.

    Args:
        df: Input dataframe (not modified; a copy is encoded)
        features: List of feature names (may be mutated, see above)
        target: Target variable name (not used by this function)

    Returns:
        DataFrame with encoded categorical features
    """
    df_encoded = df.copy()

    # Only object-dtype feature columns need encoding
    object_columns = df_encoded[features].select_dtypes(include=['object']).columns.tolist()
    if not object_columns:
        return df_encoded

    logger.info(f"Encoding {len(object_columns)} categorical features")

    for feature in object_columns:
        try:
            encoder = LabelEncoder()
            as_text = df_encoded[feature].astype(str)
            df_encoded[feature] = encoder.fit_transform(as_text)
            logger.debug(f"Encoded {feature}: {len(encoder.classes_)} classes")
        except Exception as e:
            logger.error(f"Failed to encode {feature}: {str(e)}")
            # Remove problematic feature from the caller's list
            if feature in features:
                features.remove(feature)

    return df_encoded

def perform_feature_selection(X: np.ndarray, y: np.ndarray, max_features: int = None) -> np.ndarray:
    """
    Reduce the feature matrix to at most ``max_features`` columns

    When X already has no more than ``max_features`` columns it is
    returned unchanged. Otherwise the top columns by univariate ANOVA
    F-score (SelectKBest with f_classif) are kept. If selection raises,
    the error is logged and the original X is returned.

    Args:
        X: Feature matrix
        y: Target vector
        max_features: Maximum number of features to keep
            (defaults to config.max_features)

    Returns:
        Transformed feature matrix
    """
    limit = config.max_features if max_features is None else max_features

    n_features = X.shape[1]
    if n_features <= limit:
        return X

    logger.info(f"Reducing features from {n_features} to {limit} using univariate selection")

    try:
        selector = SelectKBest(score_func=f_classif, k=limit)
        reduced = selector.fit_transform(X, y)
        logger.info(f"Feature selection completed: {reduced.shape[1]} features retained")
        return reduced
    except Exception as e:
        logger.error(f"Feature selection failed: {str(e)}, using original features")
        return X

def preprocess_data(df: pd.DataFrame, features: List[str], target: str) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[str]]:
    """
    Complete preprocessing pipeline for features and target

    Steps: drop rows without a target value, drop/impute missing feature
    values, label-encode categorical features, validate the class
    distribution, and reduce the number of features if needed.

    NOTE(review): imputation, encoding and feature selection all run on
    the full sample passed in - confirm this is acceptable if the result
    is later split into train and test sets.

    Args:
        df: Input dataframe
        features: List of feature names
        target: Target variable name

    Returns:
        Tuple of (X, y, error_message). On success error_message is None;
        on failure X and y are None and error_message explains why.
    """
    try:
        # Filter to samples with valid target values
        data = df[features + [target]].dropna(subset=[target]).copy()

        if len(data) < config.min_sample_size:
            return None, None, f"Insufficient data: {len(data)} samples (minimum: {config.min_sample_size})"

        # Handle missing values
        data_processed, final_features = handle_missing_values(data, features)

        if len(final_features) == 0:
            return None, None, "No features remaining after missing value handling"

        # Encode categorical features (may remove entries from final_features
        # in place when a column cannot be encoded)
        data_encoded = encode_categorical_features(data_processed, final_features, target)

        # Extract feature matrix and target vector
        X = data_encoded[final_features].values
        y = data_encoded[target].values

        # Check class balance
        unique_classes, class_counts = np.unique(y, return_counts=True)

        # A single-class target would pass the size check below, but no
        # classifier can be trained or scored on it, so reject it here.
        if len(unique_classes) < 2:
            return None, None, (
                f"Target '{target}' has only one class "
                f"({unique_classes[0]}); at least 2 are required"
            )

        min_class_size = min(class_counts)

        if min_class_size < config.min_class_size:
            return None, None, f"Smallest class has only {min_class_size} samples (minimum: {config.min_class_size})"

        # Log class distribution
        class_distribution = dict(zip(unique_classes, class_counts))
        logger.info(f"Class distribution: {class_distribution}")

        # Perform feature selection if needed
        X_selected = perform_feature_selection(X, y)

        logger.info(f"Preprocessing completed: {X_selected.shape[0]} samples, {X_selected.shape[1]} features")
        return X_selected, y, None

    except Exception as e:
        # Any unexpected failure is reported through the error slot
        error_msg = f"Preprocessing failed: {str(e)}"
        logger.error(error_msg)
        return None, None, error_msg

# Cell status message: confirms the preprocessing functions above were defined.
print("Feature selection and preprocessing functions defined successfully!")

In [None]:
# Cell 5: Machine Learning Algorithm Configuration

def get_algorithm_config(algorithm_name: str) -> Optional[Dict[str, Any]]:
    """
    Build a fresh, untrained model plus its settings for one algorithm

    Each call constructs a new model instance, so the result can be
    trained without affecting earlier instances.

    Args:
        algorithm_name: One of 'TabPFN', 'XGBoost', 'TabNet',
            'RandomForest', 'LogisticRegression', 'SVM'

    Returns:
        Dictionary with keys 'model', 'needs_scaling' and 'description',
        or None if the name is unknown, the dependency is missing, or the
        model could not be constructed (each case is logged)
    """
    # Model factories. They are only called after the availability check,
    # so an optional package that failed to import is never referenced.
    def _build_tabpfn():
        return TabPFNClassifier(device='cpu', N_ensemble_configurations=4)

    def _build_xgboost():
        return xgb.XGBClassifier(
            n_estimators=300,
            max_depth=4,
            learning_rate=0.05,
            subsample=0.8,
            colsample_bytree=0.8,
            min_child_weight=3,
            reg_alpha=1,
            reg_lambda=1,
            random_state=config.random_state,
            eval_metric='logloss',
            use_label_encoder=False,
            verbosity=0
        )

    def _build_tabnet():
        return TabNetClassifier(
            n_d=64,
            n_a=64,
            n_steps=5,
            gamma=1.5,
            lambda_sparse=1e-4,
            optimizer_fn=torch.optim.Adam,
            optimizer_params=dict(lr=0.01, weight_decay=1e-5),
            mask_type='entmax',
            scheduler_fn=torch.optim.lr_scheduler.StepLR,
            scheduler_params={'step_size': 20, 'gamma': 0.8},
            verbose=0,
            seed=config.random_state
        )

    def _build_random_forest():
        return RandomForestClassifier(
            n_estimators=500,
            max_depth=8,
            min_samples_split=10,
            min_samples_leaf=5,
            max_features='sqrt',
            oob_score=True,
            class_weight='balanced',
            random_state=config.random_state,
            n_jobs=-1
        )

    def _build_logistic_regression():
        return LogisticRegression(
            penalty='elasticnet',
            l1_ratio=0.5,
            C=0.1,
            solver='saga',
            max_iter=2000,
            class_weight='balanced',
            random_state=config.random_state,
            n_jobs=-1
        )

    def _build_svm():
        return SVC(
            kernel='rbf',
            C=1.0,
            gamma='scale',
            probability=True,
            class_weight='balanced',
            random_state=config.random_state
        )

    # name -> (dependency available, factory, needs scaling, description)
    registry = {
        'TabPFN': (
            TABPFN_AVAILABLE, _build_tabpfn, False,
            'Transformer-based Prior-Data Fitted Networks'
        ),
        'XGBoost': (
            XGBOOST_AVAILABLE, _build_xgboost, False,
            'Gradient Boosting Decision Trees'
        ),
        'TabNet': (
            TABNET_AVAILABLE, _build_tabnet, True,
            'Attentive Interpretable Tabular Learning'
        ),
        'RandomForest': (
            True, _build_random_forest, False,
            'Ensemble of Decision Trees'
        ),
        'LogisticRegression': (
            True, _build_logistic_regression, True,
            'Linear Classification with Regularization'
        ),
        'SVM': (
            True, _build_svm, True,
            'Support Vector Machine with RBF Kernel'
        ),
    }

    entry = registry.get(algorithm_name)
    if entry is None:
        logger.error(f"Unknown algorithm: {algorithm_name}")
        return None

    is_available, build_model, needs_scaling, description = entry
    if not is_available:
        logger.warning(f"{algorithm_name} is not available (missing dependencies)")
        return None

    # Create model instance; constructor errors are logged, not raised
    try:
        return {
            'model': build_model(),
            'needs_scaling': needs_scaling,
            'description': description
        }
    except Exception as e:
        logger.error(f"Failed to initialize {algorithm_name}: {str(e)}")
        return None

def get_available_algorithms() -> Dict[str, Dict[str, Any]]:
    """
    Collect the configuration of every algorithm usable in this environment

    Each known algorithm is requested from get_algorithm_config; those
    that come back as None are left out.

    Returns:
        Dictionary mapping algorithm names to their configurations
    """
    candidate_names = (
        'TabPFN', 'XGBoost', 'TabNet',
        'RandomForest', 'LogisticRegression', 'SVM',
    )

    # Build every candidate in order, then keep the ones that succeeded
    built = {name: get_algorithm_config(name) for name in candidate_names}
    available_algorithms = {
        name: algo_config
        for name, algo_config in built.items()
        if algo_config is not None
    }

    logger.info(f"Available algorithms: {list(available_algorithms.keys())}")
    return available_algorithms

def get_scaler(scaler_type: str = 'robust'):
    """
    Create a new, unfitted feature scaler

    Args:
        scaler_type: 'robust' for a RobustScaler using the 10th-90th
            percentile range, or 'standard' for a StandardScaler

    Returns:
        Scaler instance

    Raises:
        ValueError: If scaler_type is not one of the supported names
    """
    # Reject unsupported names up front
    if scaler_type not in ('robust', 'standard'):
        raise ValueError(f"Unknown scaler type: {scaler_type}")

    if scaler_type == 'robust':
        return RobustScaler(quantile_range=(10, 90))
    return StandardScaler()

# Initialize available algorithms
# Builds one model instance per usable algorithm and keeps the resulting
# name -> configuration map in the module-level `available_algorithms`.
available_algorithms = get_available_algorithms()
print("Machine learning algorithm configurations loaded successfully!")
print(f"Available algorithms: {list(available_algorithms.keys())}")

In [None]:
# Cell 6: Model Training and Evaluation Functions

def calculate_classification_metrics(y_true: np.ndarray, y_pred: np.ndarray, y_prob: np.ndarray) -> Dict[str, float]:
    """
    Calculate comprehensive classification metrics

    Args:
        y_true: True labels
        y_pred: Predicted labels
        y_prob: Predicted probabilities of the positive class

    Returns:
        Dictionary with keys 'accuracy', 'auc', 'sensitivity',
        'specificity', 'ppv', 'npv', 'balanced_accuracy', 'f1_score',
        'confusion_matrix' and 'n_test'. If AUC cannot be computed it is
        reported as 0.5; if anything else fails, a dictionary of neutral
        defaults (zeros, AUC 0.5) is returned and the error is logged.
    """
    try:
        # Basic metrics
        accuracy = accuracy_score(y_true, y_pred)

        # AUC-ROC (undefined when y_true contains a single class)
        try:
            auc_score = roc_auc_score(y_true, y_prob)
        except ValueError as e:
            logger.warning(f"Could not calculate AUC: {str(e)}")
            auc_score = 0.5

        # Confusion matrix and derived metrics.
        # For 0/1 labels the label set is pinned so the matrix is always
        # 2x2: without it, a fold where only one class occurs in both
        # y_true and y_pred yields a 1x1 matrix and every derived metric
        # would be reported as 0.
        observed_labels = np.union1d(y_true, y_pred)
        if np.isin(observed_labels, [0, 1]).all():
            cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        else:
            cm = confusion_matrix(y_true, y_pred)

        if cm.shape == (2, 2):
            tn, fp, fn, tp = cm.ravel()

            # Calculate metrics with division by zero protection
            sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
            ppv = tp / (tp + fp) if (tp + fp) > 0 else 0.0  # Positive Predictive Value
            npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0  # Negative Predictive Value

            balanced_accuracy = (sensitivity + specificity) / 2
            f1_score = 2 * (ppv * sensitivity) / (ppv + sensitivity) if (ppv + sensitivity) > 0 else 0.0

        else:
            # Labels other than 0/1 or more than two classes (shouldn't
            # happen with binary classification)
            sensitivity = specificity = ppv = npv = balanced_accuracy = f1_score = 0.0

        return {
            'accuracy': accuracy,
            'auc': auc_score,
            'sensitivity': sensitivity,
            'specificity': specificity,
            'ppv': ppv,
            'npv': npv,
            'balanced_accuracy': balanced_accuracy,
            'f1_score': f1_score,
            'confusion_matrix': cm,
            'n_test': len(y_true)
        }

    except Exception as e:
        logger.error(f"Error calculating metrics: {str(e)}")
        return {
            'accuracy': 0.0, 'auc': 0.5, 'sensitivity': 0.0, 'specificity': 0.0,
            'ppv': 0.0, 'npv': 0.0, 'balanced_accuracy': 0.0, 'f1_score': 0.0,
            'confusion_matrix': np.array([[0, 0], [0, 0]]), 'n_test': len(y_true)
        }

def train_single_algorithm(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, 
                          y_test: np.ndarray, algorithm_name: str, algorithm_config: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Train and evaluate a single algorithm

    The test set is used only for the final predictions and metrics. It is
    never passed to ``fit``: TabNet's early stopping monitors a validation
    split carved out of the training data, so the reported metrics are not
    tuned on the data they are computed from.

    Args:
        X_train: Training features
        X_test: Test features
        y_train: Training labels
        y_test: Test labels
        algorithm_name: Name of the algorithm
        algorithm_config: Algorithm configuration with keys 'model'
            (untrained model instance) and 'needs_scaling'

    Returns:
        Dictionary of metrics (plus 'scaling_used' and 'algorithm'), or
        None if training or evaluation failed (the error is logged)
    """
    try:
        logger.info(f"Training {algorithm_name}...")

        model = algorithm_config['model']
        needs_scaling = algorithm_config['needs_scaling']

        # Scale data if needed (scaler is fitted on the training data only)
        if needs_scaling:
            scaler = get_scaler('robust')
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
        else:
            X_train_scaled = X_train
            X_test_scaled = X_test

        # Train model with algorithm-specific handling
        if algorithm_name == 'TabNet' and TABNET_AVAILABLE:
            # Early stopping needs held-out data; take it from the training
            # set. An empty eval_set means "train for max_epochs".
            X_fit, y_fit, eval_set = _split_tabnet_validation(X_train_scaled, y_train)
            model.fit(
                X_fit, y_fit,
                eval_set=eval_set,
                patience=20,
                max_epochs=100,
                eval_metric=['auc'],
                batch_size=min(256, len(X_fit) // 4)
            )
            y_prob = model.predict_proba(X_test_scaled)[:, 1]
            y_pred = (y_prob > 0.5).astype(int)

        else:
            # Standard sklearn interface. XGBoost is trained here as well:
            # no early stopping is configured for it, so an eval_set would
            # only monitor the test data without changing the fitted model.
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            if hasattr(model, 'predict_proba'):
                y_prob = model.predict_proba(X_test_scaled)[:, 1]
            else:
                # No probabilities available: fall back to hard predictions
                y_prob = y_pred.astype(float)

        # Calculate metrics
        metrics = calculate_classification_metrics(y_test, y_pred, y_prob)
        metrics['scaling_used'] = needs_scaling
        metrics['algorithm'] = algorithm_name

        logger.info(f"{algorithm_name} - AUC: {metrics['auc']:.3f}, Accuracy: {metrics['accuracy']:.3f}")
        return metrics

    except Exception as e:
        logger.error(f"{algorithm_name} training failed: {str(e)}")
        return None


def _split_tabnet_validation(X: np.ndarray, y: np.ndarray, val_fraction: float = 0.2):
    """
    Carve a stratified early-stopping validation set out of training data

    Args:
        X: Training features
        y: Training labels
        val_fraction: Share of the training rows held out for validation

    Returns:
        Tuple of (X_fit, y_fit, eval_set). eval_set is a one-element list
        [(X_val, y_val)], or an empty list (with X and y returned whole)
        when a usable stratified split is not possible.
    """
    try:
        X_fit, X_val, y_fit, y_val = train_test_split(
            X, y,
            test_size=val_fraction,
            stratify=y,
            random_state=config.random_state
        )
    except ValueError:
        # Too few samples (overall or per class) for a stratified split
        return X, y, []

    # AUC on the validation set needs both classes on each side
    if len(np.unique(y_val)) < 2 or len(np.unique(y_fit)) < 2:
        return X, y, []

    return X_fit, y_fit, [(X_val, y_val)]

def _failed_cv_results() -> Dict[str, Any]:
    """Return the placeholder statistics reported when no CV fold produced a score."""
    return {
        'cv_auc_mean': 0.5, 'cv_auc_std': 0.0,
        'cv_auc_ci_lower': 0.5, 'cv_auc_ci_upper': 0.5,
        'cv_accuracy_mean': 0.5, 'cv_accuracy_std': 0.0,
        'cv_folds': 0, 'cv_stability': 'FAILED'
    }

def perform_cross_validation(X: np.ndarray, y: np.ndarray, algorithm_name: str, 
                           algorithm_config: Dict[str, Any], cv_folds: Optional[int] = None) -> Dict[str, Any]:
    """
    Perform stratified k-fold cross-validation for an algorithm
    
    Each fold is trained and scored through train_single_algorithm using a
    fresh configuration obtained from get_algorithm_config, so no fitted
    state is shared between folds. Folds for which no configuration or no
    result is available are skipped.
    
    Args:
        X: Feature matrix (indexed with integer arrays, one row per sample)
        y: Target vector aligned with X
        algorithm_name: Name of the algorithm
        algorithm_config: Algorithm configuration. Kept for interface
            compatibility; it is not used because every fold requests a
            fresh configuration by name.
        cv_folds: Number of CV folds (defaults to config.cv_folds)
        
    Returns:
        Dictionary with the mean/std of AUC and accuracy over the successful
        folds, a t-based confidence interval for the mean AUC clipped to
        [0, 1], the number of successful folds and a stability label
        ('HIGH', 'STABLE', 'VARIABLE' or 'FAILED'). On any error, or when no
        fold succeeds, the 'FAILED' placeholder values are returned instead.
    """
    if cv_folds is None:
        cv_folds = config.cv_folds
    
    try:
        cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=config.random_state)
        
        auc_scores = []
        accuracy_scores = []
        
        for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
            logger.debug(f"CV Fold {fold + 1}/{cv_folds} for {algorithm_name}")
            
            # Get fresh model instance for each fold
            fresh_config = get_algorithm_config(algorithm_name)
            if fresh_config is None:
                continue
            
            fold_results = train_single_algorithm(
                X[train_idx], X[val_idx], y[train_idx], y[val_idx],
                algorithm_name, fresh_config
            )
            
            if fold_results is not None:
                auc_scores.append(fold_results['auc'])
                accuracy_scores.append(fold_results['accuracy'])
        
        if not auc_scores:
            logger.warning(f"No successful CV folds for {algorithm_name}")
            return _failed_cv_results()
        
        n_successful = len(auc_scores)
        
        # Descriptive statistics (population std, as reported to callers)
        auc_mean = np.mean(auc_scores)
        auc_std = np.std(auc_scores)
        accuracy_mean = np.mean(accuracy_scores)
        accuracy_std = np.std(accuracy_scores)
        
        # Confidence interval for the mean AUC
        if n_successful > 1:
            confidence_alpha = 1 - config.confidence_level
            t_critical = t.ppf(1 - confidence_alpha / 2, n_successful - 1)
            # The t-interval needs the *sample* standard deviation (ddof=1);
            # the population value would make the interval too narrow.
            standard_error = np.std(auc_scores, ddof=1) / np.sqrt(n_successful)
            margin_of_error = t_critical * standard_error
            ci_lower = max(0, auc_mean - margin_of_error)
            ci_upper = min(1, auc_mean + margin_of_error)
        else:
            # A single fold carries no variance information (zero degrees of
            # freedom), so report the uninformative full range explicitly.
            ci_lower, ci_upper = 0.0, 1.0
        
        # Assess stability from the coefficient of variation of the fold AUCs
        coefficient_of_variation = auc_std / auc_mean if auc_mean > 0 else float('inf')
        if coefficient_of_variation < 0.05:
            stability = 'HIGH'
        elif coefficient_of_variation < 0.10:
            stability = 'STABLE'
        else:
            stability = 'VARIABLE'
        
        cv_results = {
            'cv_auc_mean': auc_mean,
            'cv_auc_std': auc_std,
            'cv_auc_ci_lower': ci_lower,
            'cv_auc_ci_upper': ci_upper,
            'cv_accuracy_mean': accuracy_mean,
            'cv_accuracy_std': accuracy_std,
            'cv_folds': n_successful,
            'cv_stability': stability
        }
        
        logger.info(f"{algorithm_name} CV - AUC: {auc_mean:.3f}±{auc_std:.3f} ({stability})")
        return cv_results
        
    except Exception as e:
        # Deliberate best-effort: a CV failure must not abort the whole task
        logger.error(f"Cross-validation failed for {algorithm_name}: {str(e)}")
        return _failed_cv_results()

# Notebook feedback: printed when this statement runs, i.e. after the
# definitions above it in the cell have been executed.
print("Model training and evaluation functions defined successfully!")

In [None]:
# Cell 7: Main Analysis Pipeline Functions

def run_prediction_task(X: np.ndarray, y: np.ndarray, task_name: str, 
                       dataset_name: str, algorithms: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Evaluate every supplied algorithm on one prediction task
    
    The data is split once into a train/holdout pair (stratified when
    possible, otherwise random). Each algorithm is scored on the holdout set
    and, if that succeeds, additionally cross-validated on the full data.
    
    Args:
        X: Feature matrix
        y: Target vector
        task_name: Name of the prediction task
        dataset_name: Name of the dataset
        algorithms: Mapping of algorithm name to its configuration
        
    Returns:
        Mapping of algorithm name to its merged holdout and cross-validation
        results; algorithms that fail on the holdout set are omitted
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    logger.info(f"\n=== {task_name} - {dataset_name} ===")
    logger.info(f"Data shape: {n_samples} samples, {n_features} features")
    
    # Report how balanced the classes are
    labels, label_counts = np.unique(y, return_counts=True)
    class_ratio = min(label_counts) / max(label_counts)
    logger.info(f"Class distribution: {dict(zip(labels, label_counts))}")
    logger.info(f"Class ratio: {class_ratio:.3f}")
    
    split_options = {
        'test_size': config.test_size,
        'random_state': config.random_state
    }
    try:
        # Preferred: keep the class proportions in both partitions
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, **split_options)
        logger.info(f"Train/Test split: {len(X_train)}/{len(X_test)} samples")
    except ValueError:
        # Stratification is impossible (e.g. a class is too small): split randomly
        X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
        logger.warning("Stratified split failed, using random split")
    
    task_results = {}
    
    for algorithm_name, algorithm_config in algorithms.items():
        logger.info(f"Testing {algorithm_name}...")
        
        # Holdout evaluation comes first; cross-validation is skipped on failure
        holdout_results = train_single_algorithm(
            X_train, X_test, y_train, y_test, 
            algorithm_name, algorithm_config
        )
        if holdout_results is None:
            logger.warning(f"{algorithm_name} failed on holdout set")
            continue
        
        cv_results = perform_cross_validation(X, y, algorithm_name, algorithm_config)
        
        # Merge both result sets; CV keys win on a name clash
        merged = dict(holdout_results)
        merged.update(cv_results)
        merged['task_name'] = task_name
        merged['dataset_name'] = dataset_name
        task_results[algorithm_name] = merged
        
        logger.info(f"{algorithm_name} completed - Holdout AUC: {holdout_results['auc']:.3f}, "
                   f"CV AUC: {cv_results['cv_auc_mean']:.3f}±{cv_results['cv_auc_std']:.3f}")
    
    logger.info(f"Task completed: {len(task_results)} algorithms successful")
    return task_results

def analyze_single_dataset(dataset_name: str, file_path: str, 
                          algorithms: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Run every prediction task that can be derived from one dataset
    
    Loads the dataset, builds all targets, and for each target selects the
    permitted features, preprocesses the data and evaluates all algorithms.
    
    Args:
        dataset_name: Name of the dataset
        file_path: Path to the dataset file
        algorithms: Mapping of algorithm name to its configuration
        
    Returns:
        Mapping of "<target_category>_<target>" to a task record (task name,
        category, target name, per-algorithm results, sample and feature
        counts). Empty when the dataset cannot be loaded or has no targets.
    """
    banner = '=' * 60
    logger.info(f"\n{banner}")
    logger.info(f"ANALYZING DATASET: {dataset_name}")
    logger.info(f"{banner}")
    
    df = load_and_validate_dataset(file_path, dataset_name)
    if df is None:
        logger.error(f"Failed to load {dataset_name}")
        return {}
    
    summary = get_dataset_summary(df, dataset_name)
    logger.info(f"Dataset summary: {summary['n_rows']} rows, {summary['n_columns']} columns, "
               f"{summary['missing_data_percent']:.1f}% missing data")
    
    all_targets = create_all_targets(df)
    if not all_targets:
        logger.warning(f"No valid targets found for {dataset_name}")
        return {}
    
    # Pool the clinical, molecular and imaging feature groups
    feature_categories = get_feature_categories(df)
    clinical_features = feature_categories['clinical']
    molecular_features = feature_categories['molecular']
    imaging_features = feature_categories['imaging']
    all_features = clinical_features + molecular_features + imaging_features
    
    logger.info(f"Available features: {len(all_features)} total")
    
    dataset_results = {}
    
    for target_category, target_info in all_targets.items():
        logger.info(f"\n--- Processing {target_category.upper()} targets ---")
        
        target_data = target_info['data']
        target_names = target_info['targets']
        target_descriptions = target_info['descriptions']
        
        for target, description in zip(target_names, target_descriptions):
            logger.info(f"\nProcessing target: {description}")
            
            # Restrict to features that are safe for this target (no leakage)
            safe_features = select_features_for_target(all_features, target)
            logger.info(f"Using {len(safe_features)} features for {target}")
            
            X, y, error_msg = preprocess_data(target_data, safe_features, target)
            if X is None:
                logger.warning(f"Preprocessing failed for {target}: {error_msg}")
                continue
            
            task_results = run_prediction_task(X, y, description, dataset_name, algorithms)
            if not task_results:
                continue
            
            dataset_results[f"{target_category}_{target}"] = {
                'task_name': description,
                'target_category': target_category,
                'target_name': target,
                'results': task_results,
                'n_samples': len(X),
                'n_features': X.shape[1]
            }
            logger.info(f"Task {description} completed with {len(task_results)} algorithms")
    
    logger.info(f"\nDataset {dataset_name} analysis completed: {len(dataset_results)} tasks successful")
    return dataset_results

def run_comprehensive_analysis(datasets_to_analyze: Optional[List[str]] = None) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Run comprehensive analysis across all or specified datasets
    
    Datasets whose file is not marked as available in file_status are
    skipped with a warning. A failure while analyzing one dataset is logged
    (with traceback) and does not stop the remaining datasets.
    
    Args:
        datasets_to_analyze: List of dataset names to analyze, or None for
            every dataset listed in config.datasets
        
    Returns:
        Dictionary mapping dataset names to their analysis results; datasets
        that failed or produced no results are omitted. Empty when no
        algorithm is available.
    """
    logger.info("STARTING COMPREHENSIVE NEUROSURGICAL AI ANALYSIS")
    logger.info("="*60)
    
    # Get available algorithms
    algorithms = get_available_algorithms()
    if not algorithms:
        logger.error("No algorithms available for analysis")
        return {}
    
    # Determine which datasets were requested
    if datasets_to_analyze is None:
        requested = list(config.datasets.keys())
    else:
        requested = list(datasets_to_analyze)
    
    # Keep only datasets with an available file and report the rest instead
    # of dropping them silently
    datasets_to_analyze = []
    skipped = []
    for ds in requested:
        if file_status.get(ds, False):
            datasets_to_analyze.append(ds)
        else:
            skipped.append(ds)
    
    if skipped:
        logger.warning(f"Skipping datasets without an available file: {skipped}")
    
    logger.info(f"Analyzing {len(datasets_to_analyze)} datasets with {len(algorithms)} algorithms")
    logger.info(f"Datasets: {datasets_to_analyze}")
    logger.info(f"Algorithms: {list(algorithms.keys())}")
    
    all_results = {}
    
    # Analyze each dataset
    for dataset_name in datasets_to_analyze:
        try:
            # Looked up inside the try so one bad entry cannot abort the run
            file_path = config.datasets[dataset_name]
            dataset_results = analyze_single_dataset(dataset_name, file_path, algorithms)
        except Exception as e:
            # Top-level boundary: record the failure with traceback, move on
            logger.exception(f"✗ {dataset_name} analysis failed: {str(e)}")
            continue
        
        if dataset_results:
            all_results[dataset_name] = dataset_results
            logger.info(f"✓ {dataset_name} analysis completed successfully")
        else:
            logger.warning(f"✗ {dataset_name} analysis produced no results")
    
    logger.info("\nCOMPREHENSIVE ANALYSIS COMPLETED")
    logger.info(f"Successfully analyzed: {len(all_results)}/{len(datasets_to_analyze)} datasets")
    
    return all_results

# Notebook feedback: printed when this statement runs, i.e. after the
# definitions above it in the cell have been executed.
print("Main analysis pipeline functions defined successfully!")

In [None]:
# Cell 8: Results Analysis and Reporting Functions

def _select_best_row(frame: pd.DataFrame, column: str, highest: bool) -> Optional[Dict[str, Any]]:
    """Return the row with the extreme non-NaN value in *column* as a dict, or None if there is none."""
    values = frame[column].dropna()
    if values.empty:
        return None
    label = values.idxmax() if highest else values.idxmin()
    return frame.loc[label].to_dict()

def summarize_results(all_results: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Any]:
    """
    Create a comprehensive summary of all results
    
    Args:
        all_results: Complete results from all datasets
            (dataset name -> task key -> task record with a 'results'
            mapping of algorithm name -> metrics)
        
    Returns:
        Dictionary containing summary statistics: dataset/task/run counts,
        per-dataset AUC statistics, per-algorithm and per-task aggregates
        (as produced by DataFrame.to_dict, i.e. keyed by
        (metric, statistic) tuples) and the best performers.
        
        'best_performers' may contain 'highest_auc', 'most_stable' and
        'best_cv_performance'. Runs whose cross-validation is marked
        'FAILED' carry placeholder CV values (std 0.0, mean 0.5) and are
        therefore excluded from the two CV-based categories; a category is
        omitted when no run qualifies for it.
        
        Returns {'error': 'No results to summarize'} for empty input.
    """
    if not all_results:
        return {'error': 'No results to summarize'}
    
    summary = {
        'total_datasets': len(all_results),
        'total_tasks': 0,
        'total_algorithm_runs': 0,
        'best_performers': {},
        'algorithm_performance': {},
        'task_performance': {},
        'dataset_performance': {}
    }
    
    all_algorithm_results = []
    
    # Collect all results into flat records
    for dataset_name, dataset_results in all_results.items():
        dataset_aucs = []
        
        for task_info in dataset_results.values():
            summary['total_tasks'] += 1
            
            for algorithm_name, algorithm_results in task_info['results'].items():
                summary['total_algorithm_runs'] += 1
                
                all_algorithm_results.append({
                    'dataset': dataset_name,
                    'task': task_info['task_name'],
                    'algorithm': algorithm_name,
                    'auc': algorithm_results['auc'],
                    'accuracy': algorithm_results['accuracy'],
                    'cv_auc_mean': algorithm_results['cv_auc_mean'],
                    'cv_auc_std': algorithm_results['cv_auc_std'],
                    'cv_stability': algorithm_results['cv_stability']
                })
                dataset_aucs.append(algorithm_results['auc'])
        
        # Dataset-level performance
        if dataset_aucs:
            summary['dataset_performance'][dataset_name] = {
                'mean_auc': np.mean(dataset_aucs),
                'std_auc': np.std(dataset_aucs),
                'max_auc': np.max(dataset_aucs),
                'n_results': len(dataset_aucs)
            }
    
    if not all_algorithm_results:
        return summary
    
    # Convert to DataFrame for easier analysis
    results_df = pd.DataFrame(all_algorithm_results)
    
    # Algorithm performance summary
    algorithm_stats = results_df.groupby('algorithm').agg({
        'auc': ['mean', 'std', 'max', 'count'],
        'cv_auc_mean': ['mean', 'std'],
        'accuracy': ['mean', 'std']
    }).round(4)
    
    summary['algorithm_performance'] = algorithm_stats.to_dict()
    
    # Task performance summary
    task_stats = results_df.groupby('task').agg({
        'auc': ['mean', 'std', 'max', 'count']
    }).round(4)
    
    summary['task_performance'] = task_stats.to_dict()
    
    # Find best performers. FAILED cross-validation rows hold placeholder
    # values (std 0.0) that would otherwise always win "most stable".
    cv_scored = results_df[results_df['cv_stability'] != 'FAILED']
    candidates = {
        'highest_auc': _select_best_row(results_df, 'auc', highest=True),
        'most_stable': _select_best_row(cv_scored, 'cv_auc_std', highest=False),
        'best_cv_performance': _select_best_row(cv_scored, 'cv_auc_mean', highest=True)
    }
    summary['best_performers'] = {
        category: row for category, row in candidates.items() if row is not None
    }
    
    return summary

def generate_results_table(all_results: Dict[str, Dict[str, Dict[str, Any]]], 
                          sort_by: str = 'auc') -> pd.DataFrame:
    """
    Generate a comprehensive results table
    
    Args:
        all_results: Complete results from all datasets
        sort_by: Column to sort by in descending order, e.g. 'auc',
            'accuracy' or 'cv_auc_mean'. The name is matched against the
            table columns case-insensitively ('auc' selects 'AUC'); a name
            that matches no column leaves the rows in insertion order.
        
    Returns:
        DataFrame with one row per (dataset, task, algorithm) combination,
        or an empty DataFrame when there are no algorithm results
    """
    rows = []
    
    for dataset_name, dataset_results in all_results.items():
        for task_info in dataset_results.values():
            for algorithm_name, algorithm_results in task_info['results'].items():
                rows.append({
                    'Dataset': dataset_name,
                    'Task': task_info['task_name'],
                    'Algorithm': algorithm_name,
                    'AUC': algorithm_results['auc'],
                    'Accuracy': algorithm_results['accuracy'],
                    'Sensitivity': algorithm_results['sensitivity'],
                    'Specificity': algorithm_results['specificity'],
                    'PPV': algorithm_results['ppv'],
                    'NPV': algorithm_results['npv'],
                    'F1-Score': algorithm_results['f1_score'],
                    'CV_AUC_Mean': algorithm_results['cv_auc_mean'],
                    'CV_AUC_Std': algorithm_results['cv_auc_std'],
                    'CV_Stability': algorithm_results['cv_stability'],
                    'N_Samples': task_info['n_samples'],
                    'N_Features': task_info['n_features'],
                    'Scaling_Used': algorithm_results['scaling_used']
                })
    
    if not rows:
        return pd.DataFrame()
    
    df = pd.DataFrame(rows)
    
    # Resolve the sort column. The columns are capitalised ('AUC') while the
    # default and documented values are lower-case ('auc'), so an exact match
    # alone would never sort; fall back to a case-insensitive lookup.
    if sort_by in df.columns:
        sort_column = sort_by
    else:
        columns_by_lower_name = {column.lower(): column for column in df.columns}
        sort_column = columns_by_lower_name.get(str(sort_by).lower())
    
    if sort_column is not None:
        df = df.sort_values(sort_column, ascending=False, kind='mergesort')
    
    return df

def print_summary_report(all_results: Dict[str, Dict[str, Dict[str, Any]]]):
    """
    Write a human-readable report of the analysis to stdout
    
    The report contains an executive summary, the best performers, the
    per-dataset AUC statistics, the algorithms ranked by mean AUC (best
    first) and the tasks ranked by mean AUC (hardest first).
    
    Args:
        all_results: Complete results from all datasets
    """
    def _print_ranking(table, group_column, ascending):
        # One numbered line per group with mean, std and count of the AUC
        ranking = (table.groupby(group_column)['AUC']
                   .agg(['mean', 'std', 'count'])
                   .sort_values('mean', ascending=ascending))
        for position, (label, row) in enumerate(ranking.iterrows(), 1):
            print(f"{position}. {label}: {row['mean']:.3f} ± {row['std']:.3f} (N={int(row['count'])})")
    
    rule = "=" * 80
    print("\n" + rule)
    print("COMPREHENSIVE NEUROSURGICAL AI ANALYSIS REPORT")
    print(rule)
    
    if not all_results:
        print("No results to report.")
        return
    
    summary = summarize_results(all_results)
    
    # Executive Summary
    print("\nEXECUTIVE SUMMARY:")
    print(f"• Analyzed {summary['total_datasets']} datasets")
    print(f"• Completed {summary['total_tasks']} prediction tasks")
    print(f"• Ran {summary['total_algorithm_runs']} algorithm evaluations")
    
    # Best Performers: (summary key, headline, metric key, metric caption)
    print("\nBEST PERFORMERS:")
    highlights = summary['best_performers']
    highlight_layout = (
        ('highest_auc', 'Highest AUC', 'auc', 'AUC'),
        ('most_stable', 'Most Stable', 'cv_auc_std', 'CV Std'),
        ('best_cv_performance', 'Best CV Performance', 'cv_auc_mean', 'CV AUC'),
    )
    for key, headline, metric_key, metric_caption in highlight_layout:
        if key not in highlights:
            continue
        entry = highlights[key]
        print(f"• {headline}: {entry['algorithm']} on {entry['task']} ({entry['dataset']}) - "
              f"{metric_caption}: {entry[metric_key]:.3f}")
    
    # Dataset Performance
    print("\nDATASET PERFORMANCE:")
    for dataset, performance in summary['dataset_performance'].items():
        print(f"• {dataset}: Mean AUC {performance['mean_auc']:.3f} ± {performance['std_auc']:.3f} "
              f"(Max: {performance['max_auc']:.3f}, N={performance['n_results']})")
    
    # Algorithm Rankings (highest mean AUC first)
    print("\nALGORITHM RANKINGS (by mean AUC):")
    results_df = generate_results_table(all_results)
    if not results_df.empty:
        _print_ranking(results_df, 'Algorithm', ascending=False)
    
    # Task Difficulty (lowest mean AUC first)
    print("\nTASK DIFFICULTY (by mean AUC, lower = harder):")
    if not results_df.empty:
        _print_ranking(results_df, 'Task', ascending=True)

def save_results_to_files(all_results: Dict[str, Dict[str, Dict[str, Any]]], output_dir: str = "."):
    """
    Save results to CSV and text files
    
    Writes, inside output_dir:
      - neurosurgical_ai_comprehensive_results.csv (all result rows)
      - neurosurgical_ai_summary_report.txt (overview, best performers,
        per-dataset performance)
      - <dataset>_detailed_results.csv for every dataset that has rows
    
    The output directory, including missing parent directories, is created
    if necessary. Nothing is written when all_results is empty.
    
    Args:
        all_results: Complete results from all datasets
        output_dir: Directory to save files
    """
    output_path = Path(output_dir)
    # parents=True so that nested targets such as "out/run1" work as well
    output_path.mkdir(parents=True, exist_ok=True)
    
    if not all_results:
        logger.warning("No results to save")
        return
    
    # Save comprehensive results table
    results_df = generate_results_table(all_results)
    has_rows = not results_df.empty
    if has_rows:
        csv_path = output_path / "neurosurgical_ai_comprehensive_results.csv"
        results_df.to_csv(csv_path, index=False)
        logger.info(f"Comprehensive results saved to: {csv_path}")
    
    # Save summary report
    summary = summarize_results(all_results)
    txt_path = output_path / "neurosurgical_ai_summary_report.txt"
    
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write("NEUROSURGICAL AI ANALYSIS SUMMARY REPORT\n")
        f.write("="*50 + "\n\n")
        
        f.write("Analysis Overview:\n")
        f.write(f"• Total datasets analyzed: {summary['total_datasets']}\n")
        f.write(f"• Total prediction tasks: {summary['total_tasks']}\n")
        f.write(f"• Total algorithm evaluations: {summary['total_algorithm_runs']}\n\n")
        
        if 'best_performers' in summary:
            f.write("Best Performers:\n")
            for category, performer in summary['best_performers'].items():
                f.write(f"• {category}: {performer['algorithm']} - {performer.get('auc', 'N/A')}\n")
            f.write("\n")
        
        f.write("Dataset Performance Summary:\n")
        for dataset, perf in summary['dataset_performance'].items():
            f.write(f"• {dataset}: {perf['mean_auc']:.3f} ± {perf['std_auc']:.3f}\n")
    
    logger.info(f"Summary report saved to: {txt_path}")
    
    # Save individual dataset reports. An empty table has no 'Dataset'
    # column, so filtering it would raise KeyError - skip in that case.
    if not has_rows:
        return
    
    for dataset_name in all_results:
        dataset_df = results_df[results_df['Dataset'] == dataset_name]
        if not dataset_df.empty:
            dataset_csv_path = output_path / f"{dataset_name}_detailed_results.csv"
            dataset_df.to_csv(dataset_csv_path, index=False)
            logger.info(f"{dataset_name} detailed results saved to: {dataset_csv_path}")

def analyze_algorithm_strengths(all_results: Dict[str, Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Any]]:
    """
    Profile each algorithm across the prediction tasks
    
    Args:
        all_results: Complete results from all datasets
        
    Returns:
        Mapping of algorithm name to its overall AUC mean/std, its best and
        worst task (by mean AUC), the share of runs rated 'HIGH' stability
        and the per-task aggregates. Empty when there are no result rows.
    """
    table = generate_results_table(all_results)
    if table.empty:
        return {}
    
    # Per-task aggregation applied to every algorithm's rows
    per_task_aggregation = {
        'AUC': ['mean', 'std', 'count'],
        'CV_AUC_Mean': ['mean', 'std'],
        'CV_Stability': lambda labels: (labels == 'HIGH').sum() / len(labels)
    }
    
    strengths = {}
    
    for algorithm in table['Algorithm'].unique():
        rows = table[table['Algorithm'] == algorithm]
        
        per_task = rows.groupby('Task').agg(per_task_aggregation).round(4)
        
        # Tasks ordered from highest to lowest mean AUC
        ranked_tasks = rows.groupby('Task')['AUC'].mean().sort_values(ascending=False)
        if len(ranked_tasks) > 0:
            best_task, best_task_auc = ranked_tasks.index[0], ranked_tasks.iloc[0]
            worst_task, worst_task_auc = ranked_tasks.index[-1], ranked_tasks.iloc[-1]
        else:
            best_task = best_task_auc = None
            worst_task = worst_task_auc = None
        
        highly_stable = rows['CV_Stability'] == 'HIGH'
        
        strengths[algorithm] = {
            'overall_mean_auc': rows['AUC'].mean(),
            'overall_std_auc': rows['AUC'].std(),
            'best_task': best_task,
            'best_task_auc': best_task_auc,
            'worst_task': worst_task,
            'worst_task_auc': worst_task_auc,
            'stability_rate': highly_stable.sum() / len(rows),
            'task_performance': {} if per_task.empty else per_task.to_dict()
        }
    
    return strengths

def generate_clinical_recommendations(all_results: Dict[str, Dict[str, Dict[str, Any]]]) -> List[str]:
    """
    Derive plain-text recommendations from the analysis results
    
    Up to four statements are produced: the best algorithm with a mean AUC
    above 0.7, the tasks with a mean AUC above 0.75, the tasks with a mean
    AUC below 0.65, and a note on sample size when the per-task correlation
    between sample count and AUC exceeds 0.3.
    
    Args:
        all_results: Complete results from all datasets
        
    Returns:
        List of clinical recommendation strings (a single explanatory
        message when there is nothing to evaluate)
    """
    if not all_results:
        return ["No results available for generating recommendations."]
    
    table = generate_results_table(all_results)
    if table.empty:
        return ["No valid results for generating recommendations."]
    
    advice = []
    auc_statistics = ['mean', 'std', 'count']
    
    # Algorithms whose average AUC clears 0.7, best first
    by_algorithm = table.groupby('Algorithm')['AUC'].agg(auc_statistics)
    strong_algorithms = by_algorithm[by_algorithm['mean'] > 0.7].sort_values('mean', ascending=False)
    if not strong_algorithms.empty:
        best_algo = strong_algorithms.index[0]
        best_auc = strong_algorithms.loc[best_algo, 'mean']
        advice.append(f"Primary recommendation: {best_algo} shows the best overall performance "
                      f"with mean AUC of {best_auc:.3f} across tasks.")
    
    by_task = table.groupby('Task')['AUC'].agg(auc_statistics)
    
    # Tasks that are predicted well on average (top three named)
    predictable = by_task[by_task['mean'] > 0.75].sort_values('mean', ascending=False)
    if not predictable.empty:
        predictable_names = ', '.join(predictable.index[:3])
        advice.append(f"Most predictable outcomes: {predictable_names} show "
                      f"consistently high prediction accuracy.")
    
    # Tasks that are predicted poorly on average (worst three named)
    challenging = by_task[by_task['mean'] < 0.65].sort_values('mean', ascending=True)
    if not challenging.empty:
        challenging_names = ', '.join(challenging.index[:3])
        advice.append(f"Challenging predictions: {challenging_names} may require "
                      f"additional feature engineering or larger datasets.")
    
    # Relationship between per-task sample size and per-task AUC
    task_means = table.groupby('Task').agg({'N_Samples': 'mean', 'AUC': 'mean'})
    correlation = task_means['N_Samples'].corr(task_means['AUC'])
    if correlation > 0.3:
        advice.append("Larger sample sizes appear to improve prediction performance. "
                      "Consider expanding datasets for better results.")
    
    return advice

# Notebook feedback: printed when this statement runs, i.e. after the
# definitions above it in the cell have been executed.
print("Results analysis and reporting functions defined successfully!")

In [None]:
# Cell 9: Main Execution Cell

def main_analysis(datasets_to_run: List[str] = None, save_results: bool = True, 
                 output_directory: str = "./results") -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Drive the whole pipeline: analyze, report, and optionally persist
    
    Args:
        datasets_to_run: List of dataset names to analyze (None for all available)
        save_results: Whether to save results to files
        output_directory: Directory to save results
        
    Returns:
        Complete analysis results, or an empty dict when the analysis
        produced nothing
    """
    separator = "=" * 60
    print("Starting Neurosurgical AI Analysis Pipeline...")
    print(separator)
    
    # Step 1: run the analysis itself
    all_results = run_comprehensive_analysis(datasets_to_run)
    if not all_results:
        logger.error("No results generated. Check dataset availability and configuration.")
        return {}
    
    # Step 2: console summary
    print_summary_report(all_results)
    
    # Step 3: additional analyses
    print("\nGENERATING DETAILED ANALYSES...")
    
    algorithm_analysis = analyze_algorithm_strengths(all_results)
    if algorithm_analysis:
        print("\nALGORITHM STRENGTH ANALYSIS:")
        for algorithm, analysis in algorithm_analysis.items():
            strength_line = (f"• {algorithm}: Best at {analysis['best_task']} "
                             f"(AUC: {analysis['best_task_auc']:.3f}), "
                             f"Overall: {analysis['overall_mean_auc']:.3f}")
            print(strength_line)
    
    recommendations = generate_clinical_recommendations(all_results)
    if recommendations:
        print("\nCLINICAL RECOMMENDATIONS:")
        for number, recommendation in enumerate(recommendations, 1):
            print(f"{number}. {recommendation}")
    
    # Step 4: persist on request
    if save_results:
        print(f"\nSAVING RESULTS TO: {output_directory}")
        save_results_to_files(all_results, output_directory)
    
    print("\n" + separator)
    print("ANALYSIS PIPELINE COMPLETED SUCCESSFULLY!")
    print(separator)
    
    return all_results

def run_single_dataset_analysis(dataset_name: str, save_results: bool = True) -> Dict[str, Dict[str, Any]]:
    """
    Run the full pipeline for exactly one dataset
    
    Results are saved (if requested) to "./results_<dataset_name>".
    
    Args:
        dataset_name: Name of the dataset to analyze
        save_results: Whether to save results
        
    Returns:
        Analysis results for the dataset; empty when the dataset is not
        configured, its file is unavailable, or the analysis yields nothing
    """
    is_configured = dataset_name in config.datasets
    if not is_configured:
        logger.error(f"Dataset '{dataset_name}' not found in configuration")
        return {}
    
    file_available = file_status.get(dataset_name, False)
    if not file_available:
        logger.error(f"Dataset file for '{dataset_name}' not found")
        return {}
    
    print(f"Analyzing single dataset: {dataset_name}")
    output_directory = f"./results_{dataset_name}"
    pipeline_results = main_analysis([dataset_name], save_results, output_directory)
    
    return pipeline_results.get(dataset_name, {})

def quick_test_analysis(n_samples: int = 100) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Run a quick test analysis with limited data for testing purposes
    
    A random sample of the first available dataset is written to a temporary
    CSV file in the working directory, the configuration is pointed at that
    file and the regular pipeline is run without saving results. The
    configuration (dataset path and min_sample_size) is restored and the
    temporary file removed afterwards, even if the analysis raises.
    
    Args:
        n_samples: Number of samples to use for testing
        
    Returns:
        Test analysis results; empty when no dataset is available or the
        dataset has fewer than n_samples rows
    """
    print(f"Running quick test analysis with {n_samples} samples...")
    
    # Temporarily modify config for testing
    original_min_sample_size = config.min_sample_size
    config.min_sample_size = min(10, n_samples // 5)
    
    try:
        # Get first available dataset
        available_datasets = [ds for ds, status in file_status.items() if status]
        if not available_datasets:
            logger.error("No datasets available for testing")
            return {}
        
        test_dataset = available_datasets[0]
        logger.info(f"Using {test_dataset} for quick test")
        
        # Load data
        original_path = config.datasets[test_dataset]
        df = load_and_validate_dataset(original_path, test_dataset)
        
        if df is None or len(df) < n_samples:
            logger.error("Insufficient data for testing")
            return {}
        
        # The guard above ensures len(df) >= n_samples, so no clamping needed
        df_sample = df.sample(n=n_samples, random_state=42)
        
        sample_path = f"./temp_sample_{test_dataset}.csv"
        try:
            # Write the sample and point the config at it for this run only
            df_sample.to_csv(sample_path, index=False)
            config.datasets[test_dataset] = sample_path
            
            return main_analysis([test_dataset], save_results=False)
        finally:
            # Always undo the redirect and remove the temp file, so a failed
            # run cannot leave the config pointing at a sample file
            config.datasets[test_dataset] = original_path
            Path(sample_path).unlink(missing_ok=True)
        
    finally:
        # Restore original config
        config.min_sample_size = original_min_sample_size

# Example usage functions
def example_full_analysis():
    """Example: run the pipeline with its defaults (no dataset restriction)"""
    full_results = main_analysis()
    return full_results

def example_specific_datasets():
    """Example: run the pipeline on an explicit list of datasets"""
    # Modify as needed
    selected_results = main_analysis(['ConvNext', 'ViT'])
    return selected_results

def example_single_dataset():
    """Example: run the pipeline for a single named dataset"""
    # Modify dataset name as needed
    single_results = run_single_dataset_analysis('ConvNext')
    return single_results

def example_quick_test():
    """Example: quick smoke test on a 50-row sample"""
    smoke_results = quick_test_analysis(n_samples=50)
    return smoke_results

# Notebook feedback: confirm the entry points are defined and list them.
# Emitted as one joined block; the text written to stdout is unchanged.
print("\n".join((
    "Main execution functions defined successfully!",
    "\nAvailable execution options:",
    "1. main_analysis() - Run full analysis",
    "2. run_single_dataset_analysis('dataset_name') - Analyze one dataset",
    "3. quick_test_analysis(n_samples=100) - Quick test",
    "4. example_* functions for common use cases",
    "\nReady to run analysis!",
)))

In [None]:
# Run cells 1-8 first to define all functions
# Run cell 9 to get execution options
# Execute analysis using one of these approaches:
#
# NOTE(review): as written, all four options below execute one after another
# when this cell runs, and each assignment overwrites `results`, so only the
# quick-test output remains afterwards. If only one approach is wanted,
# comment out the others before running.

# Option 1: Full analysis (all available datasets, results saved to ./results)
results = main_analysis()

# Option 2: Specific datasets
results = main_analysis(['ConvNext', 'ViT'])

# Option 3: Single dataset (returns that dataset's results only, not the
# dataset-keyed mapping the other options return)
results = run_single_dataset_analysis('ConvNext')

# Option 4: Quick test (sampled data, nothing saved)
results = quick_test_analysis(n_samples=100)