In [10]:
# IOL CALCULATION FOR PRE-DMEK PATIENTS - SETUP AND DATA LOADING
# ================================================================
# PURPOSE: Set up the analysis environment and load patient data
# This notebook optimizes IOL power calculations for Fuchs' dystrophy patients
# undergoing combined phacoemulsification and DMEK surgery

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

# Constants for clinical accuracy thresholds (diopters)
THRESHOLDS = [0.25, 0.50, 0.75, 1.00]
TEST_SIZE = 0.25      # 25% holdout for final testing
N_FOLDS = 5           # 5-fold cross-validation

# MULTI-SEED CONFIGURATION FOR ROBUST VALIDATION
SEEDS = [42, 123, 456, 789, 2025]  # Multiple seeds for statistical robustness
print("=" * 70)
print("üîß MULTI-SEED CONFIGURATION")
print("=" * 70)
print(f"Seeds for validation: {SEEDS}")
print("This ensures results are not dependent on random split")
print("Each seed creates different train/test splits for robust assessment")

# Storage for multi-seed results
multi_seed_results = {

    'parameter': {},
    'multiplicative': {},
    'additive': {},
    'combined': {},
    'fixed_combined': {}
}

print("=" * 70)
print("IOL CALCULATION FOR PRE-DMEK PATIENTS")
print("=" * 70)

print("\nüìä WHAT WE'RE DOING:")
print("-" * 50)
print("‚Ä¢ Loading data from Fuchs' dystrophy patients")
print("‚Ä¢ These patients had combined cataract + DMEK surgery")
print("‚Ä¢ Goal: Improve IOL power calculation accuracy")
print("‚Ä¢ Challenge: Edematous corneas distort standard formulas")
print(f"‚Ä¢ NEW: Using {len(SEEDS)} different seeds for robust validation")

# Load the patient data
df = pd.read_excel('FacoDMEK.xlsx')
print(f"\n‚úÖ Loaded {len(df)} patients from FacoDMEK.xlsx")

print("\nüîç KEY MEASUREMENTS IN OUR DATA:")
print("-" * 50)
print("‚Ä¢ Bio-AL: Axial length (mm)")
print("‚Ä¢ Bio-Ks/Kf: Steep and flat keratometry (D)")
print("‚Ä¢ CCT: Central corneal thickness (Œºm) - KEY for edema")
print("‚Ä¢ IOL Power: Implanted lens power (D)")
print("‚Ä¢ PostOP Spherical Equivalent: Actual outcome (D)")

üîß MULTI-SEED CONFIGURATION
Seeds for validation: [42, 123, 456, 789, 2025]
This ensures results are not dependent on random split
Each seed creates different train/test splits for robust assessment
IOL CALCULATION FOR PRE-DMEK PATIENTS

üìä WHAT WE'RE DOING:
--------------------------------------------------
‚Ä¢ Loading data from Fuchs' dystrophy patients
‚Ä¢ These patients had combined cataract + DMEK surgery
‚Ä¢ Goal: Improve IOL power calculation accuracy
‚Ä¢ Challenge: Edematous corneas distort standard formulas
‚Ä¢ NEW: Using 5 different seeds for robust validation

‚úÖ Loaded 96 patients from FacoDMEK.xlsx

üîç KEY MEASUREMENTS IN OUR DATA:
--------------------------------------------------
‚Ä¢ Bio-AL: Axial length (mm)
‚Ä¢ Bio-Ks/Kf: Steep and flat keratometry (D)
‚Ä¢ CCT: Central corneal thickness (Œºm) - KEY for edema
‚Ä¢ IOL Power: Implanted lens power (D)
‚Ä¢ PostOP Spherical Equivalent: Actual outcome (D)


In [11]:
# STANDARD SRK/T2 FORMULA IMPLEMENTATION
# ========================================
# PURPOSE: Implement the baseline SRK/T2 formula (Sheard et al. 2010)
# This is the current gold standard for IOL calculations
# We'll use this as our baseline to compare improvements against

def calculate_SRKT2(AL, K_avg, IOL_power, A_constant, nc=1.333, k_index=1.3375):
    """
    SRK/T2 Formula (Sheard et al. 2010)
    - Assumes NORMAL corneas (nc=1.333, k_index=1.3375)
    - These assumptions fail in edematous Fuchs' corneas
    
    Parameters:
    - AL: Axial length (mm)
    - K_avg: Average keratometry (D)
    - IOL_power: IOL power (D)
    - A_constant: Lens-specific constant
    - nc: Corneal refractive index (we'll optimize this!)
    - k_index: Keratometric index (we'll optimize this too!)
    """
    # Constants
    na = 1.336  # Aqueous/vitreous refractive index
    V = 12      # Vertex distance (mm)
    ncm1 = nc - 1
    
    # Convert keratometry to radius using keratometric index
    # This is where edema causes problems - k_index assumes normal cornea!
    r = (k_index - 1) * 1000 / K_avg
    
    # Axial length correction for long eyes
    if AL <= 24.2:
        LCOR = AL
    else:
        LCOR = 3.446 + 1.716 * AL - 0.0237 * AL * AL
    
    # H2 calculation (corneal height) - Sheard's modification
    H2 = -10.326 + 0.32630 * LCOR + 0.13533 * K_avg
    
    # ACD (Anterior Chamber Depth) estimation
    # Edema can affect this too!
    ACD_const = 0.62467 * A_constant - 68.747
    offset = ACD_const - 3.336
    ACD_est = H2 + offset
    
    # Retinal thickness correction
    RETHICK = 0.65696 - 0.02029 * AL
    LOPT = AL + RETHICK  # Optical axial length
    
    # SRK/T2 refraction calculation - the complex optics formula
    numerator = (1000 * na * (na * r - ncm1 * LOPT) - 
                 IOL_power * (LOPT - ACD_est) * (na * r - ncm1 * ACD_est))
    
    denominator = (na * (V * (na * r - ncm1 * LOPT) + LOPT * r) - 
                   0.001 * IOL_power * (LOPT - ACD_est) * 
                   (V * (na * r - ncm1 * ACD_est) + ACD_est * r))
    
    return numerator / denominator

print("=" * 70)
print("SRK/T2 FORMULA (Sheard et al. 2010)")
print("=" * 70)

print("‚Ä¢ SKR/T2 assumes normal corneal properties")
print("‚Ä¢ In Fuchs' dystrophy, the cornea is NOT normal:")
print("  - Edema changes refractive index (nc)")
print("  - Swelling alters keratometric index (k_index)")
print("  - Anterior chamber depth is affected")
print("\nOur strategy: Keep the formula structure, optimize the parameters!")

print("\nüìê THE SRK/T2 FORMULA:")
print()
print("         1000¬∑n‚Çê¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑Lopt) - P¬∑(Lopt - ACDest)¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑ACDest)")
print("REF = ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
print("       n‚Çê¬∑(V¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑Lopt) + Lopt¬∑r) - 0.001¬∑P¬∑(Lopt - ACDest)¬∑(V¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑ACDest) + ACDest¬∑r)")

SRK/T2 FORMULA (Sheard et al. 2010)
‚Ä¢ SKR/T2 assumes normal corneal properties
‚Ä¢ In Fuchs' dystrophy, the cornea is NOT normal:
  - Edema changes refractive index (nc)
  - Swelling alters keratometric index (k_index)
  - Anterior chamber depth is affected

Our strategy: Keep the formula structure, optimize the parameters!

üìê THE SRK/T2 FORMULA:

         1000¬∑n‚Çê¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑Lopt) - P¬∑(Lopt - ACDest)¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑ACDest)
REF = ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
       n‚Çê¬∑(V¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑Lopt) + Lopt¬∑r) - 0.001¬∑P¬∑(Lopt - ACDest)¬∑(V¬∑(n‚Çê¬∑r - nc‚Çã‚ÇÅ¬∑ACDest) + ACDest¬∑r)


In [12]:
# BASELINE PERFORMANCE EVALUATION
# =================================
# PURPOSE: Calculate how well standard SRK/T2 performs on our Fuchs' patients
# This establishes the baseline that we need to beat
# Spoiler: It won't be great due to the edematous corneas!

print("=" * 70)
print("BASELINE SRK/T2 PERFORMANCE")
print("=" * 70)

print("\nüìã WHAT WE'RE DOING:")
print("-" * 50)
print("1. Calculate average K from steep and flat readings")
print("2. Apply standard SRK/T2 to all 96 patients")
print("3. Compare predictions to actual outcomes")
print("4. Measure error to establish baseline performance")

# Calculate average K (needed for SRK/T2)
df['K_avg'] = (df['Bio-Ks'] + df['Bio-Kf']) / 2

# Apply standard SRK/T2 formula to all patients
df['SRKT2_Prediction'] = df.apply(
    lambda row: calculate_SRKT2(
        AL=row['Bio-AL'],
        K_avg=row['K_avg'],
        IOL_power=row['IOL Power'],
        A_constant=row['A-Constant']
        # Note: Using DEFAULT nc=1.333 and k_index=1.3375
    ), axis=1
)

# Calculate prediction errors
df['Prediction_Error'] = df['PostOP Spherical Equivalent'] - df['SRKT2_Prediction']
df['Absolute_Error'] = abs(df['Prediction_Error'])

# Calculate key metrics
mae = df['Absolute_Error'].mean()
me = df['Prediction_Error'].mean()
std = df['Prediction_Error'].std()
median_ae = df['Absolute_Error'].median()

print("\nüìä BASELINE PERFORMANCE METRICS:")
print("=" * 70)
print(f"  Mean Absolute Error (MAE):     {mae:.4f} D")
print(f"  Mean Error (ME):                {me:+.4f} D")
print(f"  Standard Deviation (SD):        {std:.4f} D")
print(f"  Median Absolute Error:          {median_ae:.4f} D")

print("\nüí° INTERPRETATION:")
print("-" * 50)
if mae > 1.0:
    print(f"‚Ä¢ MAE of {mae:.2f} D is POOR (>1.0 D is clinically unacceptable)")
else:
    print(f"‚Ä¢ MAE of {mae:.2f} D is moderate")
    
if abs(me) > 0.25:
    print(f"‚Ä¢ Mean error of {me:+.2f} D shows systematic bias")
    if me < 0:
        print("  ‚Üí Formula tends to predict too myopic (negative)")
    else:
        print("  ‚Üí Formula tends to predict too hyperopic (positive)")

# Calculate clinical accuracy rates
within_025 = (df['Absolute_Error'] <= 0.25).sum() / len(df) * 100
within_050 = (df['Absolute_Error'] <= 0.50).sum() / len(df) * 100
within_075 = (df['Absolute_Error'] <= 0.75).sum() / len(df) * 100
within_100 = (df['Absolute_Error'] <= 1.00).sum() / len(df) * 100

print("\nüìà CLINICAL ACCURACY:")
print("-" * 70)
print(f"  Within ¬±0.25 D:  {within_025:.1f}% of eyes")
print(f"  Within ¬±0.50 D:  {within_050:.1f}% of eyes")
print(f"  Within ¬±0.75 D:  {within_075:.1f}% of eyes")
print(f"  Within ¬±1.00 D:  {within_100:.1f}% of eyes")

print("\nüéØ CLINICAL TARGETS:")
print("-" * 50)
print("‚Ä¢ Modern standard: >70% within ¬±0.50 D")
print("‚Ä¢ Acceptable: >90% within ¬±1.00 D")
print(f"‚Ä¢ Our baseline: {within_050:.1f}% within ¬±0.50 D")
print("\n‚ö†Ô∏è Standard SRK/T2 clearly struggles with Fuchs' dystrophy!")
print("This is why we need optimization!")

BASELINE SRK/T2 PERFORMANCE

üìã WHAT WE'RE DOING:
--------------------------------------------------
1. Calculate average K from steep and flat readings
2. Apply standard SRK/T2 to all 96 patients
3. Compare predictions to actual outcomes
4. Measure error to establish baseline performance

üìä BASELINE PERFORMANCE METRICS:
  Mean Absolute Error (MAE):     1.3591 D
  Mean Error (ME):                -0.2915 D
  Standard Deviation (SD):        1.7471 D
  Median Absolute Error:          1.0311 D

üí° INTERPRETATION:
--------------------------------------------------
‚Ä¢ MAE of 1.36 D is POOR (>1.0 D is clinically unacceptable)
‚Ä¢ Mean error of -0.29 D shows systematic bias
  ‚Üí Formula tends to predict too myopic (negative)

üìà CLINICAL ACCURACY:
----------------------------------------------------------------------
  Within ¬±0.25 D:  13.5% of eyes
  Within ¬±0.50 D:  26.0% of eyes
  Within ¬±0.75 D:  35.4% of eyes
  Within ¬±1.00 D:  49.0% of eyes

üéØ CLINICAL TARGETS:
--------

In [13]:
# RIDGE REGRESSION ANALYSIS - IDENTIFYING IMPORTANT FEATURES
# ===========================================================
# PURPOSE: Use machine learning to identify which features matter most
# This will guide our optimization strategy

print("=" * 80)
print("RIDGE REGRESSION FEATURE ANALYSIS")
print("=" * 80)

print("\nüîç WHY START WITH RIDGE?")
print("-" * 50)
print("‚Ä¢ Ridge regression identifies important features")
print("‚Ä¢ Helps us understand what drives prediction errors")
print("‚Ä¢ Guides our formula optimization strategy")
print("‚Ä¢ If CCT features are important, our hypothesis is correct!")

# Create feature matrix with interactions
print("\nüìä CREATING FEATURES:")
print("-" * 50)

features = []
feature_names = []

# Basic features
for col in ['Bio-AL', 'Bio-Ks', 'Bio-Kf', 'IOL Power', 'CCT']:
    features.append(df[col].values)
    feature_names.append(col)

# Add K_avg
features.append(df['K_avg'].values)
feature_names.append('K_avg')

# CCT-derived features
df['CCT_squared'] = df['CCT'] ** 2
df['CCT_deviation'] = df['CCT'] - 550
df['CCT_norm'] = (df['CCT'] - 600) / 100

features.extend([
    df['CCT_squared'].values,
    df['CCT_deviation'].values,
    df['CCT_norm'].values
])
feature_names.extend(['CCT_squared', 'CCT_deviation', 'CCT_norm'])

# Interaction terms
df['CCT_x_AL'] = df['CCT'] * df['Bio-AL']
df['CCT_x_K'] = df['CCT'] * df['K_avg']
df['CCT_ratio_AL'] = df['CCT'] / df['Bio-AL']

features.extend([
    df['CCT_x_AL'].values,
    df['CCT_x_K'].values,
    df['CCT_ratio_AL'].values
])
feature_names.extend(['CCT_x_AL', 'CCT_x_K', 'CCT_ratio_AL'])

X = np.column_stack(features)
y = df['PostOP Spherical Equivalent'].values

print(f"Created {len(feature_names)} features including CCT interactions")

# Standardize and train Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train Ridge to get feature importance
ridge_analysis = Ridge(alpha=1.0)
ridge_analysis.fit(X_scaled, y)

# Get feature importance from coefficients
feature_importance = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': ridge_analysis.coef_,
    'Abs_Coefficient': np.abs(ridge_analysis.coef_)
}).sort_values('Abs_Coefficient', ascending=False)

print("\nüèÜ TOP 10 MOST IMPORTANT FEATURES:")
print("-" * 50)
for idx, row in feature_importance.head(10).iterrows():
    print(f"  {row['Feature']:20} Coef={row['Coefficient']:+.4f}")

# Analyze CCT importance
cct_features = feature_importance[feature_importance['Feature'].str.contains('CCT')]
cct_importance = cct_features['Abs_Coefficient'].sum()
total_importance = feature_importance['Abs_Coefficient'].sum()
cct_percentage = (cct_importance / total_importance) * 100

print("\nüí° KEY FINDINGS:")
print("-" * 50)
print(f"‚Ä¢ CCT-related features account for {cct_percentage:.1f}% of total importance")
print(f"‚Ä¢ Top feature: {feature_importance.iloc[0]['Feature']}")

if 'CCT_ratio_AL' in feature_importance.head(3)['Feature'].values:
    print("‚Ä¢ CCT/AL ratio is among top 3 features!")
    print("‚Ä¢ This validates that CCT relative to eye size matters")

if cct_percentage > 50:
    print("\n‚úÖ HYPOTHESIS CONFIRMED:")
    print("CCT features dominate prediction - our CCT-dependent approach is justified!")

print("\nüéØ OPTIMIZATION STRATEGY BASED ON RIDGE:")
print("-" * 50)
print("1. Make optical parameters CCT-dependent (nc, k_index)")
print("2. Consider CCT/AL ratio in corrections")
print("3. Account for CCT interactions with other measurements")

RIDGE REGRESSION FEATURE ANALYSIS

üîç WHY START WITH RIDGE?
--------------------------------------------------
‚Ä¢ Ridge regression identifies important features
‚Ä¢ Helps us understand what drives prediction errors
‚Ä¢ Guides our formula optimization strategy
‚Ä¢ If CCT features are important, our hypothesis is correct!

üìä CREATING FEATURES:
--------------------------------------------------
Created 12 features including CCT interactions

üèÜ TOP 10 MOST IMPORTANT FEATURES:
--------------------------------------------------
  CCT_ratio_AL         Coef=+1.3677
  CCT_x_AL             Coef=-0.8898
  CCT_squared          Coef=-0.7666
  Bio-AL               Coef=+0.4903
  Bio-Ks               Coef=-0.3178
  CCT_x_K              Coef=+0.3101
  K_avg                Coef=-0.1584
  IOL Power            Coef=-0.1189
  CCT_norm             Coef=+0.0321
  CCT                  Coef=+0.0321

üí° KEY FINDINGS:
--------------------------------------------------
‚Ä¢ CCT-related features account

In [None]:
# PARAMETER OPTIMIZATION WITH K-FOLD CROSS-VALIDATION - MULTI-SEED
# =============================================
# PURPOSE: Optimize SRK/T2 parameters with nested CV for robust validation
# NOW WITH MULTIPLE SEEDS for statistical robustness

print("=" * 80)
print("PARAMETER OPTIMIZATION WITH K-FOLD CV - MULTI-SEED ANALYSIS")
print("=" * 80)

print("\nüéØ MULTI-SEED NESTED CROSS-VALIDATION:")
print("-" * 50)
print(f"‚Ä¢ Testing {len(SEEDS)} different random seeds: {SEEDS}")
print("‚Ä¢ Each seed: 75% train, 25% test")
print("‚Ä¢ Inner: 5-fold CV on training set")
print("‚Ä¢ Results averaged across seeds for robustness")

from scipy.optimize import differential_evolution
from sklearn.model_selection import train_test_split, KFold
import numpy as np

def calculate_mae_param(params, df_data):
    """Calculate MAE for parameter optimization"""
    nc_base, nc_cct_coef, k_index_base, k_index_cct_coef, acd_offset_base, acd_offset_cct_coef = params
    
    predictions = []
    for _, row in df_data.iterrows():
        cct_norm = (row['CCT'] - 600) / 100
        nc = nc_base + nc_cct_coef * cct_norm
        k_index = k_index_base + k_index_cct_coef * cct_norm
        acd_offset = acd_offset_base + acd_offset_cct_coef * cct_norm
        
        pred = calculate_SRKT2(
            AL=row['Bio-AL'],
            K_avg=row['K_avg'],
            IOL_power=row['IOL Power'],
            A_constant=row['A-Constant'] + acd_offset,
            nc=nc,
            k_index=k_index
        )
        predictions.append(pred)
    
    mae = mean_absolute_error(df_data['PostOP Spherical Equivalent'], predictions)
    return mae

bounds_param = [
    (1.20, 1.50),    # nc_base
    (-0.20, 0.20),   # nc_cct_coef  
    (1.20, 1.60),    # k_index_base
    (-0.30, 0.30),   # k_index_cct_coef
    (-3.0, 3.0),     # acd_offset_base
    (-3.0, 3.0),     # acd_offset_cct_coef
]

# Store results for each seed
seed_results_param = []
seed_test_maes_param = []
seed_train_maes_param = []  # NEW: Track training MAEs
seed_baseline_maes_param = []
seed_improvements_param = []
seed_overfit_ratios_param = []  # NEW: Track overfitting

df['K_avg'] = (df['Bio-Ks'] + df['Bio-Kf']) / 2

print("\n" + "="*80)
print("RUNNING MULTI-SEED ANALYSIS")
print("="*80)

for seed_idx, SEED in enumerate(SEEDS, 1):
    print(f"\n{'='*40}")
    print(f"SEED {seed_idx}/{len(SEEDS)}: {SEED}")
    print(f"{'='*40}")
    
    # OUTER SPLIT with current seed
    X_train_param, X_test_param = train_test_split(df, test_size=0.25, random_state=SEED)
    X_train_param['K_avg'] = (X_train_param['Bio-Ks'] + X_train_param['Bio-Kf']) / 2
    X_test_param['K_avg'] = (X_test_param['Bio-Ks'] + X_test_param['Bio-Kf']) / 2
    
    print(f"üìä Split: {len(X_train_param)} train, {len(X_test_param)} test")
    
    # INNER K-FOLD CV
    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
    fold_params = []
    fold_maes = []
    
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(X_train_param), 1):
        fold_train = X_train_param.iloc[train_idx]
        fold_val = X_train_param.iloc[val_idx]
        
        # Optimize on fold
        result_fold = differential_evolution(
            lambda p: calculate_mae_param(p, fold_train),
            bounds_param,
            maxiter=30,
            seed=SEED + fold_num,
            workers=1,
            updating='deferred',
            disp=False
        )
        
        fold_params.append(result_fold.x)
        val_mae = calculate_mae_param(result_fold.x, fold_val)
        fold_maes.append(val_mae)
    
    # Average parameters from folds
    avg_params = np.mean(fold_params, axis=0)
    avg_cv_mae = np.mean(fold_maes)
    std_cv_mae = np.std(fold_maes)
    
    print(f"  CV MAE: {avg_cv_mae:.4f} ¬± {std_cv_mae:.4f} D")
    
    # FINAL RETRAINING on full training set
    result_final = differential_evolution(
        lambda p: calculate_mae_param(p, X_train_param),
        bounds_param,
        maxiter=50,
        seed=SEED,
        workers=1,
        updating='deferred',
        disp=False
    )
    
    final_params = result_final.x
    
    # EVALUATE ON TRAINING SET (for overfitting check)
    mae_train = calculate_mae_param(final_params, X_train_param)
    
    # TEST ON HOLDOUT
    # Calculate baseline
    X_test_param['SRKT2_Baseline'] = X_test_param.apply(
        lambda row: calculate_SRKT2(
            AL=row['Bio-AL'],
            K_avg=row['K_avg'],
            IOL_power=row['IOL Power'],
            A_constant=row['A-Constant']
        ), axis=1
    )
    
    # Apply optimized parameters
    predictions_test = []
    for _, row in X_test_param.iterrows():
        cct_norm = (row['CCT'] - 600) / 100
        nc = final_params[0] + final_params[1] * cct_norm
        k_index = final_params[2] + final_params[3] * cct_norm
        acd_offset = final_params[4] + final_params[5] * cct_norm
        
        pred = calculate_SRKT2(
            AL=row['Bio-AL'],
            K_avg=row['K_avg'],
            IOL_power=row['IOL Power'],
            A_constant=row['A-Constant'] + acd_offset,
            nc=nc,
            k_index=k_index
        )
        predictions_test.append(pred)
    
    mae_baseline = np.abs(X_test_param['SRKT2_Baseline'] - X_test_param['PostOP Spherical Equivalent']).mean()
    mae_optimized = mean_absolute_error(X_test_param['PostOP Spherical Equivalent'], predictions_test)
    improvement = (mae_baseline - mae_optimized) / mae_baseline * 100
    
    print(f"  Train MAE: {mae_train:.4f}, Test MAE: {mae_optimized:.4f}")
    print(f"  Test: Baseline={mae_baseline:.4f}, Optimized={mae_optimized:.4f}")
    print(f"  Improvement: {improvement:.1f}%")
    
    # Check for overfitting
    overfit_ratio = (mae_optimized - mae_train) / mae_train * 100
    if overfit_ratio > 20:
        print(f"  ‚ö†Ô∏è Overfitting detected: Test {overfit_ratio:.1f}% worse than train")
    elif overfit_ratio > 10:
        print(f"  ‚ö†Ô∏è Mild overfitting: Test {overfit_ratio:.1f}% worse than train")
    else:
        print(f"  ‚úÖ Good generalization: Test only {overfit_ratio:.1f}% worse than train")
    
    # Store results
    seed_results_param.append(final_params)
    seed_test_maes_param.append(mae_optimized)
    seed_train_maes_param.append(mae_train)
    seed_baseline_maes_param.append(mae_baseline)
    seed_improvements_param.append(improvement)
    seed_overfit_ratios_param.append(overfit_ratio)

# MULTI-SEED SUMMARY
print("\n" + "="*80)
print("PARAMETER OPTIMIZATION - MULTI-SEED SUMMARY")
print("="*80)

print("\nüìä TEST PERFORMANCE ACROSS SEEDS:")
print("-" * 50)
for i, seed in enumerate(SEEDS):
    print(f"  Seed {seed:3}: MAE={seed_test_maes_param[i]:.4f} D, Improvement={seed_improvements_param[i]:.1f}%")

print("\nüìà STATISTICAL SUMMARY:")
print("-" * 50)
print(f"  Baseline MAE:      {np.mean(seed_baseline_maes_param):.4f} ¬± {np.std(seed_baseline_maes_param):.4f} D")
print(f"  Train MAE:         {np.mean(seed_train_maes_param):.4f} ¬± {np.std(seed_train_maes_param):.4f} D")
print(f"  Test MAE:          {np.mean(seed_test_maes_param):.4f} ¬± {np.std(seed_test_maes_param):.4f} D")
print(f"  Mean Improvement:  {np.mean(seed_improvements_param):.1f} ¬± {np.std(seed_improvements_param):.1f}%")
print(f"  Best seed:         {SEEDS[np.argmin(seed_test_maes_param)]} (MAE={min(seed_test_maes_param):.4f})")
print(f"  Worst seed:        {SEEDS[np.argmax(seed_test_maes_param)]} (MAE={max(seed_test_maes_param):.4f})")

# OVERFITTING ANALYSIS
print("\nüîç OVERFITTING ANALYSIS:")
print("-" * 50)
print(f"  Mean overfit ratio: {np.mean(seed_overfit_ratios_param):.1f}%")
print(f"  (Test MAE is {np.mean(seed_overfit_ratios_param):.1f}% worse than Train MAE on average)")

if np.mean(seed_overfit_ratios_param) < 10:
    print("  ‚úÖ Excellent generalization - minimal overfitting")
elif np.mean(seed_overfit_ratios_param) < 20:
    print("  ‚úÖ Good generalization - acceptable overfitting")
else:
    print("  ‚ö†Ô∏è Significant overfitting - consider regularization")

# Average parameters across seeds
avg_params_all_seeds = np.mean(seed_results_param, axis=0)
std_params_all_seeds = np.std(seed_results_param, axis=0)

print("\n‚úÖ CONSENSUS PARAMETERS (averaged across seeds):")
print("-" * 50)
param_names = ['nc_base', 'nc_cct_coef', 'k_index_base', 'k_index_cct_coef', 'acd_offset_base', 'acd_offset_cct_coef']
for i, name in enumerate(param_names):
    print(f"  {name:20} = {avg_params_all_seeds[i]:+.4f} ¬± {std_params_all_seeds[i]:.4f}")

# Store in global results dictionary
multi_seed_results['parameter'] = {
    'test_maes': seed_test_maes_param,
    'train_maes': seed_train_maes_param,
    'baseline_maes': seed_baseline_maes_param,
    'improvements': seed_improvements_param,
    'overfit_ratios': seed_overfit_ratios_param,
    'mean_mae': np.mean(seed_test_maes_param),
    'std_mae': np.std(seed_test_maes_param),
    'mean_improvement': np.mean(seed_improvements_param)
}

print("\nüí° ROBUSTNESS ANALYSIS:")
print("-" * 50)
mae_cv = np.std(seed_test_maes_param) / np.mean(seed_test_maes_param) * 100
if mae_cv < 5:
    print(f"‚úÖ Excellent stability: CV={mae_cv:.1f}% (very consistent across seeds)")
elif mae_cv < 10:
    print(f"‚úÖ Good stability: CV={mae_cv:.1f}% (consistent across seeds)")
else:
    print(f"‚ö†Ô∏è Moderate stability: CV={mae_cv:.1f}% (some variation across seeds)")

print(f"\nüìä Range of results: {min(seed_test_maes_param):.4f} - {max(seed_test_maes_param):.4f} D")
print(f"   This {max(seed_test_maes_param)-min(seed_test_maes_param):.4f} D range shows the impact of data split")

PARAMETER OPTIMIZATION WITH K-FOLD CV - MULTI-SEED ANALYSIS

üéØ MULTI-SEED NESTED CROSS-VALIDATION:
--------------------------------------------------
‚Ä¢ Testing 5 different random seeds: [42, 123, 456, 789, 2025]
‚Ä¢ Each seed: 75% train, 25% test
‚Ä¢ Inner: 5-fold CV on training set
‚Ä¢ Results averaged across seeds for robustness

RUNNING MULTI-SEED ANALYSIS

SEED 1/5: 42
üìä Split: 72 train, 24 test
  CV MAE: 1.2383 ¬± 0.3650 D
  Train MAE: 1.1642, Test MAE: 1.4354
  Test: Baseline=1.4849, Optimized=1.4354
  Improvement: 3.3%
  ‚ö†Ô∏è Overfitting detected: Test 23.3% worse than train

SEED 2/5: 123
üìä Split: 72 train, 24 test
  CV MAE: 1.3361 ¬± 0.2740 D
  Train MAE: 1.2528, Test MAE: 1.0289
  Test: Baseline=1.2755, Optimized=1.0289
  Improvement: 19.3%
  ‚úÖ Good generalization: Test only -17.9% worse than train

SEED 3/5: 456
üìä Split: 72 train, 24 test
  CV MAE: 1.1921 ¬± 0.1903 D


In [None]:
# MULTIPLICATIVE CORRECTION WITH K-FOLD CV - MULTI-SEED
# ====================================
# PURPOSE: Multiplicative correction with nested CV for robust validation
# NOW WITH MULTIPLE SEEDS for statistical robustness

print("=" * 80)
print("MULTIPLICATIVE CORRECTION WITH K-FOLD CV - MULTI-SEED ANALYSIS")
print("=" * 80)

print("\nüéØ MULTI-SEED NESTED CV STRATEGY:")
print("-" * 50)
print(f"‚Ä¢ Testing {len(SEEDS)} different random seeds: {SEEDS}")
print("‚Ä¢ Each seed: 75/25 train/test split")
print("‚Ä¢ Inner: 5-fold CV on training")
print("‚Ä¢ Find stable multiplicative factors across seeds")

from scipy.optimize import minimize
from sklearn.model_selection import train_test_split, KFold
import numpy as np

def multiplicative_objective(params, df_data):
    """Objective function for multiplicative correction"""
    m0, m1, m2 = params
    
    predictions = []
    actuals = []
    
    for _, row in df_data.iterrows():
        base_pred = row['SRKT2_Prediction']
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        
        correction_factor = 1 + m0 + m1 * cct_norm + m2 * cct_ratio
        corrected_pred = base_pred * correction_factor
        
        predictions.append(corrected_pred)
        actuals.append(row['PostOP Spherical Equivalent'])
    
    return mean_absolute_error(actuals, predictions)

x0_mult = [0, 0, 0]
bounds_mult = [(-0.5, 0.5), (-0.5, 0.5), (-0.5, 0.5)]

# Store results for each seed
seed_results_mult = []
seed_test_maes_mult = []
seed_train_maes_mult = []  # NEW: Track training MAEs
seed_baseline_maes_mult = []
seed_improvements_mult = []
seed_overfit_ratios_mult = []  # NEW: Track overfitting

print("\n" + "="*80)
print("RUNNING MULTI-SEED ANALYSIS")
print("="*80)

for seed_idx, SEED in enumerate(SEEDS, 1):
    print(f"\n{'='*40}")
    print(f"SEED {seed_idx}/{len(SEEDS)}: {SEED}")
    print(f"{'='*40}")
    
    # OUTER SPLIT with current seed
    X_train_mult, X_test_mult = train_test_split(df, test_size=0.25, random_state=SEED)
    X_train_mult['K_avg'] = (X_train_mult['Bio-Ks'] + X_train_mult['Bio-Kf']) / 2
    X_test_mult['K_avg'] = (X_test_mult['Bio-Ks'] + X_test_mult['Bio-Kf']) / 2
    
    print(f"üìä Split: {len(X_train_mult)} train, {len(X_test_mult)} test")
    
    # Calculate baseline SRK/T2 for all data
    for dataset in [X_train_mult, X_test_mult]:
        dataset['SRKT2_Prediction'] = dataset.apply(
            lambda row: calculate_SRKT2(
                AL=row['Bio-AL'],
                K_avg=row['K_avg'],
                IOL_power=row['IOL Power'],
                A_constant=row['A-Constant']
            ), axis=1
        )
    
    # INNER K-FOLD CV
    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
    fold_params = []
    fold_maes = []
    
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(X_train_mult), 1):
        fold_train = X_train_mult.iloc[train_idx]
        fold_val = X_train_mult.iloc[val_idx]
        
        # Optimize on fold training
        result_fold = minimize(
            lambda p: multiplicative_objective(p, fold_train),
            x0_mult,
            method='L-BFGS-B',
            bounds=bounds_mult
        )
        
        fold_params.append(result_fold.x)
        val_mae = multiplicative_objective(result_fold.x, fold_val)
        fold_maes.append(val_mae)
    
    # Average across folds
    avg_params = np.mean(fold_params, axis=0)
    avg_cv_mae = np.mean(fold_maes)
    std_cv_mae = np.std(fold_maes)
    
    print(f"  CV MAE: {avg_cv_mae:.4f} ¬± {std_cv_mae:.4f} D")
    
    # FINAL RETRAINING on full training set
    result_mult = minimize(
        lambda p: multiplicative_objective(p, X_train_mult),
        x0_mult,
        method='L-BFGS-B',
        bounds=bounds_mult
    )
    m0_opt, m1_opt, m2_opt = result_mult.x
    
    print(f"  Final params: m‚ÇÄ={m0_opt:.4f}, m‚ÇÅ={m1_opt:.4f}, m‚ÇÇ={m2_opt:.4f}")
    
    # EVALUATE ON TRAINING SET (for overfitting check)
    mae_train = multiplicative_objective([m0_opt, m1_opt, m2_opt], X_train_mult)
    
    # TEST ON HOLDOUT
    predictions_mult_test = []
    for _, row in X_test_mult.iterrows():
        base_pred = row['SRKT2_Prediction']
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        
        correction_factor = 1 + m0_opt + m1_opt * cct_norm + m2_opt * cct_ratio
        corrected_pred = base_pred * correction_factor
        predictions_mult_test.append(corrected_pred)
    
    mae_baseline = np.abs(X_test_mult['SRKT2_Prediction'] - X_test_mult['PostOP Spherical Equivalent']).mean()
    mae_optimized = mean_absolute_error(X_test_mult['PostOP Spherical Equivalent'], predictions_mult_test)
    improvement = (mae_baseline - mae_optimized) / mae_baseline * 100
    
    print(f"  Train MAE: {mae_train:.4f}, Test MAE: {mae_optimized:.4f}")
    print(f"  Test: Baseline={mae_baseline:.4f}, Optimized={mae_optimized:.4f}")
    print(f"  Improvement: {improvement:.1f}%")
    
    # Check for overfitting
    overfit_ratio = (mae_optimized - mae_train) / mae_train * 100
    if overfit_ratio > 20:
        print(f"  ‚ö†Ô∏è Overfitting detected: Test {overfit_ratio:.1f}% worse than train")
    elif overfit_ratio > 10:
        print(f"  ‚ö†Ô∏è Mild overfitting: Test {overfit_ratio:.1f}% worse than train")
    else:
        print(f"  ‚úÖ Good generalization: Test only {overfit_ratio:.1f}% worse than train")
    
    # Store results
    seed_results_mult.append([m0_opt, m1_opt, m2_opt])
    seed_test_maes_mult.append(mae_optimized)
    seed_train_maes_mult.append(mae_train)
    seed_baseline_maes_mult.append(mae_baseline)
    seed_improvements_mult.append(improvement)
    seed_overfit_ratios_mult.append(overfit_ratio)

# MULTI-SEED SUMMARY
print("\n" + "="*80)
print("MULTIPLICATIVE CORRECTION - MULTI-SEED SUMMARY")
print("="*80)

print("\nüìä TEST PERFORMANCE ACROSS SEEDS:")
print("-" * 50)
for i, seed in enumerate(SEEDS):
    print(f"  Seed {seed:3}: MAE={seed_test_maes_mult[i]:.4f} D, Improvement={seed_improvements_mult[i]:.1f}%")

print("\nüìà STATISTICAL SUMMARY:")
print("-" * 50)
print(f"  Baseline MAE:      {np.mean(seed_baseline_maes_mult):.4f} ¬± {np.std(seed_baseline_maes_mult):.4f} D")
print(f"  Train MAE:         {np.mean(seed_train_maes_mult):.4f} ¬± {np.std(seed_train_maes_mult):.4f} D")
print(f"  Test MAE:          {np.mean(seed_test_maes_mult):.4f} ¬± {np.std(seed_test_maes_mult):.4f} D")
print(f"  Mean Improvement:  {np.mean(seed_improvements_mult):.1f} ¬± {np.std(seed_improvements_mult):.1f}%")
print(f"  Best seed:         {SEEDS[np.argmin(seed_test_maes_mult)]} (MAE={min(seed_test_maes_mult):.4f})")
print(f"  Worst seed:        {SEEDS[np.argmax(seed_test_maes_mult)]} (MAE={max(seed_test_maes_mult):.4f})")

# OVERFITTING ANALYSIS
print("\nüîç OVERFITTING ANALYSIS:")
print("-" * 50)
print(f"  Mean overfit ratio: {np.mean(seed_overfit_ratios_mult):.1f}%")
print(f"  (Test MAE is {np.mean(seed_overfit_ratios_mult):.1f}% worse than Train MAE on average)")

if np.mean(seed_overfit_ratios_mult) < 10:
    print("  ‚úÖ Excellent generalization - minimal overfitting")
elif np.mean(seed_overfit_ratios_mult) < 20:
    print("  ‚úÖ Good generalization - acceptable overfitting")
else:
    print("  ‚ö†Ô∏è Significant overfitting - consider regularization")

# Average parameters across seeds
avg_params_all_seeds = np.mean(seed_results_mult, axis=0)
std_params_all_seeds = np.std(seed_results_mult, axis=0)

print("\n‚úÖ CONSENSUS PARAMETERS (averaged across seeds):")
print("-" * 50)
print(f"  m‚ÇÄ (constant):     {avg_params_all_seeds[0]:+.4f} ¬± {std_params_all_seeds[0]:.4f}")
print(f"  m‚ÇÅ (CCT coef):     {avg_params_all_seeds[1]:+.4f} ¬± {std_params_all_seeds[1]:.4f}")
print(f"  m‚ÇÇ (ratio coef):   {avg_params_all_seeds[2]:+.4f} ¬± {std_params_all_seeds[2]:.4f}")

print("\nüìê CONSENSUS CORRECTION FORMULA:")
print("-" * 50)
print("Corrected_REF = Standard_SRK/T2 √ó Correction_Factor")
print(f"Correction_Factor = 1 {avg_params_all_seeds[0]:+.4f} {avg_params_all_seeds[1]:+.4f}√óCCT_norm {avg_params_all_seeds[2]:+.4f}√ó(CCT/AL)")

# Store in global results dictionary
multi_seed_results['multiplicative'] = {
    'test_maes': seed_test_maes_mult,
    'train_maes': seed_train_maes_mult,
    'baseline_maes': seed_baseline_maes_mult,
    'improvements': seed_improvements_mult,
    'overfit_ratios': seed_overfit_ratios_mult,
    'mean_mae': np.mean(seed_test_maes_mult),
    'std_mae': np.std(seed_test_maes_mult),
    'mean_improvement': np.mean(seed_improvements_mult)
}

print("\nüí° ROBUSTNESS ANALYSIS:")
print("-" * 50)
mae_cv = np.std(seed_test_maes_mult) / np.mean(seed_test_maes_mult) * 100
if mae_cv < 5:
    print(f"‚úÖ Excellent stability: CV={mae_cv:.1f}% (very consistent across seeds)")
elif mae_cv < 10:
    print(f"‚úÖ Good stability: CV={mae_cv:.1f}% (consistent across seeds)")
else:
    print(f"‚ö†Ô∏è Moderate stability: CV={mae_cv:.1f}% (some variation across seeds)")

print(f"\nüìä Range of results: {min(seed_test_maes_mult):.4f} - {max(seed_test_maes_mult):.4f} D")
print(f"   This {max(seed_test_maes_mult)-min(seed_test_maes_mult):.4f} D range shows the impact of data split")

# Parameter consistency check
print(f"\nüìä Parameter consistency across seeds:")
for i, param_name in enumerate(['m‚ÇÄ', 'm‚ÇÅ', 'm‚ÇÇ']):
    param_values = [p[i] for p in seed_results_mult]
    print(f"  {param_name}: min={min(param_values):.4f}, max={max(param_values):.4f}, range={max(param_values)-min(param_values):.4f}")

In [None]:
# ADDITIVE CORRECTION WITH PROPER VALIDATION - MULTI-SEED
# ================================================
# PURPOSE: Create an additive correction term with multi-seed validation
# NOW WITH MULTIPLE SEEDS for statistical robustness

print("=" * 80)
print("ADDITIVE CORRECTION WITH K-FOLD CV - MULTI-SEED ANALYSIS")
print("=" * 80)

print("\nüéØ MULTI-SEED STRATEGY:")
print("-" * 50)
print(f"‚Ä¢ Testing {len(SEEDS)} different random seeds: {SEEDS}")
print("‚Ä¢ Each seed: 75% train, 25% test")
print("‚Ä¢ Formula: Corrected = SRK/T2 + Correction_Term")
print("‚Ä¢ Uses Ridge-identified important features")

from scipy.optimize import minimize
from sklearn.model_selection import train_test_split, KFold
import numpy as np

def additive_objective(params, df_data):
    """Objective for additive correction using Ridge-identified features"""
    a0, a1, a2, a3 = params
    
    predictions = []
    actuals = []
    
    for _, row in df_data.iterrows():
        # Standard SRK/T2 prediction
        base_pred = row['SRKT2_Prediction']
        
        # Ridge-identified features
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        k_avg = row['K_avg']
        
        # Additive correction based on Ridge insights
        correction = a0 + a1 * cct_norm + a2 * cct_ratio + a3 * k_avg
        corrected_pred = base_pred + correction
        
        predictions.append(corrected_pred)
        actuals.append(row['PostOP Spherical Equivalent'])
    
    return mean_absolute_error(actuals, predictions)

# Initial guess and bounds
x0_add = [0, 0, 0, 0]
bounds_add = [(-2, 2), (-2, 2), (-2, 2), (-0.1, 0.1)]

# Store results for each seed
seed_results_add = []
seed_test_maes_add = []
seed_baseline_maes_add = []
seed_improvements_add = []

print("\n" + "="*80)
print("RUNNING MULTI-SEED ANALYSIS")
print("="*80)

for seed_idx, SEED in enumerate(SEEDS, 1):
    print(f"\n{'='*40}")
    print(f"SEED {seed_idx}/{len(SEEDS)}: {SEED}")
    print(f"{'='*40}")
    
    # OUTER SPLIT with current seed
    X_train_add, X_test_add = train_test_split(df, test_size=0.25, random_state=SEED)
    X_train_add['K_avg'] = (X_train_add['Bio-Ks'] + X_train_add['Bio-Kf']) / 2
    X_test_add['K_avg'] = (X_test_add['Bio-Ks'] + X_test_add['Bio-Kf']) / 2
    
    print(f"üìä Split: {len(X_train_add)} train, {len(X_test_add)} test")
    
    # Calculate baseline SRK/T2 for both sets
    for dataset in [X_train_add, X_test_add]:
        dataset['SRKT2_Prediction'] = dataset.apply(
            lambda row: calculate_SRKT2(
                AL=row['Bio-AL'],
                K_avg=row['K_avg'],
                IOL_power=row['IOL Power'],
                A_constant=row['A-Constant']
            ), axis=1
        )
    
    # INNER K-FOLD CV (optional for additive, but good for consistency)
    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
    fold_params = []
    fold_maes = []
    
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(X_train_add), 1):
        fold_train = X_train_add.iloc[train_idx]
        fold_val = X_train_add.iloc[val_idx]
        
        # Optimize on fold
        result_fold = minimize(
            lambda p: additive_objective(p, fold_train),
            x0_add,
            method='L-BFGS-B',
            bounds=bounds_add
        )
        
        fold_params.append(result_fold.x)
        val_mae = additive_objective(result_fold.x, fold_val)
        fold_maes.append(val_mae)
    
    # Average across folds
    avg_cv_mae = np.mean(fold_maes)
    std_cv_mae = np.std(fold_maes)
    
    print(f"  CV MAE: {avg_cv_mae:.4f} ¬± {std_cv_mae:.4f} D")
    
    # FINAL OPTIMIZATION on full training set
    result_add = minimize(
        lambda p: additive_objective(p, X_train_add),
        x0_add,
        method='L-BFGS-B',
        bounds=bounds_add
    )
    
    a0_opt, a1_opt, a2_opt, a3_opt = result_add.x
    
    print(f"  Final params: a‚ÇÄ={a0_opt:.4f}, a‚ÇÅ={a1_opt:.4f}, a‚ÇÇ={a2_opt:.4f}, a‚ÇÉ={a3_opt:.4f}")
    
    # EVALUATE ON TRAINING SET (for reference)
    predictions_add_train = []
    for _, row in X_train_add.iterrows():
        base_pred = row['SRKT2_Prediction']
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        k_avg = row['K_avg']
        
        correction = a0_opt + a1_opt * cct_norm + a2_opt * cct_ratio + a3_opt * k_avg
        corrected_pred = base_pred + correction
        predictions_add_train.append(corrected_pred)
    
    mae_train_add = mean_absolute_error(X_train_add['PostOP Spherical Equivalent'], predictions_add_train)
    
    # TEST ON HOLDOUT SET
    predictions_add_test = []
    for _, row in X_test_add.iterrows():
        base_pred = row['SRKT2_Prediction']
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        k_avg = row['K_avg']
        
        correction = a0_opt + a1_opt * cct_norm + a2_opt * cct_ratio + a3_opt * k_avg
        corrected_pred = base_pred + correction
        predictions_add_test.append(corrected_pred)
    
    mae_baseline = np.abs(X_test_add['SRKT2_Prediction'] - X_test_add['PostOP Spherical Equivalent']).mean()
    mae_optimized = mean_absolute_error(X_test_add['PostOP Spherical Equivalent'], predictions_add_test)
    improvement = (mae_baseline - mae_optimized) / mae_baseline * 100
    
    print(f"  Train MAE: {mae_train_add:.4f}, Test MAE: {mae_optimized:.4f}")
    print(f"  Test: Baseline={mae_baseline:.4f}, Optimized={mae_optimized:.4f}")
    print(f"  Improvement: {improvement:.1f}%")
    
    # Check for overfitting
    overfit_ratio = (mae_optimized - mae_train_add) / mae_train_add * 100
    if overfit_ratio > 20:
        print(f"  ‚ö†Ô∏è Overfitting detected: Test {overfit_ratio:.1f}% worse than train")
    
    # Store results
    seed_results_add.append([a0_opt, a1_opt, a2_opt, a3_opt])
    seed_test_maes_add.append(mae_optimized)
    seed_baseline_maes_add.append(mae_baseline)
    seed_improvements_add.append(improvement)

# MULTI-SEED SUMMARY
print("\n" + "="*80)
print("ADDITIVE CORRECTION - MULTI-SEED SUMMARY")
print("="*80)

print("\nüìä TEST PERFORMANCE ACROSS SEEDS:")
print("-" * 50)
for i, seed in enumerate(SEEDS):
    print(f"  Seed {seed:3}: MAE={seed_test_maes_add[i]:.4f} D, Improvement={seed_improvements_add[i]:.1f}%")

print("\nüìà STATISTICAL SUMMARY:")
print("-" * 50)
print(f"  Baseline MAE:      {np.mean(seed_baseline_maes_add):.4f} ¬± {np.std(seed_baseline_maes_add):.4f} D")
print(f"  Optimized MAE:     {np.mean(seed_test_maes_add):.4f} ¬± {np.std(seed_test_maes_add):.4f} D")
print(f"  Mean Improvement:  {np.mean(seed_improvements_add):.1f} ¬± {np.std(seed_improvements_add):.1f}%")
print(f"  Best seed:         {SEEDS[np.argmin(seed_test_maes_add)]} (MAE={min(seed_test_maes_add):.4f})")
print(f"  Worst seed:        {SEEDS[np.argmax(seed_test_maes_add)]} (MAE={max(seed_test_maes_add):.4f})")

# Average parameters across seeds
avg_params_all_seeds = np.mean(seed_results_add, axis=0)
std_params_all_seeds = np.std(seed_results_add, axis=0)

print("\n‚úÖ CONSENSUS PARAMETERS (averaged across seeds):")
print("-" * 50)
print(f"  a‚ÇÄ (constant):     {avg_params_all_seeds[0]:+.4f} ¬± {std_params_all_seeds[0]:.4f}")
print(f"  a‚ÇÅ (CCT_norm):     {avg_params_all_seeds[1]:+.4f} ¬± {std_params_all_seeds[1]:.4f}")
print(f"  a‚ÇÇ (CCT_ratio):    {avg_params_all_seeds[2]:+.4f} ¬± {std_params_all_seeds[2]:.4f}")
print(f"  a‚ÇÉ (K_avg):        {avg_params_all_seeds[3]:+.4f} ¬± {std_params_all_seeds[3]:.4f}")

print("\nüìê CONSENSUS CORRECTION FORMULA:")
print("-" * 50)
print("Corrected_REF = Standard_SRK/T2 + Correction_Term")
print("")
print(f"Correction_Term = {avg_params_all_seeds[0]:+.4f} {avg_params_all_seeds[1]:+.4f}√óCCT_norm {avg_params_all_seeds[2]:+.4f}√ó(CCT/AL) {avg_params_all_seeds[3]:+.4f}√óK_avg")
print("")
print("Where: CCT_norm = (CCT - 600) / 100")

# Store in global results dictionary
multi_seed_results['additive'] = {
    'test_maes': seed_test_maes_add,
    'baseline_maes': seed_baseline_maes_add,
    'improvements': seed_improvements_add,
    'mean_mae': np.mean(seed_test_maes_add),
    'std_mae': np.std(seed_test_maes_add),
    'mean_improvement': np.mean(seed_improvements_add)
}

print("\nüí° ROBUSTNESS ANALYSIS:")
print("-" * 50)
mae_cv = np.std(seed_test_maes_add) / np.mean(seed_test_maes_add) * 100
if mae_cv < 5:
    print(f"‚úÖ Excellent stability: CV={mae_cv:.1f}% (very consistent across seeds)")
elif mae_cv < 10:
    print(f"‚úÖ Good stability: CV={mae_cv:.1f}% (consistent across seeds)")
else:
    print(f"‚ö†Ô∏è Moderate stability: CV={mae_cv:.1f}% (some variation across seeds)")

print(f"\nüìä Range of results: {min(seed_test_maes_add):.4f} - {max(seed_test_maes_add):.4f} D")
print(f"   This {max(seed_test_maes_add)-min(seed_test_maes_add):.4f} D range shows the impact of data split")

# Parameter consistency check
print(f"\nüìä Parameter consistency across seeds:")
for i, param_name in enumerate(['a‚ÇÄ', 'a‚ÇÅ', 'a‚ÇÇ', 'a‚ÇÉ']):
    param_values = [p[i] for p in seed_results_add]
    print(f"  {param_name}: min={min(param_values):.4f}, max={max(param_values):.4f}, range={max(param_values)-min(param_values):.4f}")

print("\nüí° RIDGE VALIDATION:")
print("-" * 50)
print("‚Ä¢ This formula uses features identified by Ridge as important")
print("‚Ä¢ CCT_norm and CCT_ratio were top Ridge features")
if np.mean(seed_improvements_add) > 10:
    print(f"‚Ä¢ Achieving {np.mean(seed_improvements_add):.1f}% average improvement confirms Ridge insights work!")
elif np.mean(seed_improvements_add) > 0:
    print(f"‚Ä¢ Modest {np.mean(seed_improvements_add):.1f}% improvement - Ridge features help but may need refinement")
else:
    print("‚Ä¢ Limited improvement suggests these features may not generalize well")

In [None]:
# COMBINED APPROACH WITH K-FOLD CROSS-VALIDATION - MULTI-SEED
# ========================================================
# PURPOSE: Combine all three methods with nested K-fold CV and multi-seed validation
# Most complex but potentially most accurate approach

print("=" * 80)
print("COMBINED FORMULA WITH K-FOLD CV - MULTI-SEED ANALYSIS")
print("=" * 80)

print("\nüéØ MULTI-SEED NESTED CV FOR COMBINED APPROACH:")
print("-" * 50)
print(f"‚Ä¢ Testing {len(SEEDS)} different random seeds: {SEEDS}")
print("‚Ä¢ Each seed: 75/25 train/test split")
print("‚Ä¢ Inner: 5-fold CV for each method")
print("‚Ä¢ Combine all optimized corrections")

from sklearn.model_selection import train_test_split, KFold
from scipy.optimize import minimize, differential_evolution
import numpy as np

# Store results for each seed
seed_results_combined = []
seed_test_maes_combined = []
seed_train_maes_combined = []
seed_baseline_maes_combined = []
seed_improvements_combined = []
seed_overfit_ratios_combined = []

# Store individual method results
seed_param_results = []
seed_mult_results = []
seed_add_results = []

print("\n" + "="*80)
print("RUNNING MULTI-SEED ANALYSIS")
print("="*80)

for seed_idx, SEED in enumerate(SEEDS, 1):
    print(f"\n{'='*40}")
    print(f"SEED {seed_idx}/{len(SEEDS)}: {SEED}")
    print(f"{'='*40}")
    
    # OUTER SPLIT - consistent across all methods
    X_train_comb, X_test_comb = train_test_split(df, test_size=0.25, random_state=SEED)
    X_train_comb['K_avg'] = (X_train_comb['Bio-Ks'] + X_train_comb['Bio-Kf']) / 2
    X_test_comb['K_avg'] = (X_test_comb['Bio-Ks'] + X_test_comb['Bio-Kf']) / 2
    
    print(f"üìä Split: {len(X_train_comb)} train, {len(X_test_comb)} test")
    
    # Calculate baseline for all
    for dataset in [X_train_comb, X_test_comb]:
        dataset['SRKT2_Baseline'] = dataset.apply(
            lambda row: calculate_SRKT2(
                AL=row['Bio-AL'],
                K_avg=row['K_avg'],
                IOL_power=row['IOL Power'],
                A_constant=row['A-Constant']
            ), axis=1
        )
    
    print("\nüìÅ K-FOLD CV FOR EACH METHOD:")
    print("-" * 40)
    
    # Setup K-fold
    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
    
    # Store fold results for each method
    param_fold_results = []
    mult_fold_results = []
    add_fold_results = []
    combined_fold_maes = []
    
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(X_train_comb), 1):
        print(f"  Fold {fold_num}/5: ", end="")
        
        fold_train = X_train_comb.iloc[train_idx]
        fold_val = X_train_comb.iloc[val_idx]
        
        # 1. PARAMETER METHOD
        def param_obj(params, df_data):
            nc_base, nc_cct, k_base, k_cct, acd_base, acd_cct = params
            predictions = []
            for _, row in df_data.iterrows():
                cct_norm = (row['CCT'] - 600) / 100
                nc = nc_base + nc_cct * cct_norm
                k_index = k_base + k_cct * cct_norm
                acd_offset = acd_base + acd_cct * cct_norm
                pred = calculate_SRKT2(
                    AL=row['Bio-AL'], K_avg=row['K_avg'],
                    IOL_power=row['IOL Power'],
                    A_constant=row['A-Constant'] + acd_offset,
                    nc=nc, k_index=k_index
                )
                predictions.append(pred)
            return mean_absolute_error(df_data['PostOP Spherical Equivalent'], predictions)
        
        bounds_p = [(1.20, 1.50), (-0.20, 0.20), (1.20, 1.60), (-0.30, 0.30), (-3.0, 3.0), (-3.0, 3.0)]
        result_p = differential_evolution(lambda p: param_obj(p, fold_train), bounds_p, 
                                         maxiter=20, seed=SEED+fold_num, disp=False)
        param_fold_results.append(result_p.x)
        
        # 2. MULTIPLICATIVE METHOD
        def mult_obj(params, df_data):
            m0, m1, m2 = params
            predictions = []
            for _, row in df_data.iterrows():
                base_pred = row['SRKT2_Baseline']
                cct_norm = (row['CCT'] - 600) / 100
                cct_ratio = row['CCT'] / row['Bio-AL']
                correction = 1 + m0 + m1 * cct_norm + m2 * cct_ratio
                predictions.append(base_pred * correction)
            return mean_absolute_error(df_data['PostOP Spherical Equivalent'], predictions)
        
        result_m = minimize(lambda p: mult_obj(p, fold_train), [0,0,0], 
                           method='L-BFGS-B', bounds=[(-0.5,0.5)]*3)
        mult_fold_results.append(result_m.x)
        
        # 3. ADDITIVE METHOD
        def add_obj(params, df_data):
            a0, a1, a2, a3 = params
            predictions = []
            for _, row in df_data.iterrows():
                base_pred = row['SRKT2_Baseline']
                cct_norm = (row['CCT'] - 600) / 100
                cct_ratio = row['CCT'] / row['Bio-AL']
                correction = a0 + a1 * cct_norm + a2 * cct_ratio + a3 * row['K_avg']
                predictions.append(base_pred + correction)
            return mean_absolute_error(df_data['PostOP Spherical Equivalent'], predictions)
        
        result_a = minimize(lambda p: add_obj(p, fold_train), [0,0,0,0],
                           method='L-BFGS-B', bounds=[(-2,2),(-2,2),(-2,2),(-0.1,0.1)])
        add_fold_results.append(result_a.x)
        
        # VALIDATE COMBINED on fold validation set
        nc_b, nc_c, k_b, k_c, acd_b, acd_c = result_p.x
        m0, m1, m2 = result_m.x
        a0, a1, a2, a3 = result_a.x
        
        combined_preds = []
        for _, row in fold_val.iterrows():
            cct_norm = (row['CCT'] - 600) / 100
            cct_ratio = row['CCT'] / row['Bio-AL']
            
            # Modified SRK/T2
            nc = nc_b + nc_c * cct_norm
            k_index = k_b + k_c * cct_norm
            acd_offset = acd_b + acd_c * cct_norm
            modified = calculate_SRKT2(
                AL=row['Bio-AL'], K_avg=row['K_avg'],
                IOL_power=row['IOL Power'],
                A_constant=row['A-Constant'] + acd_offset,
                nc=nc, k_index=k_index
            )
            
            # Apply multiplicative
            mult_factor = 1 + m0 + m1 * cct_norm + m2 * cct_ratio
            after_mult = modified * mult_factor
            
            # Apply additive
            add_correction = a0 + a1 * cct_norm + a2 * cct_ratio + a3 * row['K_avg']
            final = after_mult + add_correction
            
            combined_preds.append(final)
        
        fold_mae = mean_absolute_error(fold_val['PostOP Spherical Equivalent'], combined_preds)
        combined_fold_maes.append(fold_mae)
        print(f"MAE={fold_mae:.4f} ", end="")
    
    print()  # New line after folds
    
    # Average parameters across folds
    avg_param = np.mean(param_fold_results, axis=0)
    avg_mult = np.mean(mult_fold_results, axis=0)
    avg_add = np.mean(add_fold_results, axis=0)
    avg_combined_mae = np.mean(combined_fold_maes)
    std_combined_mae = np.std(combined_fold_maes)
    
    print(f"  CV MAE: {avg_combined_mae:.4f} ¬± {std_combined_mae:.4f} D")
    
    # FINAL RETRAINING on full training set
    print("  Final optimization on full training set...")
    
    result_p_final = differential_evolution(lambda p: param_obj(p, X_train_comb), bounds_p, 
                                           maxiter=50, seed=SEED, disp=False)
    nc_base_c, nc_cct_c, k_base_c, k_cct_c, acd_base_c, acd_cct_c = result_p_final.x
    
    result_m_final = minimize(lambda p: mult_obj(p, X_train_comb), [0,0,0], 
                             method='L-BFGS-B', bounds=[(-0.5,0.5)]*3)
    m0_c, m1_c, m2_c = result_m_final.x
    
    result_a_final = minimize(lambda p: add_obj(p, X_train_comb), [0,0,0,0],
                             method='L-BFGS-B', bounds=[(-2,2),(-2,2),(-2,2),(-0.1,0.1)])
    a0_c, a1_c, a2_c, a3_c = result_a_final.x
    
    # EVALUATE ON TRAINING SET (for overfitting check)
    predictions_combined_train = []
    for _, row in X_train_comb.iterrows():
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        k_avg = row['K_avg']
        
        # Modified SRK/T2
        nc = nc_base_c + nc_cct_c * cct_norm
        k_index = k_base_c + k_cct_c * cct_norm
        acd_offset = acd_base_c + acd_cct_c * cct_norm
        
        modified_srkt2 = calculate_SRKT2(
            AL=row['Bio-AL'], K_avg=k_avg,
            IOL_power=row['IOL Power'],
            A_constant=row['A-Constant'] + acd_offset,
            nc=nc, k_index=k_index
        )
        
        # Apply multiplicative
        mult_factor = 1 + m0_c + m1_c * cct_norm + m2_c * cct_ratio
        after_mult = modified_srkt2 * mult_factor
        
        # Apply additive
        add_correction = a0_c + a1_c * cct_norm + a2_c * cct_ratio + a3_c * k_avg
        final_combined = after_mult + add_correction
        
        predictions_combined_train.append(final_combined)
    
    mae_train = mean_absolute_error(X_train_comb['PostOP Spherical Equivalent'], predictions_combined_train)
    
    # TEST ON HOLDOUT
    predictions_combined_test = []
    predictions_mult_only = []
    
    for _, row in X_test_comb.iterrows():
        cct_norm = (row['CCT'] - 600) / 100
        cct_ratio = row['CCT'] / row['Bio-AL']
        k_avg = row['K_avg']
        
        # Modified SRK/T2
        nc = nc_base_c + nc_cct_c * cct_norm
        k_index = k_base_c + k_cct_c * cct_norm
        acd_offset = acd_base_c + acd_cct_c * cct_norm
        
        modified_srkt2 = calculate_SRKT2(
            AL=row['Bio-AL'], K_avg=k_avg,
            IOL_power=row['IOL Power'],
            A_constant=row['A-Constant'] + acd_offset,
            nc=nc, k_index=k_index
        )
        
        # Multiplicative only (for comparison)
        mult_factor = 1 + m0_c + m1_c * cct_norm + m2_c * cct_ratio
        mult_only = row['SRKT2_Baseline'] * mult_factor
        predictions_mult_only.append(mult_only)
        
        # Combined: all three
        after_mult = modified_srkt2 * mult_factor
        add_correction = a0_c + a1_c * cct_norm + a2_c * cct_ratio + a3_c * k_avg
        final_combined = after_mult + add_correction
        predictions_combined_test.append(final_combined)
    
    mae_baseline = np.abs(X_test_comb['SRKT2_Baseline'] - X_test_comb['PostOP Spherical Equivalent']).mean()
    mae_optimized = mean_absolute_error(X_test_comb['PostOP Spherical Equivalent'], predictions_combined_test)
    improvement = (mae_baseline - mae_optimized) / mae_baseline * 100
    
    print(f"  Train MAE: {mae_train:.4f}, Test MAE: {mae_optimized:.4f}")
    print(f"  Test: Baseline={mae_baseline:.4f}, Combined={mae_optimized:.4f}")
    print(f"  Improvement: {improvement:.1f}%")
    
    # Check for overfitting
    overfit_ratio = (mae_optimized - mae_train) / mae_train * 100
    if overfit_ratio > 20:
        print(f"  ‚ö†Ô∏è Overfitting detected: Test {overfit_ratio:.1f}% worse than train")
    elif overfit_ratio > 10:
        print(f"  ‚ö†Ô∏è Mild overfitting: Test {overfit_ratio:.1f}% worse than train")
    else:
        print(f"  ‚úÖ Good generalization: Test only {overfit_ratio:.1f}% worse than train")
    
    # Store results
    seed_results_combined.append({
        'param': [nc_base_c, nc_cct_c, k_base_c, k_cct_c, acd_base_c, acd_cct_c],
        'mult': [m0_c, m1_c, m2_c],
        'add': [a0_c, a1_c, a2_c, a3_c]
    })
    seed_test_maes_combined.append(mae_optimized)
    seed_train_maes_combined.append(mae_train)
    seed_baseline_maes_combined.append(mae_baseline)
    seed_improvements_combined.append(improvement)
    seed_overfit_ratios_combined.append(overfit_ratio)

# MULTI-SEED SUMMARY
print("\n" + "="*80)
print("COMBINED APPROACH - MULTI-SEED SUMMARY")
print("="*80)

print("\nüìä TEST PERFORMANCE ACROSS SEEDS:")
print("-" * 50)
for i, seed in enumerate(SEEDS):
    print(f"  Seed {seed:3}: MAE={seed_test_maes_combined[i]:.4f} D, Improvement={seed_improvements_combined[i]:.1f}%")

print("\nüìà STATISTICAL SUMMARY:")
print("-" * 50)
print(f"  Baseline MAE:      {np.mean(seed_baseline_maes_combined):.4f} ¬± {np.std(seed_baseline_maes_combined):.4f} D")
print(f"  Train MAE:         {np.mean(seed_train_maes_combined):.4f} ¬± {np.std(seed_train_maes_combined):.4f} D")
print(f"  Test MAE:          {np.mean(seed_test_maes_combined):.4f} ¬± {np.std(seed_test_maes_combined):.4f} D")
print(f"  Mean Improvement:  {np.mean(seed_improvements_combined):.1f} ¬± {np.std(seed_improvements_combined):.1f}%")
print(f"  Best seed:         {SEEDS[np.argmin(seed_test_maes_combined)]} (MAE={min(seed_test_maes_combined):.4f})")
print(f"  Worst seed:        {SEEDS[np.argmax(seed_test_maes_combined)]} (MAE={max(seed_test_maes_combined):.4f})")

# OVERFITTING ANALYSIS
print("\nüîç OVERFITTING ANALYSIS:")
print("-" * 50)
print(f"  Mean overfit ratio: {np.mean(seed_overfit_ratios_combined):.1f}%")
print(f"  (Test MAE is {np.mean(seed_overfit_ratios_combined):.1f}% worse than Train MAE on average)")

if np.mean(seed_overfit_ratios_combined) < 10:
    print("  ‚úÖ Excellent generalization - minimal overfitting")
elif np.mean(seed_overfit_ratios_combined) < 20:
    print("  ‚úÖ Good generalization - acceptable overfitting")
else:
    print("  ‚ö†Ô∏è Significant overfitting - consider regularization")
    print("  Note: Combined approach has more parameters, higher overfitting risk")

# Clinical accuracy
all_errors = []
for i in range(len(SEEDS)):
    errors = np.abs(np.array(seed_test_maes_combined[i]))
    all_errors.append(errors)

mean_mae = np.mean(seed_test_maes_combined)
within_050 = sum(1 for mae in seed_test_maes_combined if mae <= 0.50) / len(seed_test_maes_combined) * 100
within_100 = sum(1 for mae in seed_test_maes_combined if mae <= 1.00) / len(seed_test_maes_combined) * 100

print(f"\nüìà CLINICAL PERFORMANCE (Combined):")
print("-" * 50)
print(f"  Seeds with MAE < 0.50 D: {within_050:.0f}%")
print(f"  Seeds with MAE < 1.00 D: {within_100:.0f}%")

# Store in global results dictionary
multi_seed_results['combined'] = {
    'test_maes': seed_test_maes_combined,
    'train_maes': seed_train_maes_combined,
    'baseline_maes': seed_baseline_maes_combined,
    'improvements': seed_improvements_combined,
    'overfit_ratios': seed_overfit_ratios_combined,
    'mean_mae': np.mean(seed_test_maes_combined),
    'std_mae': np.std(seed_test_maes_combined),
    'mean_improvement': np.mean(seed_improvements_combined)
}

# Extract consensus parameters
all_param_params = [r['param'] for r in seed_results_combined]
all_mult_params = [r['mult'] for r in seed_results_combined]
all_add_params = [r['add'] for r in seed_results_combined]

avg_param_params = np.mean(all_param_params, axis=0)
avg_mult_params = np.mean(all_mult_params, axis=0)
avg_add_params = np.mean(all_add_params, axis=0)

print("\n‚úÖ CONSENSUS PARAMETERS (averaged across seeds):")
print("-" * 70)
print("1. Modified SRK/T2:")
print(f"   nc = {avg_param_params[0]:.4f} + {avg_param_params[1]:.4f} √ó CCT_norm")
print(f"   k_index = {avg_param_params[2]:.4f} + {avg_param_params[3]:.4f} √ó CCT_norm")
print(f"   ACD_offset = {avg_param_params[4]:.4f} + {avg_param_params[5]:.4f} √ó CCT_norm")
print("2. Multiplicative:")
print(f"   Factor = 1 + {avg_mult_params[0]:.4f} + {avg_mult_params[1]:.4f} √ó CCT_norm + {avg_mult_params[2]:.4f} √ó CCT_ratio")
print("3. Additive:")
print(f"   Term = {avg_add_params[0]:.4f} + {avg_add_params[1]:.4f} √ó CCT_norm + {avg_add_params[2]:.4f} √ó CCT_ratio + {avg_add_params[3]:.4f} √ó K_avg")

print("\nüí° ROBUSTNESS ANALYSIS:")
print("-" * 50)
mae_cv = np.std(seed_test_maes_combined) / np.mean(seed_test_maes_combined) * 100
if mae_cv < 5:
    print(f"‚úÖ Excellent stability: CV={mae_cv:.1f}% (very consistent across seeds)")
elif mae_cv < 10:
    print(f"‚úÖ Good stability: CV={mae_cv:.1f}% (consistent across seeds)")
else:
    print(f"‚ö†Ô∏è Moderate stability: CV={mae_cv:.1f}% (some variation across seeds)")

print(f"\nüìä Range of results: {min(seed_test_maes_combined):.4f} - {max(seed_test_maes_combined):.4f} D")
print(f"   This {max(seed_test_maes_combined)-min(seed_test_maes_combined):.4f} D range shows the impact of data split")

print("\nüí° COMBINED APPROACH INSIGHTS:")
print("-" * 50)
if np.mean(seed_overfit_ratios_combined) > np.mean(seed_improvements_combined) * 0.5:
    print("‚ö†Ô∏è High complexity may be causing overfitting")
    print("   Consider using simpler approach (multiplicative only)")
else:
    print("‚úÖ Combined approach balances complexity and performance")
    print("   The three corrections work synergistically")

In [None]:
# MULTI-SEED COMPARISON - FINAL COMPREHENSIVE SUMMARY
# ====================================================
# PURPOSE: Compare all methods across multiple seeds for robust conclusions

print("=" * 80)
print("MULTI-SEED ANALYSIS - COMPREHENSIVE COMPARISON")
print("=" * 80)

print(f"\nüî¨ VALIDATION SETUP:")
print("-" * 50)
print(f"‚Ä¢ Seeds tested: {SEEDS}")
print(f"‚Ä¢ Train/Test split: 75%/25% (72/24 patients)")
print(f"‚Ä¢ Inner validation: 5-fold CV")
print(f"‚Ä¢ All results on holdout test sets")

# Check which methods have been run
available_methods = []
for method in ['parameter', 'multiplicative', 'additive', 'combined', 'fixed_combined']:
    if method in multi_seed_results and multi_seed_results[method]:
        available_methods.append(method)

if not available_methods:
    print("\n‚ö†Ô∏è No multi-seed results found yet!")
    print("Please run the optimization cells first.")
else:
    print(f"\n‚úÖ Methods analyzed: {', '.join(available_methods)}")
    
    # Create comparison table
    print("\n" + "="*80)
    print("PERFORMANCE COMPARISON ACROSS METHODS AND SEEDS")
    print("="*80)
    
    # Detailed table by seed
    print("\nüìä DETAILED RESULTS BY SEED:")
    print("-" * 80)
    print(f"{'Method':<20} | ", end="")
    for seed in SEEDS:
        print(f"Seed {seed:3} | ", end="")
    print(f"{'Mean ¬± Std':<15} | {'Best':<6} | {'Worst':<6}")
    print("-" * 80)
    
    for method in available_methods:
        results = multi_seed_results[method]
        print(f"{method.capitalize():<20} | ", end="")
        for mae in results['test_maes']:
            print(f"{mae:7.4f} | ", end="")
        mean_mae = results['mean_mae']
        std_mae = results['std_mae']
        print(f"{mean_mae:.4f} ¬± {std_mae:.4f} | ", end="")
        print(f"{min(results['test_maes']):.4f} | {max(results['test_maes']):.4f}")
    
    # Summary statistics
    print("\n" + "="*80)
    print("STATISTICAL SUMMARY")
    print("="*80)
    
    print("\nüìà MEAN PERFORMANCE (averaged across seeds):")
    print("-" * 50)
    
    # Sort methods by mean MAE
    sorted_methods = sorted(available_methods, 
                          key=lambda m: multi_seed_results[m]['mean_mae'])
    
    for rank, method in enumerate(sorted_methods, 1):
        results = multi_seed_results[method]
        mean_mae = results['mean_mae']
        std_mae = results['std_mae']
        mean_imp = results['mean_improvement']
        
        print(f"{rank}. {method.capitalize():<20}: MAE = {mean_mae:.4f} ¬± {std_mae:.4f} D")
        print(f"   {'':20}  Improvement = {mean_imp:.1f}%")
    
    # Best overall method
    best_method = sorted_methods[0]
    best_results = multi_seed_results[best_method]
    
    print(f"\nüèÜ BEST METHOD: {best_method.upper()}")
    print(f"   Mean MAE: {best_results['mean_mae']:.4f} ¬± {best_results['std_mae']:.4f} D")
    print(f"   Mean Improvement: {best_results['mean_improvement']:.1f}%")
    
    # Robustness analysis
    print("\n" + "="*80)
    print("ROBUSTNESS ANALYSIS")
    print("="*80)
    
    print("\nüìä STABILITY ACROSS SEEDS (Coefficient of Variation):")
    print("-" * 50)
    
    stability_scores = []
    for method in available_methods:
        results = multi_seed_results[method]
        cv = (results['std_mae'] / results['mean_mae']) * 100
        stability_scores.append((method, cv))
    
    # Sort by stability (lower CV is better)
    stability_scores.sort(key=lambda x: x[1])
    
    for method, cv in stability_scores:
        if cv < 5:
            status = "‚úÖ Excellent"
        elif cv < 10:
            status = "‚úÖ Good"
        elif cv < 15:
            status = "‚ö†Ô∏è Moderate"
        else:
            status = "‚ö†Ô∏è Variable"
        print(f"  {method.capitalize():<20}: CV = {cv:5.1f}%  {status}")
    
    # Range analysis
    print("\nüìä PERFORMANCE RANGE (max - min across seeds):")
    print("-" * 50)
    
    for method in available_methods:
        results = multi_seed_results[method]
        mae_range = max(results['test_maes']) - min(results['test_maes'])
        print(f"  {method.capitalize():<20}: {mae_range:.4f} D")
    
    # Statistical significance insights
    print("\n" + "="*80)
    print("KEY INSIGHTS")
    print("="*80)
    
    print("\nüí° STATISTICAL CONCLUSIONS:")
    print("-" * 50)
    
    # Check if best method is consistently best
    best_count = 0
    for i in range(len(SEEDS)):
        seed_maes = {m: multi_seed_results[m]['test_maes'][i] for m in available_methods}
        if min(seed_maes, key=seed_maes.get) == best_method:
            best_count += 1
    
    consistency = (best_count / len(SEEDS)) * 100
    print(f"‚Ä¢ {best_method.capitalize()} was best in {best_count}/{len(SEEDS)} seeds ({consistency:.0f}%)")
    
    # Check overlap in confidence intervals
    if len(available_methods) > 1:
        print("\n‚Ä¢ Confidence intervals (mean ¬± std):")
        for method in sorted_methods[:3]:  # Top 3 methods
            results = multi_seed_results[method]
            lower = results['mean_mae'] - results['std_mae']
            upper = results['mean_mae'] + results['std_mae']
            print(f"  {method.capitalize():<20}: [{lower:.4f}, {upper:.4f}] D")
    
    # Clinical relevance
    print("\nüìè CLINICAL RELEVANCE:")
    print("-" * 50)
    
    baseline_mean = np.mean(multi_seed_results[available_methods[0]]['baseline_maes'])
    for method in sorted_methods:
        results = multi_seed_results[method]
        mean_mae = results['mean_mae']
        
        if mean_mae < 0.5:
            clinical = "Excellent (< 0.5 D)"
        elif mean_mae < 0.75:
            clinical = "Good (< 0.75 D)"
        elif mean_mae < 1.0:
            clinical = "Acceptable (< 1.0 D)"
        else:
            clinical = "Poor (‚â• 1.0 D)"
        
        print(f"  {method.capitalize():<20}: {clinical}")
    
    print("\n" + "="*80)
    print("RECOMMENDATIONS")
    print("="*80)
    
    print("\n‚úÖ FINAL RECOMMENDATIONS:")
    print("-" * 50)
    
    # Find most stable method
    most_stable = min(stability_scores, key=lambda x: x[1])
    
    print(f"1. Best performance: {best_method.capitalize()} (MAE = {best_results['mean_mae']:.4f} D)")
    print(f"2. Most stable: {most_stable[0].capitalize()} (CV = {most_stable[1]:.1f}%)")
    
    if best_method == most_stable[0]:
        print(f"\nüéØ {best_method.capitalize()} is both best performing AND most stable!")
        print("   This is the recommended approach for clinical use.")
    else:
        print(f"\n‚öñÔ∏è Trade-off detected:")
        print(f"   ‚Ä¢ {best_method.capitalize()}: Better performance but less stable")
        print(f"   ‚Ä¢ {most_stable[0].capitalize()}: More stable but slightly worse performance")
        print("   Choose based on clinical priorities.")
    
    print("\nüìù PUBLICATION-READY SUMMARY:")
    print("-" * 50)
    print(f"Using {len(SEEDS)} different random seeds for validation,")
    print(f"{best_method.capitalize()} achieved the best mean MAE of {best_results['mean_mae']:.3f} ¬± {best_results['std_mae']:.3f} D,")
    print(f"representing a {best_results['mean_improvement']:.1f}% improvement over baseline SRK/T2.")
    
print("\n" + "="*80)
print("END OF MULTI-SEED ANALYSIS")
print("="*80)