# MIMIC-IV 30-Day Readmission Prediction - Baseline Models
## Comprehensive Classification Model Comparison

**Objective**: Build and evaluate baseline machine learning models for 30-day hospital readmission

**Models**: Logistic Regression, Random Forest, XGBoost, SVM, Naive Bayes

**Metrics**: AUROC, AUPRC, F1-Score, Accuracy, Precision, Recall

## Section 1: Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import json
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    roc_auc_score, auc, roc_curve, precision_recall_curve,
    confusion_matrix, classification_report, f1_score,
    accuracy_score, precision_score, recall_score
)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb

import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn grid theme and a wide default canvas.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (14, 8)})

print("✅ Libraries imported successfully")

✅ Libraries imported successfully


## Section 2: Load Data

In [4]:
# Directory holding the processed MIMIC-IV extracts. It is also where every plot
# and report produced later in the notebook is written. Set the MIMIC_DATA_DIR
# environment variable to run on another machine; the fallback is the original
# author's local path.
data_dir = os.environ.get(
    'MIMIC_DATA_DIR',
    '/Users/yuchenzhou/Documents/duke/compsci526/final_proj/mimic_data/processed_data',
)

features_path = os.path.join(data_dir, 'readmission_features_cleaned.csv')
if not os.path.isfile(features_path):
    # Fail early with an actionable message instead of a bare pandas traceback.
    raise FileNotFoundError(
        f"Feature file not found: {features_path}. "
        "Set MIMIC_DATA_DIR to the directory containing readmission_features_cleaned.csv."
    )

df = pd.read_csv(features_path)
print(f"Data shape: {df.shape}")
print(f"Target distribution:\n{df['readmit_30d'].value_counts()}")

Data shape: (546038, 68)
Target distribution:
readmit_30d
0    438774
1    107264
Name: count, dtype: int64


## Section 3: Preprocessing

In [None]:
# Drop identifier / raw-code columns that are present, then split the frame
# into the feature matrix X and the binary target y (`readmit_30d`).
cols_to_drop = list(filter(
    lambda col: col in ('subject_id', 'hadm_id', 'primary_icd_code'),
    df.columns,
))
df_processed = df.drop(columns=cols_to_drop, errors='ignore')

y = df_processed['readmit_30d'].copy()
X = df_processed.drop(columns=['readmit_30d'])

print(f"Features: {X.shape[1]}")
print(f"Target - 0: {y.eq(0).sum()}, 1: {y.eq(1).sum()}")

Features: 64
Target - 0: 438774, 1: 107264


## Section 4: Train-Test Split & Scaling

In [None]:
# Stratified 80/20 split so both partitions keep the same class ratio as `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Re-wrap the arrays as DataFrames. The original row index is carried over so the
# scaled features stay label-aligned with y_train / y_test; without `index=` the
# frames would get a fresh 0..n-1 index that no longer matches the targets.
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

print(f"Train: {X_train_scaled.shape}, Test: {X_test_scaled.shape}")

Train: (436830, 64), Test: (109208, 64)


## Section 5-7: Build & Train Models

In [None]:
# Negative-to-positive ratio of the training labels, used by XGBoost to
# re-weight the minority (readmitted) class.
scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()

# scikit-learn documents that SVC fit time scales at least quadratically with the
# number of samples and may be impractical beyond tens of thousands of rows;
# probability=True adds an internal cross-validation on top of that. The kernel
# SVM is therefore fitted on a stratified subsample of at most this many rows.
SVM_MAX_TRAIN_SAMPLES = 20_000

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42, class_weight='balanced'),
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42, n_jobs=-1, class_weight='balanced'),
    'XGBoost': xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42, scale_pos_weight=scale_pos_weight, eval_metric='logloss'),
    'SVM': SVC(kernel='rbf', C=1.0, random_state=42, probability=True, class_weight='balanced'),
    'Naive Bayes': GaussianNB()
}

# name -> {'y_test_pred', 'y_test_pred_proba', 'model'} for every fitted model.
results = {}

for name, model in models.items():
    print(f"Training {name}...", end=" ", flush=True)

    X_fit, y_fit = X_train_scaled, y_train
    if isinstance(model, SVC) and len(X_fit) > SVM_MAX_TRAIN_SAMPLES:
        # train_test_split is used only to draw a reproducible stratified subsample.
        X_fit, _, y_fit, _ = train_test_split(
            X_fit, y_fit,
            train_size=SVM_MAX_TRAIN_SAMPLES,
            stratify=y_fit,
            random_state=42,
        )
        print(f"(stratified subsample: {len(X_fit):,} rows)", end=" ", flush=True)

    model.fit(X_fit, y_fit)

    # Hard labels feed the threshold metrics; column 1 of predict_proba is the
    # positive-class probability used for AUROC / AUPRC.
    y_test_pred = model.predict(X_test_scaled)
    y_test_pred_proba = model.predict_proba(X_test_scaled)[:, 1]

    results[name] = {
        'y_test_pred': y_test_pred,
        'y_test_pred_proba': y_test_pred_proba,
        'model': model
    }
    print("✅")

print("\n✅ All models trained")

Training Logistic Regression... ✅
Training Random Forest... ✅
Training XGBoost... ✅
Training SVM... 

## Section 8: Evaluate Performance

In [None]:
# Score every fitted model on the held-out test split and print one table row each.
metrics_summary = {}

rule = "=" * 100
print("\n" + rule)
print(f"{'Model':<20} {'AUROC':<12} {'AUPRC':<12} {'F1':<12} {'Accuracy':<12} {'Precision':<12} {'Recall':<12}")
print(rule)

for name, result in results.items():
    y_pred, y_pred_proba = result['y_test_pred'], result['y_test_pred_proba']

    # AUPRC is the trapezoidal area under the precision-recall curve.
    precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
    scores = {
        'AUROC': roc_auc_score(y_test, y_pred_proba),
        'AUPRC': auc(recall_curve, precision_curve),
        'F1': f1_score(y_test, y_pred),
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, zero_division=0),
        'Recall': recall_score(y_test, y_pred, zero_division=0),
    }

    # Predictions are kept next to the scores so the plotting cells can reuse them.
    metrics_summary[name] = {
        **scores,
        'y_test_pred': y_pred,
        'y_test_pred_proba': y_pred_proba,
    }

    print(f"{name:<20} " + " ".join(f"{value:<12.4f}" for value in scores.values()))

print(rule)

# The winner is chosen by test-set AUROC.
best_model = max(metrics_summary, key=lambda model_name: metrics_summary[model_name]['AUROC'])
print(f"\n🏆 Best Model: {best_model} (AUROC: {metrics_summary[best_model]['AUROC']:.4f})")

## Section 9: Visualize Results

In [None]:
# Two-panel figure: ROC curves on the left, AUROC bar chart on the right.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# --- Left panel: one ROC curve per model plus the chance diagonal ---
ax = axes[0]
for name in metrics_summary:
    metrics = metrics_summary[name]
    fpr, tpr, _ = roc_curve(y_test, metrics['y_test_pred_proba'])
    ax.plot(fpr, tpr, label=f"{name} (AUROC={metrics['AUROC']:.4f})", linewidth=2)
ax.plot([0, 1], [0, 1], 'k--', label='Random', linewidth=1)
ax.set_title('ROC Curves')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend(fontsize=10)
ax.grid(alpha=0.3)

# --- Right panel: horizontal bars of AUROC, annotated with the value ---
ax = axes[1]
names = list(metrics_summary.keys())
auroc_scores = [metrics_summary[name]['AUROC'] for name in names]
colors = plt.cm.viridis(np.linspace(0, 1, len(names)))
bars = ax.barh(names, auroc_scores, color=colors)
ax.set_title('Model Comparison')
ax.set_xlabel('AUROC')
ax.set_xlim([0.5, 1.0])

# Bar i sits at y == i, so the row number doubles as the label's y coordinate.
for row, score in enumerate(auroc_scores):
    ax.text(score + 0.01, row, f'{score:.4f}', va='center')

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'roc_comparison.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ ROC plots saved")

In [None]:
# Confusion Matrices
# The grid is sized from the number of evaluated models (three panels per row)
# instead of assuming exactly five models on a fixed 2x3 layout. With five models
# this yields the same 2x3, 16x10-inch figure as before.
n_cols = 3
n_rows = max(1, -(-len(metrics_summary) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
axes = axes.flatten()

# Models are drawn in alphabetical order, one heatmap per panel.
for idx, (name, metrics) in enumerate(sorted(metrics_summary.items())):
    cm = confusion_matrix(y_test, metrics['y_test_pred'])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[idx], cbar=False,
                xticklabels=['No', 'Yes'],
                yticklabels=['No', 'Yes'])
    axes[idx].set_title(f'{name} (F1={metrics["F1"]:.4f})')
    axes[idx].set_ylabel('True')
    axes[idx].set_xlabel('Predicted')

# Blank out every panel that did not receive a model.
for unused_ax in axes[len(metrics_summary):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'confusion_matrices.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ Confusion matrices saved")

In [None]:
# Metrics Heatmap
# metrics_summary also stores the per-model prediction arrays, so the frame built
# from it has object dtype. Only the scalar score columns are selected and cast to
# float; matplotlib refuses to colour-map an object-dtype array.
metrics_df = pd.DataFrame(metrics_summary).T
metrics_cols = ['AUROC', 'AUPRC', 'F1', 'Accuracy', 'Precision', 'Recall']
metrics_scores = metrics_df[metrics_cols].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(metrics_scores, annot=True, fmt='.4f', cmap='RdYlGn', ax=ax,
            cbar_kws={'label': 'Score'}, vmin=0, vmax=1, linewidths=1)

ax.set_title('Model Performance Metrics')
ax.set_xlabel('Metrics')
ax.set_ylabel('Models')
plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'metrics_heatmap.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ Metrics heatmap saved")

In [None]:
# Feature Importance
# One panel per tree-based model, showing its 20 largest feature importances.
# The boosted model is stored in `results` under 'XGBoost'; no
# 'Gradient Boosting' entry is ever created, so that key raised KeyError.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

panels = [
    ('Random Forest', 'steelblue'),
    ('XGBoost', 'coral'),
]

for ax, (model_name, bar_color) in zip(axes, panels):
    fitted_model = results[model_name]['model']
    top_features = pd.DataFrame({
        'feature': X_train.columns,
        'importance': fitted_model.feature_importances_
    }).sort_values('importance', ascending=False).head(20)

    ax.barh(range(len(top_features)), top_features['importance'], color=bar_color)
    ax.set_yticks(range(len(top_features)))
    ax.set_yticklabels(top_features['feature'])
    ax.set_xlabel('Importance')
    ax.set_title(f'{model_name} - Top 20')
    # Most important feature at the top of the chart.
    ax.invert_yaxis()

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'feature_importance.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ Feature importance saved")

## Section 10: Cross-Validation & Summary

In [None]:
print("\n" + "="*80)
print("Cross-Validation (5-Fold)")
print("="*80)

# Stratified, shuffled 5-fold CV on the training split, scored by AUROC.
# NOTE(review): X_train_scaled was scaled with statistics from the whole training
# split, so each CV fold has seen its validation rows' mean/std. Wrap the scaler
# and model in a Pipeline if strictly leak-free CV estimates are needed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_results = {}

# scikit-learn documents that SVC fit time scales at least quadratically with the
# sample count, so the kernel SVM is cross-validated on a stratified subsample of
# at most this many rows; every other model uses the full training split.
SVM_CV_MAX_SAMPLES = 20_000

for name, model in models.items():
    X_cv, y_cv = X_train_scaled, y_train
    if isinstance(model, SVC) and len(X_cv) > SVM_CV_MAX_SAMPLES:
        X_cv, _, y_cv, _ = train_test_split(
            X_cv, y_cv,
            train_size=SVM_CV_MAX_SAMPLES,
            stratify=y_cv,
            random_state=42,
        )

    cv_scores = cross_val_score(model, X_cv, y_cv, cv=skf, scoring='roc_auc', n_jobs=-1)
    cv_results[name] = {'mean': cv_scores.mean(), 'std': cv_scores.std()}
    print(f"{name:<20} AUROC: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")

# Save results
report = f"""
{'='*100}
MIMIC-IV 30-Day Readmission - Baseline Models Report
{'='*100}

【Test Set Performance】
{'-'*100}
{'Model':<20} {'AUROC':<12} {'AUPRC':<12} {'F1':<12} {'Accuracy':<12}
{'-'*100}
"""

for name in sorted(metrics_summary.keys()):
    m = metrics_summary[name]
    report += f"{name:<20} {m['AUROC']:<12.4f} {m['AUPRC']:<12.4f} {m['F1']:<12.4f} {m['Accuracy']:<12.4f}\n"

report += f"""
【Cross-Validation Results】
{'-'*100}
{'Model':<20} {'CV AUROC Mean':<18} {'Std Dev':<12}
{'-'*100}
"""

for name in sorted(cv_results.keys()):
    report += f"{name:<20} {cv_results[name]['mean']:<18.4f} {cv_results[name]['std']:<12.4f}\n"

# Best model is chosen by test-set AUROC, not by the CV mean.
best_model = max(metrics_summary, key=lambda x: metrics_summary[x]['AUROC'])
report += f"""
【Best Model】
{'-'*100}
Model: {best_model}
Test AUROC: {metrics_summary[best_model]['AUROC']:.4f}
CV AUROC: {cv_results[best_model]['mean']:.4f} (+/- {cv_results[best_model]['std']:.4f})

{'='*100}
"""

# Save report
# The report contains non-ASCII section markers, so the encoding is set
# explicitly rather than relying on the platform default.
report_path = os.path.join(data_dir, 'baseline_models_report.txt')
with open(report_path, 'w', encoding='utf-8') as f:
    f.write(report)

# Save metrics to JSON
# Only scalar scores are exported; the prediction arrays held in metrics_summary
# are left out, and numpy floats are converted to plain Python floats.
metrics_json = {}
for name, m in metrics_summary.items():
    metrics_json[name] = {
        'AUROC': float(m['AUROC']),
        'AUPRC': float(m['AUPRC']),
        'F1': float(m['F1']),
        'Accuracy': float(m['Accuracy']),
        'Precision': float(m['Precision']),
        'Recall': float(m['Recall']),
        'CV_Mean': float(cv_results[name]['mean']),
        'CV_Std': float(cv_results[name]['std'])
    }

json_path = os.path.join(data_dir, 'baseline_models_metrics.json')
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(metrics_json, f, indent=2)

print(f"\n✅ Results saved to {data_dir}")
print(f"   - Report: baseline_models_report.txt")
print(f"   - Metrics: baseline_models_metrics.json")
print(f"   - Plots: roc_comparison.png, confusion_matrices.png, metrics_heatmap.png, feature_importance.png")

# MIMIC-IV 30-Day Readmission Prediction - Baseline Models
## Comprehensive Classification Model Comparison

**Objective**: Build and evaluate baseline machine learning models for predicting 30-day hospital readmission

**Models to Compare**:
1. Logistic Regression (Linear Baseline)
2. Random Forest (Tree-based Baseline)
3. XGBoost (Gradient Boosting)
4. SVM (Support Vector Machine)
5. Naive Bayes (Probabilistic Baseline)

**Evaluation Metrics**:
- AUROC (Area Under Receiver Operating Characteristic Curve)
- AUPRC (Area Under Precision-Recall Curve)
- F1-Score
- Accuracy
- Precision & Recall

**Generated**: 2025-10-16


## Section 1: Import Required Libraries

In [None]:
%pip install -q pandas numpy scikit-learn matplotlib seaborn

# Import core data handling libraries
import pandas as pd
import numpy as np
import os
import json
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')

# Import sklearn utilities
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    roc_auc_score, auc, roc_curve, precision_recall_curve,
    confusion_matrix, classification_report, f1_score,
    accuracy_score, precision_score, recall_score
)

# Import baseline models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# Import visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Set visualization style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 10

print("✅ All libraries imported successfully")
print(f"   scikit-learn version: {__import__('sklearn').__version__}")
print(f"   Pandas version: {pd.__version__}")


Note: you may need to restart the kernel to use updated packages.


XGBoostError: 
XGBoost Library (libxgboost.dylib) could not be loaded.
Likely causes:
  * OpenMP runtime is not installed
    - vcomp140.dll or libgomp-1.dll for Windows
    - libomp.dylib for Mac OSX
    - libgomp.so for Linux and other UNIX-like OSes
    Mac OSX users: Run `brew install libomp` to install OpenMP runtime.

  * You are running 32-bit Python on a 64-bit OS

Error message(s): ["dlopen(/Users/yuchenzhou/Documents/duke/compsci526/final_proj/git_proj/.venv/lib/python3.13/site-packages/xgboost/lib/libxgboost.dylib, 0x0006): Library not loaded: @rpath/libomp.dylib\n  Referenced from: <E8D72161-CCD1-3423-9388-36D4CA0A7524> /Users/yuchenzhou/Documents/duke/compsci526/final_proj/git_proj/.venv/lib/python3.13/site-packages/xgboost/lib/libxgboost.dylib\n  Reason: tried: '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/homebrew/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libomp.dylib' (no such file), '/opt/homebrew/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/lib/libomp.dylib' (no such file)"]


## Section 2: Load and Explore Data

In [None]:
# Define data path
# MIMIC_DATA_DIR overrides the original author's machine-specific default so the
# notebook can run elsewhere. Plots and reports are written to the same directory.
data_dir = os.environ.get(
    'MIMIC_DATA_DIR',
    '/Users/yuchenzhou/Documents/duke/compsci526/final_proj/mimic_data/processed_data',
)

print("=" * 80)
print("【Section 2】数据加载和探索")
print("=" * 80)

# Load the cleaned feature table here; the preprocessing cell reads `df`, which
# this section previously never defined.
df = pd.read_csv(os.path.join(data_dir, 'readmission_features_cleaned.csv'))
print(f"Data shape: {df.shape}")
print(f"Target distribution:\n{df['readmit_30d'].value_counts()}")

## Section 3: Data Preprocessing

In [None]:
print("\n" + "=" * 80)
print("【Section 3】数据预处理")
print("=" * 80)

# 1️⃣ Remove unnecessary columns (identifiers and the raw ICD code)
print("\n1️⃣ 移除不必要的列...")
cols_to_drop = [col for col in df.columns if col in ['subject_id', 'hadm_id', 'primary_icd_code']]
df_processed = df.drop(columns=cols_to_drop, errors='ignore')
print(f"   移除了 {len(cols_to_drop)} 列")
print(f"   新数据形状: {df_processed.shape}")

# 2️⃣ Separate features and target
print("\n2️⃣ 分离特征和目标变量...")
X = df_processed.drop(columns=['readmit_30d'])
y = df_processed['readmit_30d'].copy()

print(f"   特征数: {X.shape[1]}")
print(f"   目标变量分布:")
print(f"      - 0 (无再入院): {(y==0).sum():,}")
print(f"      - 1 (30天再入院): {(y==1).sum():,}")
print(f"      - 类别不平衡比: {(y==1).sum() / (y==0).sum():.4f}")

# 3️⃣ Check and handle missing values (median imputation per column)
print("\n3️⃣ 处理缺失值...")
missing_per_col = X.isnull().sum()
missing_cols = missing_per_col[missing_per_col > 0]

if len(missing_cols) > 0:
    for col in missing_cols.index:
        # Assign the filled column back. `X[col].fillna(..., inplace=True)` acts
        # on a temporary Series and does not update X under pandas Copy-on-Write.
        X[col] = X[col].fillna(X[col].median())
    print(f"   已处理 {len(missing_cols)} 列的缺失值")
else:
    print(f"   ✅ 无缺失值")

# 4️⃣ Remove duplicates
# NOTE(review): duplicates are detected on the features only, so rows with equal
# features but different labels collapse to the first occurrence - confirm that
# this is intended.
print("\n4️⃣ 检查重复值...")
duplicates = X.duplicated().sum()
print(f"   重复行数: {duplicates}")
if duplicates > 0:
    X = X.drop_duplicates()
    # Keep the target aligned with the surviving rows by label.
    y = y.loc[X.index]
    print(f"   已移除重复行，新形状: {X.shape}")

print("\n✅ 数据预处理完成")


## Section 4: Split Data into Train and Test Sets

In [None]:
banner = "=" * 80
print("\n" + banner)
print("【Section 4】数据集划分 (训练/测试)")
print(banner)

# Stratified 80/20 split keeps the class ratio identical in both partitions.
print("\n1️⃣ 使用 stratified split 划分数据 (80/20)...")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Class counts are computed once and reused in the summary lines below.
train_pos, train_neg = (y_train == 1).sum(), (y_train == 0).sum()
test_pos, test_neg = (y_test == 1).sum(), (y_test == 0).sum()

print(f"   ✅ 划分完成")
print(f"   训练集: {X_train.shape[0]:,} 样本")
print(f"      - 正类 (再入院): {train_pos:,} ({train_pos/len(y_train)*100:.2f}%)")
print(f"      - 负类 (无再入院): {train_neg:,} ({train_neg/len(y_train)*100:.2f}%)")

print(f"\n   测试集: {X_test.shape[0]:,} 样本")
print(f"      - 正类 (再入院): {test_pos:,} ({test_pos/len(y_test)*100:.2f}%)")
print(f"      - 负类 (无再入院): {test_neg:,} ({test_neg/len(y_test)*100:.2f}%)")

# Standardise features with statistics learned from the training rows only.
print("\n2️⃣ 特征缩放 (StandardScaler)...")
scaler = StandardScaler()
scaler.fit(X_train)

# Wrap the scaled arrays back into DataFrames, keeping column names and row index.
X_train_scaled = pd.DataFrame(
    scaler.transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test), columns=X_test.columns, index=X_test.index
)

print(f"   ✅ 缩放完成")
print(f"   训练集统计 (缩放后):")
print(f"      - 均值: {X_train_scaled.mean().mean():.6f} (≈ 0 ✓)")
print(f"      - 标准差: {X_train_scaled.std().mean():.6f} (≈ 1 ✓)")

print("\n✅ 数据集划分完成")


## Section 5-7: Build, Train, and Predict with Multiple Baseline Models

In [None]:
print("\n" + "=" * 80)
print("【Sections 5-7】构建、训练和预测 - 多个基线模型")
print("=" * 80)

models = {}
# name -> train/test hard labels, train/test positive-class probabilities, and
# the fitted estimator.
results = {}

# scikit-learn documents that SVC fit time scales at least quadratically with the
# number of samples and may be impractical beyond tens of thousands of rows;
# probability=True adds an internal cross-validation on top of that. The kernel
# SVM is therefore fitted on a stratified subsample of at most this many rows.
SVM_MAX_TRAIN_SAMPLES = 20_000

print("\n1️⃣ 初始化模型...")

# Class imbalance is handled with class_weight='balanced' where available and
# with scale_pos_weight (negative/positive ratio) for XGBoost.
model_configs = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42, solver='lbfgs', class_weight='balanced'),
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=20, min_samples_split=10, random_state=42, n_jobs=-1, class_weight='balanced'),
    'XGBoost': xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42, scale_pos_weight=(y_train==0).sum()/(y_train==1).sum(), n_jobs=-1, eval_metric='logloss'),
    'SVM': SVC(kernel='rbf', C=1.0, random_state=42, probability=True, class_weight='balanced'),
    'Naive Bayes': GaussianNB()
}

for name, model in model_configs.items():
    models[name] = model
    print(f"   ✅ {name} 已初始化")

print("\n2️⃣ 训练模型...")
for name, model in models.items():
    print(f"   训练 {name}...", end=" ", flush=True)

    X_fit, y_fit = X_train_scaled, y_train
    if isinstance(model, SVC) and len(X_fit) > SVM_MAX_TRAIN_SAMPLES:
        # train_test_split is used only to draw a reproducible stratified subsample.
        X_fit, _, y_fit, _ = train_test_split(
            X_fit, y_fit,
            train_size=SVM_MAX_TRAIN_SAMPLES,
            stratify=y_fit,
            random_state=42,
        )
        print(f"(stratified subsample: {len(X_fit):,} rows)", end=" ", flush=True)

    model.fit(X_fit, y_fit)
    print("✅")

print("\n3️⃣ 生成预测...")
for name, model in models.items():
    # Column 1 of predict_proba is the positive-class probability.
    y_train_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)
    y_train_pred_proba = model.predict_proba(X_train_scaled)[:, 1]
    y_test_pred_proba = model.predict_proba(X_test_scaled)[:, 1]

    results[name] = {
        'y_train_pred': y_train_pred,
        'y_test_pred': y_test_pred,
        'y_train_pred_proba': y_train_pred_proba,
        'y_test_pred_proba': y_test_pred_proba,
        'model': model
    }

print("   ✅ 所有模型预测完成")


## Section 8: Evaluate Model Performance

In [None]:
print("\n" + "=" * 80)
print("【Section 8】模型性能评估")
print("=" * 80)

# Per-model test-set scores, plus the predictions reused by the plotting cell.
metrics_summary = {}

table_rule = "=" * 100
print("\n" + table_rule)
print(f"{'模型':<20} {'AUROC':<12} {'AUPRC':<12} {'F1 Score':<12} {'Accuracy':<12} {'Precision':<12} {'Recall':<12}")
print(table_rule)

for name, result in results.items():
    y_test_pred, y_test_pred_proba = result['y_test_pred'], result['y_test_pred_proba']

    # AUPRC is the trapezoidal area under the precision-recall curve.
    precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_test_pred_proba)
    scores = {
        'AUROC': roc_auc_score(y_test, y_test_pred_proba),
        'AUPRC': auc(recall_curve, precision_curve),
        'F1': f1_score(y_test, y_test_pred),
        'Accuracy': accuracy_score(y_test, y_test_pred),
        'Precision': precision_score(y_test, y_test_pred, zero_division=0),
        'Recall': recall_score(y_test, y_test_pred, zero_division=0),
    }

    metrics_summary[name] = {
        **scores,
        'y_test_pred': y_test_pred,
        'y_test_pred_proba': y_test_pred_proba,
    }

    print(f"{name:<20} " + " ".join(f"{value:<12.4f}" for value in scores.values()))

print(table_rule)

# The winner is chosen by test-set AUROC.
best_model_name = max(metrics_summary, key=lambda model_name: metrics_summary[model_name]['AUROC'])
best_scores = metrics_summary[best_model_name]
print(f"\n🏆 最佳模型 (按 AUROC): {best_model_name}")
print(f"   AUROC: {best_scores['AUROC']:.4f}")
print(f"   AUPRC: {best_scores['AUPRC']:.4f}")

print("\n✅ 性能评估完成")


## Section 9: Visualize Results

In [None]:
print("\n" + "=" * 80)
print("【Section 9】可视化结果")
print("=" * 80)

# 1. ROC Curves (left) and AUROC bar chart (right)
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

ax = axes[0]
for name, metrics in metrics_summary.items():
    fpr, tpr, _ = roc_curve(y_test, metrics['y_test_pred_proba'])
    ax.plot(fpr, tpr, label=f"{name} (AUROC={metrics['AUROC']:.4f})", linewidth=2)

ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier', linewidth=1)
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_title('ROC Curves - Model Comparison', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(alpha=0.3)

ax = axes[1]
metrics_names = list(metrics_summary.keys())
auroc_scores = [metrics_summary[name]['AUROC'] for name in metrics_names]
colors = plt.cm.viridis(np.linspace(0, 1, len(metrics_names)))

bars = ax.barh(metrics_names, auroc_scores, color=colors)
ax.set_xlabel('AUROC Score', fontsize=12)
ax.set_title('Model Comparison by AUROC', fontsize=14, fontweight='bold')
ax.set_xlim([0.5, 1.0])

# Bar i sits at y == i, so the row number doubles as the label's y coordinate.
for i, score in enumerate(auroc_scores):
    ax.text(score + 0.01, i, f'{score:.4f}', va='center', fontsize=11, fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'roc_and_auroc_comparison.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ ROC 曲线和 AUROC 对比已保存")

# 2. Confusion Matrices
# The grid is sized from the number of evaluated models (three panels per row)
# instead of assuming exactly five models; five models still give a 2x3,
# 16x10-inch figure.
n_cols = 3
n_rows = max(1, -(-len(metrics_summary) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
axes = axes.flatten()

# NOTE(review): the tick labels are Chinese; matplotlib's default font may not
# contain these glyphs - confirm that a CJK-capable font is configured.
for idx, (name, metrics) in enumerate(sorted(metrics_summary.items())):
    cm = confusion_matrix(y_test, metrics['y_test_pred'])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[idx], cbar=False,
                xticklabels=['无再入院', '30天再入院'],
                yticklabels=['无再入院', '30天再入院'])
    axes[idx].set_title(f'{name}\n(F1={metrics["F1"]:.4f})', fontsize=12, fontweight='bold')
    axes[idx].set_ylabel('True Label')
    axes[idx].set_xlabel('Predicted Label')

# Blank out every panel that did not receive a model.
for unused_ax in axes[len(metrics_summary):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'confusion_matrices.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ 混淆矩阵已保存")

# 3. Performance Metrics Heatmap
# metrics_summary also stores prediction arrays, so the frame built from it has
# object dtype. The score columns are cast to float because matplotlib refuses
# to colour-map an object-dtype array.
metrics_df = pd.DataFrame(metrics_summary).T
metrics_cols = ['AUROC', 'AUPRC', 'F1', 'Accuracy', 'Precision', 'Recall']
metrics_data = metrics_df[metrics_cols].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(metrics_data, annot=True, fmt='.4f', cmap='RdYlGn', ax=ax, 
            cbar_kws={'label': 'Score'}, vmin=0, vmax=1, linewidths=1, linecolor='gray')

ax.set_title('Model Performance Metrics Comparison', fontsize=14, fontweight='bold')
ax.set_xlabel('Metrics', fontsize=12)
ax.set_ylabel('Models', fontsize=12)
plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'metrics_heatmap.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ 性能指标热力图已保存")

# 4. Feature Importance (top 20 per tree-based model)
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

rf_model = results['Random Forest']['model']
feature_importance_rf = pd.DataFrame({
    'feature': X_train.columns,
    'importance': rf_model.feature_importances_
}).sort_values('importance', ascending=False).head(20)

ax = axes[0]
ax.barh(range(len(feature_importance_rf)), feature_importance_rf['importance'], color='steelblue')
ax.set_yticks(range(len(feature_importance_rf)))
ax.set_yticklabels(feature_importance_rf['feature'])
ax.set_xlabel('Importance', fontsize=12)
ax.set_title('Random Forest - Top 20 Feature Importance', fontsize=12, fontweight='bold')
# Most important feature at the top of the chart.
ax.invert_yaxis()

xgb_model = results['XGBoost']['model']
feature_importance_xgb = pd.DataFrame({
    'feature': X_train.columns,
    'importance': xgb_model.feature_importances_
}).sort_values('importance', ascending=False).head(20)

ax = axes[1]
ax.barh(range(len(feature_importance_xgb)), feature_importance_xgb['importance'], color='coral')
ax.set_yticks(range(len(feature_importance_xgb)))
ax.set_yticklabels(feature_importance_xgb['feature'])
ax.set_xlabel('Importance', fontsize=12)
ax.set_title('XGBoost - Top 20 Feature Importance', fontsize=12, fontweight='bold')
ax.invert_yaxis()

plt.tight_layout()
plt.savefig(os.path.join(data_dir, 'feature_importance.png'), dpi=300, bbox_inches='tight')
plt.show()
print("✅ 特征重要性已保存")

print("\n✅ 所有可视化已完成")


## Section 10: Cross-Validation and Final Summary

In [None]:
print("\n" + "=" * 80)
print("【Section 10】交叉验证和总结")
print("=" * 80)

# Stratified, shuffled 5-fold CV on the training split, scored by AUROC.
# NOTE(review): X_train_scaled was scaled with statistics from the whole training
# split, so each CV fold has seen its validation rows' mean/std. Wrap the scaler
# and model in a Pipeline if strictly leak-free CV estimates are needed.
print("\n1️⃣ 进行 5-Fold 交叉验证...")
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_results = {}

# scikit-learn documents that SVC fit time scales at least quadratically with the
# sample count, so the kernel SVM is cross-validated on a stratified subsample of
# at most this many rows; every other model uses the full training split.
SVM_CV_MAX_SAMPLES = 20_000

for name, model in models.items():
    print(f"   {name}...", end=" ", flush=True)

    X_cv, y_cv = X_train_scaled, y_train
    if isinstance(model, SVC) and len(X_cv) > SVM_CV_MAX_SAMPLES:
        X_cv, _, y_cv, _ = train_test_split(
            X_cv, y_cv,
            train_size=SVM_CV_MAX_SAMPLES,
            stratify=y_cv,
            random_state=42,
        )

    cv_scores = cross_val_score(model, X_cv, y_cv, cv=skf, scoring='roc_auc', n_jobs=-1)
    cv_results[name] = {'mean': cv_scores.mean(), 'std': cv_scores.std(), 'scores': cv_scores}
    print(f"AUROC: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")

# Side-by-side view of held-out AUROC and the CV mean.
print("\n2️⃣ 测试集性能 vs 交叉验证性能:")
print("=" * 110)
print(f"{'模型':<20} {'测试集AUROC':<18} {'交叉验证AUROC':<22} {'差异':<12} {'标准差':<12}")
print("=" * 110)

for name in sorted(metrics_summary.keys()):
    test_auroc = metrics_summary[name]['AUROC']
    cv_mean = cv_results[name]['mean']
    cv_std = cv_results[name]['std']
    diff = test_auroc - cv_mean
    print(f"{name:<20} {test_auroc:<18.4f} {cv_mean:<22.4f} {diff:<12.4f} {cv_std:<12.4f}")

print("=" * 110)

print("\n3️⃣ 保存综合报告...")

report = f"""
{'='*100}
MIMIC-IV 30日再入院预测 - 基线模型报告
{'='*100}

生成时间: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}

【数据集概览】
{'-'*100}
- 总样本数: {len(X):,}
- 特征维度: {X.shape[1]}
- 正类比例: {(y==1).sum()/len(y)*100:.2f}%
- 训练集大小: {len(X_train):,}
- 测试集大小: {len(X_test):,}

【测试集性能指标】
{'-'*100}
{'模型':<20} {'AUROC':<12} {'AUPRC':<12} {'F1 Score':<12} {'Accuracy':<12} {'Precision':<12} {'Recall':<12}
{'-'*100}
"""

for name in sorted(metrics_summary.keys()):
    m = metrics_summary[name]
    report += f"{name:<20} {m['AUROC']:<12.4f} {m['AUPRC']:<12.4f} {m['F1']:<12.4f} {m['Accuracy']:<12.4f} {m['Precision']:<12.4f} {m['Recall']:<12.4f}\n"

report += f"""
【交叉验证结果 (5-Fold)】
{'-'*100}
{'模型':<20} {'平均AUROC':<18} {'标准差':<12}
{'-'*100}
"""

for name in sorted(cv_results.keys()):
    cv = cv_results[name]
    report += f"{name:<20} {cv['mean']:<18.4f} {cv['std']:<12.4f}\n"

# Best model is chosen by test-set AUROC, not by the CV mean.
best_model = max(metrics_summary, key=lambda x: metrics_summary[x]['AUROC'])
report += f"""
【最佳模型】
{'-'*100}
模型: {best_model}
测试集 AUROC: {metrics_summary[best_model]['AUROC']:.4f}
测试集 AUPRC: {metrics_summary[best_model]['AUPRC']:.4f}
测试集 F1 Score: {metrics_summary[best_model]['F1']:.4f}
交叉验证 AUROC: {cv_results[best_model]['mean']:.4f} (+/- {cv_results[best_model]['std']:.4f})

【建议和后续步骤】
{'-'*100}
1. 超参数调优 - GridSearchCV 或 RandomizedSearchCV
2. 特征工程 - 特征交互、多项式特征
3. 数据平衡 - SMOTE 过采样
4. 集成学习 - Stacking/Voting
5. 模型解释 - SHAP values, LIME

{'='*100}
"""

# The report contains non-ASCII text, so it is written explicitly as UTF-8.
report_path = os.path.join(data_dir, 'baseline_models_report.txt')
with open(report_path, 'w', encoding='utf-8') as f:
    f.write(report)

print(f"   ✅ 报告已保存: {report_path}")

# Only scalar scores are exported; the prediction arrays held in metrics_summary
# are left out, and numpy floats are converted to plain Python floats.
metrics_json = {}
for name, metrics in metrics_summary.items():
    metrics_json[name] = {
        'AUROC': float(metrics['AUROC']),
        'AUPRC': float(metrics['AUPRC']),
        'F1': float(metrics['F1']),
        'Accuracy': float(metrics['Accuracy']),
        'Precision': float(metrics['Precision']),
        'Recall': float(metrics['Recall']),
        'CV_Mean': float(cv_results[name]['mean']),
        'CV_Std': float(cv_results[name]['std'])
    }

json_path = os.path.join(data_dir, 'baseline_models_metrics.json')
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(metrics_json, f, indent=2, ensure_ascii=False)

print(f"   ✅ 指标已保存: {json_path}")
print(f"\n✅ 所有结果已保存到: {data_dir}")
