# Model Metrics
---

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score
)
from scipy.stats import norm

# 从CSV文件中导入数据
all_train_results = pd.read_csv("all_train_results.csv")
all_val_results = pd.read_csv("all_val_results.csv")

def find_optimal_threshold(y_true, y_proba):
    """
    使用 Youden's Index 找到最佳阈值
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_proba)
    youden_index = tpr - fpr
    optimal_idx = np.argmax(youden_index)
    return thresholds[optimal_idx]

def format_metric(metric, ci):
    """
    将指标及其置信区间格式化为 0.897(0.800-0.930) 的形式
    """
    return f"{metric:.3f}({ci[0]:.3f}-{ci[1]:.3f})"

def calculate_auc_with_ci(y_true, y_proba, n_bootstraps=1000):
    """
    计算AUC及其95%置信区间
    """
    auc = roc_auc_score(y_true, y_proba)
    bootstrapped_scores = []
    
    for _ in range(n_bootstraps):
        indices = np.random.randint(0, len(y_true), len(y_true))
        if len(np.unique(y_true[indices])) < 2:
            continue
        score = roc_auc_score(y_true[indices], y_proba[indices])
        bootstrapped_scores.append(score)
    
    sorted_scores = np.array(bootstrapped_scores)
    sorted_scores.sort()
    ci_lower = sorted_scores[int(0.025 * len(sorted_scores))]
    ci_upper = sorted_scores[int(0.975 * len(sorted_scores))]
    
    return auc, (ci_lower, ci_upper)

def calculate_metrics(y_true, y_proba, threshold):
    """
    计算模型的性能指标及其置信区间
    """
    # 根据阈值将概率转换为预测标签
    y_pred = (y_proba >= threshold).astype(int)
    
    # 计算混淆矩阵
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    
    # 计算指标
    sensitivity = recall_score(y_true, y_pred)  # 灵敏度（召回率）
    specificity = tn / (tn + fp)  # 特异度
    ppv = precision_score(y_true, y_pred)  # 阳性预测值（Precision）
    npv = tn / (tn + fn)  # 阴性预测值
    accuracy = accuracy_score(y_true, y_pred)  # 准确度
    f1 = f1_score(y_true, y_pred)  # F1分数
    
    # 计算AUC及其置信区间
    auc, auc_ci = calculate_auc_with_ci(y_true, y_proba)
    
    # 计算置信区间（95%）
    def calculate_ci(metric, n):
        se = np.sqrt(metric * (1 - metric) / n)  # 标准误差
        z = norm.ppf(0.975)  # 95% 置信区间的 Z 值
        return (metric - z * se, metric + z * se)
    
    n = len(y_true)
    sensitivity_ci = calculate_ci(sensitivity, n)
    specificity_ci = calculate_ci(specificity, n)
    ppv_ci = calculate_ci(ppv, n)
    npv_ci = calculate_ci(npv, n)
    accuracy_ci = calculate_ci(accuracy, n)
    f1_ci = calculate_ci(f1, n)
    
    # 返回格式化后的结果
    return {
        'sensitivity': format_metric(sensitivity, sensitivity_ci),
        'specificity': format_metric(specificity, specificity_ci),
        'ppv': format_metric(ppv, ppv_ci),
        'npv': format_metric(npv, npv_ci),
        'accuracy': format_metric(accuracy, accuracy_ci),
        'f1': format_metric(f1, f1_ci),
        'precision': format_metric(ppv, ppv_ci),  # Precision 和 PPV 相同
        'recall': format_metric(sensitivity, sensitivity_ci),  # Recall 和 Sensitivity 相同
        'auc': format_metric(auc, auc_ci),  # AUC及其置信区间
        'optimal_threshold': threshold  # 最佳阈值
    }

# 初始化两个字典分别存储训练集和验证集的结果
train_metrics_dict = {}
val_metrics_dict = {}

# 获取所有模型的名称（从列名中提取）
model_names = [col.replace('_proba', '') for col in all_train_results.columns if col.endswith('_proba')]

# 计算每个模型的指标
for model_name in model_names:
    # 训练集结果
    y_train_true = all_train_results['y_train']
    y_train_proba = all_train_results[f'{model_name}_proba']
    optimal_threshold = find_optimal_threshold(y_train_true, y_train_proba)
    
    # 训练集指标
    train_metrics = calculate_metrics(y_train_true, y_train_proba, optimal_threshold)
    train_metrics_dict[model_name] = train_metrics
    
    # 验证集结果
    y_val_true = all_val_results['y_val']
    y_val_proba = all_val_results[f'{model_name}_proba']
    
    # 使用训练集的最佳阈值计算验证集指标
    val_metrics = calculate_metrics(y_val_true, y_val_proba, optimal_threshold)
    val_metrics_dict[model_name] = val_metrics
    
    # 打印结果
    print(f"Model: {model_name}")
    print("Train Metrics:")
    for metric, value in train_metrics.items():
        print(f"{metric}: {value}")
    print("Validation Metrics:")
    for metric, value in val_metrics.items():
        print(f"{metric}: {value}")
    print("\n")

# 创建训练集和验证集的数据框
train_results_list = []
val_results_list = []

for model_name in model_names:
    # 为训练集添加行
    train_row = {'model': model_name}
    train_row.update(train_metrics_dict[model_name])
    train_results_list.append(train_row)
    
    # 为验证集添加行
    val_row = {'model': model_name}
    val_row.update(val_metrics_dict[model_name])
    val_results_list.append(val_row)

# 将列表转换为DataFrame
train_results_df = pd.DataFrame(train_results_list)
val_results_df = pd.DataFrame(val_results_list)

# 创建带有时间戳的文件名以避免覆盖
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# 导出到CSV
train_file = f"train_metrics_{timestamp}.csv"
val_file = f"val_metrics_{timestamp}.csv"

train_results_df.to_csv(train_file, index=False)
val_results_df.to_csv(val_file, index=False)

print(f"训练集指标已保存至: '{train_file}'")
print(f"验证集指标已保存至: '{val_file}'")

# 如果仍需要合并结果，也可以导出
combined_results_list = []
for model_name in model_names:
    # 训练集行
    train_row = {'model': model_name, 'dataset': 'train'}
    train_row.update(train_metrics_dict[model_name])
    combined_results_list.append(train_row)
    
    # 验证集行
    val_row = {'model': model_name, 'dataset': 'val'}
    val_row.update(val_metrics_dict[model_name])
    combined_results_list.append(val_row)

combined_results_df = pd.DataFrame(combined_results_list)
combined_file = f"combined_metrics_{timestamp}.csv"
combined_results_df.to_csv(combined_file, index=False)
print(f"合并指标已保存至: '{combined_file}'")

Model: tabpfn
Train Metrics:
sensitivity: 0.846(0.798-0.894)
specificity: 0.802(0.749-0.855)
ppv: 0.755(0.698-0.812)
npv: 0.878(0.835-0.922)
accuracy: 0.820(0.769-0.871)
f1: 0.798(0.745-0.851)
precision: 0.755(0.698-0.812)
recall: 0.846(0.798-0.894)
auc: 0.902(0.858-0.937)
optimal_threshold: 0.38669014
Validation Metrics:
sensitivity: 0.800(0.730-0.870)
specificity: 0.785(0.713-0.857)
ppv: 0.774(0.701-0.847)
npv: 0.810(0.741-0.878)
accuracy: 0.792(0.721-0.863)
f1: 0.787(0.715-0.859)
precision: 0.774(0.701-0.847)
recall: 0.800(0.730-0.870)
auc: 0.870(0.794-0.930)
optimal_threshold: 0.38669014


Model: rf
Train Metrics:
sensitivity: 0.813(0.761-0.865)
specificity: 0.825(0.775-0.876)
ppv: 0.771(0.715-0.827)
npv: 0.860(0.813-0.906)
accuracy: 0.820(0.769-0.871)
f1: 0.791(0.737-0.845)
precision: 0.771(0.715-0.827)
recall: 0.813(0.761-0.865)
auc: 0.895(0.851-0.932)
optimal_threshold: 0.4577880657803839
Validation Metrics:
sensitivity: 0.717(0.638-0.796)
specificity: 0.800(0.730-0.870)
ppv: 0.