In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (
    accuracy_score, f1_score, recall_score, precision_score,
    matthews_corrcoef, roc_curve, auc
)
from sklearn.calibration import CalibratedClassifierCV
from numpy import interp

# Load the dataset: the "TMT" column becomes y, every other column becomes X.
target_column = "TMT"
df = pd.read_csv("C:/Users")  # file path
X = df.drop(columns=[target_column])
y = df[target_column]

# 5-fold stratified CV.
# The shuffle is seeded so the fold assignment is the same on every run; with
# random_state=None each execution drew different folds, so the metrics
# printed by this script could not be reproduced. 42 matches the seed already
# used for the logistic-regression model.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold results: one value is appended to each list for every CV fold.
acc_scores = []
f1_scores = []
recall_scores = []
prec_scores = []
mcc_scores = []
iter_counts = []

# ROC curve data
tprs = []  # per-fold TPR values interpolated onto the shared mean_fpr grid
aucs = []  # per-fold area under the ROC curve
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid used to average the folds

# All fold curves and the mean curve are drawn on this single figure.
plt.figure(figsize=(6, 5))
print("📊 Calibrated Logistic Regression (5-Fold CV) Performance:\n")

# Evaluate one calibrated logistic-regression model per outer CV fold.
for fold, (train_idx, test_idx) in enumerate(cv.split(X, y), 1):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Hold out a stratified quarter of the training fold for calibration.
    # With cv='prefit' the calibrator uses *all* data passed to fit(), and it
    # must not see the samples the base model was trained on -- otherwise the
    # sigmoid is fitted to over-confident in-sample scores.
    inner_split = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)
    fit_idx, calib_idx = next(inner_split.split(X_train, y_train))
    X_fit, y_fit = X_train.iloc[fit_idx], y_train.iloc[fit_idx]
    X_calib, y_calib = X_train.iloc[calib_idx], y_train.iloc[calib_idx]

    base_model = LogisticRegression(solver='liblinear', max_iter=100, random_state=42)
    base_model.fit(X_fit, y_fit)

    # NOTE(review): cv='prefit' is deprecated in recent scikit-learn releases
    # in favour of wrapping the fitted model in FrozenEstimator -- migrate
    # once the minimum supported scikit-learn version allows it.
    calibrated_model = CalibratedClassifierCV(base_model, cv='prefit', method='sigmoid')
    calibrated_model.fit(X_calib, y_calib)

    y_pred = calibrated_model.predict(X_test)
    # Column 1 of predict_proba is used as the positive-class score.
    y_proba = calibrated_model.predict_proba(X_test)[:, 1]

    # scores
    n_iter = base_model.n_iter_[0]
    acc_scores.append(accuracy_score(y_test, y_pred))
    f1_scores.append(f1_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred))
    prec_scores.append(precision_score(y_test, y_pred))
    mcc_scores.append(matthews_corrcoef(y_test, y_pred))
    iter_counts.append(n_iter)

    # ROC
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    # Resample this fold's curve onto the shared FPR grid so the folds can be
    # averaged point-wise later; force the curve to start at the origin.
    interp_tpr = interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    plt.plot(fpr, tpr, lw=1, alpha=0.4, label=f'Fold {fold} (AUC = {roc_auc:.2f})')

    print(f"Fold {fold}: iter = {n_iter}, "
          f"Accuracy = {acc_scores[-1]:.4f}, "
          f"F1 = {f1_scores[-1]:.4f}, "
          f"Recall = {recall_scores[-1]:.4f}, "
          f"Precision = {prec_scores[-1]:.4f}, "
          f"MCC = {mcc_scores[-1]:.4f}")

# Plot mean ROC curve: point-wise average of the per-fold TPRs on the shared
# FPR grid, with the last point forced to 1.0 so the curve ends at (1, 1).
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)

plt.plot(mean_fpr, mean_tpr, color='blue', lw=2, label=f'Mean ROC (AUC = {mean_auc:.2f})')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # diagonal reference line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('(c) Baseline LR ROC')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()

# The output path lives in one variable so the confirmation message printed
# at the end always names the file that was actually written.
roc_plot_path = "C:/Users"  # Save file path
plt.savefig(roc_plot_path, dpi=900, bbox_inches='tight')
plt.close()

# Show mean and standard deviation results
print("\n📈 Average performance：")
summary_rows = (
    ("Accuracy", acc_scores),
    ("F1 Score", f1_scores),
    ("Recall", recall_scores),
    ("Precision", prec_scores),
    ("MCC", mcc_scores),
)
for metric_name, fold_values in summary_rows:
    # Names are left-padded to 9 characters so the colons line up.
    print(f"{metric_name:<9}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}")
print(f"\n🌀 Average number of iterations: {np.mean(iter_counts):.1f} 次")
print(f"📈 ROC plot saved as {roc_plot_path}")