# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams

# Use Times New Roman as the default font family for matplotlib figures
rcParams['font.family'] = 'Times New Roman'

# 1. Load the dataset and discard the columns that are not used as model inputs
data = pd.read_csv(
    "D:/Apple/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/t1+t2+t1Gd+flair_all/s_combined_modalities_965.csv"
).drop(columns=['index', 'gender', 'age_at_index', 'OS', "OS.time"])

# Split into the target vector and the feature matrix
y = data['label']  # Target: the 'label' column.
X = data.drop(columns='label')  # Features: every remaining column except 'label'.

# 5-fold stratified cross-validation (shuffled, fixed seed) so each fold keeps
# the class proportions of the full dataset
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Per-fold metric accumulators: AUC, accuracy, sensitivity, specificity,
# precision and F1-Score (six independent lists)
auc_scores, accuracies, sensitivities, specificities, precisions, f1_scores = (
    [] for _ in range(6)
)
model_net_benefits = []  # One net-benefit curve per fold
total_conf_matrix = np.zeros(shape=(2, 2))  # Running sum of the per-fold 2x2 confusion matrices

# Decision Curve Analysis (DCA) setup. The threshold grid and the net-benefit
# helper do not depend on the fold, so they are created once, before the loop.
thresholds = np.linspace(0.01, 0.99, 100)  # 100 evenly spaced thresholds between 0.01 and 0.99.


def calculate_net_benefit(thresholds, y_true, y_proba):
    """Compute the decision-curve net benefit at each probability threshold.

    For a threshold ``t`` a sample is called positive when its predicted
    probability is >= ``t``; the net benefit is ``(TP - FP * t / (1 - t)) / N``.

    Args:
        thresholds: Iterable of probability thresholds. Every value must be
            strictly below 1 because of the ``t / (1 - t)`` weight.
        y_true: True labels, compared against 1 (positive) and 0 (negative).
        y_proba: Predicted positive-class probabilities, in the same order as
            ``y_true``.

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays keep the element-wise comparisons purely positional, so the
    # result never depends on a pandas index.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    n_samples = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight for false positives
        predictions = y_proba >= threshold  # Predictions based on the current threshold
        tp = np.sum(predictions & (y_true == 1))  # True positives
        fp = np.sum(predictions & (y_true == 0))  # False positives
        net_benefits.append((tp - fp * w) / n_samples)
    return net_benefits


# Perform cross-validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):
    print(f"Fold {fold + 1}")

    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Initialize and train the Random Forest model (300 trees, fixed seed)
    model = RandomForestClassifier(n_estimators=300, random_state=42)
    model.fit(X_train, y_train)

    # Predict positive-class probabilities and hard class labels
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    y_pred = model.predict(X_test)

    # Compute evaluation metrics
    auc_score = roc_auc_score(y_test, y_pred_proba)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    # If the model predicts no positives, tp + fp is 0 and the plain ratio is
    # NaN, which would poison the cross-validated mean. Report 0.0 instead,
    # which is what sklearn's precision_score returns by default.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    f1 = f1_score(y_test, y_pred)

    # Accumulate the confusion matrix
    total_conf_matrix += conf_matrix

    # Save the results
    auc_scores.append(auc_score)
    accuracies.append(accuracy)
    sensitivities.append(sensitivity)
    specificities.append(specificity)
    precisions.append(precision)
    f1_scores.append(f1)

    # Plot the ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=36)
    plt.ylabel('True Positive Rate', fontsize=36)
    plt.title(f'ROC Curve (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.legend(loc="lower right", prop={'size': 30})
    plt.show()

    # Plot the confusion matrix
    plt.figure(figsize=(12, 8))
    # sns.set() re-applies the seaborn theme and overwrites matplotlib's
    # 'font.family'; pass the font explicitly so later figures keep it.
    sns.set(font='Times New Roman', font_scale=4)
    heatmap = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 60})
    cbar = heatmap.collections[0].colorbar
    cbar.ax.tick_params(labelsize=36)
    plt.xlabel('Predicted labels', fontsize=36)
    plt.ylabel('True labels', fontsize=36)
    plt.title(f'Confusion Matrix (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.show()

    # Decision Curve Analysis (DCA): net-benefit curve of the current fold
    fold_net_benefits = calculate_net_benefit(thresholds, y_test, y_pred_proba)
    model_net_benefits.append(fold_net_benefits)

# Plot the average DCA curve (net benefit averaged over the folds, per threshold)
avg_net_benefits = np.mean(model_net_benefits, axis=0)
plt.figure(figsize=(12, 8))
plt.plot(thresholds, avg_net_benefits, label='Random Forest (Average)', color='red')
plt.xlabel('Probability Threshold', fontsize=16)
plt.ylabel('Net Benefit', fontsize=16)
plt.title('Decision Curve Analysis (Average)', fontsize=18)
plt.legend(loc='lower left', fontsize=14)
plt.grid(True)
plt.xlim([0, 1])
plt.ylim([-0.1, 1])
plt.show()

# Compute and plot the average confusion matrix: the cumulative matrix divided
# by the number of folds
avg_conf_matrix = total_conf_matrix / kf.get_n_splits()
plt.figure(figsize=(12, 8))
# sns.set() re-applies the seaborn theme and overwrites matplotlib's
# 'font.family'; pass the font explicitly so the figure keeps Times New Roman.
sns.set(font='Times New Roman', font_scale=4)
heatmap = sns.heatmap(avg_conf_matrix, annot=True, fmt='.2f', cmap='Blues', annot_kws={"size": 60})
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=36)
plt.xlabel('Predicted labels', fontsize=36)
plt.ylabel('True labels', fontsize=36)
plt.title('Average Confusion Matrix', fontsize=36, y=1.02)
plt.show()

# Output the mean and (population) standard deviation of every per-fold metric
print("\nCross-Validation Results:")
for metric_name, fold_values in (
    ("AUC", auc_scores),
    ("Accuracy", accuracies),
    ("Sensitivity", sensitivities),
    ("Specificity", specificities),
    ("Precision", precisions),
    ("F1-Score", f1_scores),
):
    print(f"Mean {metric_name}: {np.mean(fold_values):.8f} ± {np.std(fold_values):.8f}")

# In [None]:
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, roc_curve, precision_score, f1_score
from matplotlib import rcParams

# Use Times New Roman as the default font family for matplotlib figures
rcParams['font.family'] = 'Times New Roman'

# 1. Load the dataset and discard the columns that are not used as model inputs
data = pd.read_csv(
    "D:/Apple/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/t1+t2+t1Gd+flair_all/s_combined_modalities_965.csv"
).drop(columns=['index', 'gender', 'age_at_index', 'OS', "OS.time"])

# Split into the target vector and the feature matrix
y = data['label']  # Target: the 'label' column.
X = data.drop(columns='label')  # Features: every remaining column except 'label'.

# 5-fold stratified cross-validation (shuffled, fixed seed)
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Per-fold metric accumulators: AUC, accuracy, sensitivity, specificity,
# precision and F1-Score (six independent lists)
auc_scores, accuracies, sensitivities, specificities, precisions, f1_scores = (
    [] for _ in range(6)
)
model_net_benefits = []  # One net-benefit curve per fold
total_conf_matrix = np.zeros(shape=(2, 2))  # Running sum of the per-fold 2x2 confusion matrices

# Decision Curve Analysis (DCA) setup. The threshold grid and the net-benefit
# helper do not depend on the fold, so they are created once, before the loop.
thresholds = np.linspace(0.01, 0.99, 100)  # 100 evenly spaced thresholds between 0.01 and 0.99.


def calculate_net_benefit(thresholds, y_true, y_proba):
    """Compute the decision-curve net benefit at each probability threshold.

    For a threshold ``t`` a sample is called positive when its predicted
    probability is >= ``t``; the net benefit is ``(TP - FP * t / (1 - t)) / N``.

    Args:
        thresholds: Iterable of probability thresholds. Every value must be
            strictly below 1 because of the ``t / (1 - t)`` weight.
        y_true: True labels, compared against 1 (positive) and 0 (negative).
        y_proba: Predicted positive-class probabilities, in the same order as
            ``y_true``.

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays keep the element-wise comparisons purely positional, so the
    # result never depends on a pandas index.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    n_samples = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight for false positives
        predictions = y_proba >= threshold  # Predictions based on the current threshold
        tp = np.sum(predictions & (y_true == 1))  # True positives
        fp = np.sum(predictions & (y_true == 0))  # False positives
        net_benefits.append((tp - fp * w) / n_samples)
    return net_benefits


# Perform cross-validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):
    print(f"Fold {fold + 1}")

    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Initialize and train the XGBoost model:
    # - `eval_metric='logloss'`: log loss as the evaluation metric.
    # - `random_state=42`: fixed seed for reproducibility.
    # NOTE(review): `use_label_encoder` is reported as deprecated/unused by
    # newer XGBoost releases - confirm against the installed version before
    # removing it.
    clf_XGB = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
    clf_XGB.fit(X_train, y_train)

    # Predict positive-class probabilities (column 1 of predict_proba) and
    # hard class labels for the test fold
    y_pred_proba = clf_XGB.predict_proba(X_test)[:, 1]
    y_pred = clf_XGB.predict(X_test)

    # Compute evaluation metrics
    auc_score = roc_auc_score(y_test, y_pred_proba)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Accumulate the confusion matrix
    total_conf_matrix += conf_matrix

    # Save the results
    auc_scores.append(auc_score)
    accuracies.append(accuracy)
    sensitivities.append(sensitivity)
    specificities.append(specificity)
    precisions.append(precision)
    f1_scores.append(f1)

    # Plot the ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=36)
    plt.ylabel('True Positive Rate', fontsize=36)
    plt.title(f'ROC Curve (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.legend(loc="lower right", prop={'size': 30})
    plt.show()

    # Plot the confusion matrix
    plt.figure(figsize=(12, 8))
    # sns.set() re-applies the seaborn theme and overwrites matplotlib's
    # 'font.family'; pass the font explicitly so later figures keep it.
    sns.set(font='Times New Roman', font_scale=4)
    heatmap = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 60})
    cbar = heatmap.collections[0].colorbar
    cbar.ax.tick_params(labelsize=36)
    plt.xlabel('Predicted labels', fontsize=36)
    plt.ylabel('True labels', fontsize=36)
    plt.title(f'Confusion Matrix (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.show()

    # Decision Curve Analysis (DCA): net-benefit curve of the current fold
    fold_net_benefits = calculate_net_benefit(thresholds, y_test, y_pred_proba)
    model_net_benefits.append(fold_net_benefits)

# Plot the average DCA curve (net benefit averaged over the folds, per threshold)
avg_net_benefits = np.mean(model_net_benefits, axis=0)
plt.figure(figsize=(12, 8))
# The curve belongs to the XGBoost model trained in this cell; the previous
# 'Random Forest (Average)' legend text was a copy-paste leftover.
plt.plot(thresholds, avg_net_benefits, label='XGBoost (Average)', color='red')
plt.xlabel('Probability Threshold', fontsize=16)
plt.ylabel('Net Benefit', fontsize=16)
plt.title('Decision Curve Analysis (Average)', fontsize=18)
plt.legend(loc='lower left', fontsize=14)
plt.grid(True)
plt.xlim([0, 1])
plt.ylim([-0.1, 1])
plt.show()

# Compute and plot the average confusion matrix: the cumulative matrix divided
# by the number of folds
avg_conf_matrix = total_conf_matrix / kf.get_n_splits()
plt.figure(figsize=(12, 8))
# sns.set() re-applies the seaborn theme and overwrites matplotlib's
# 'font.family'; pass the font explicitly so the figure keeps Times New Roman.
sns.set(font='Times New Roman', font_scale=4)
heatmap = sns.heatmap(avg_conf_matrix, annot=True, fmt='.2f', cmap='Blues', annot_kws={"size": 60})
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=36)
plt.xlabel('Predicted labels', fontsize=36)
plt.ylabel('True labels', fontsize=36)
plt.title('Average Confusion Matrix', fontsize=36, y=1.02)
plt.show()

# Output the mean and (population) standard deviation of every per-fold metric
print("\nCross-Validation Results:")
for metric_name, fold_values in (
    ("AUC", auc_scores),
    ("Accuracy", accuracies),
    ("Sensitivity", sensitivities),
    ("Specificity", specificities),
    ("Precision", precisions),
    ("F1-Score", f1_scores),
):
    print(f"Mean {metric_name}: {np.mean(fold_values):.8f} ± {np.std(fold_values):.8f}")

# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix, precision_score, f1_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams

# Use Times New Roman as the default font family for matplotlib figures
rcParams['font.family'] = 'Times New Roman'

# 1. Load the dataset and discard the columns that are not used as model inputs
data = pd.read_csv(
    "D:/Apple/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/t1+t2+t1Gd+flair_all/s_combined_modalities_965.csv"
).drop(columns=['index', 'gender', 'age_at_index', 'OS', "OS.time"])

# Split into the target vector and the feature matrix
y = data['label']  # Target: the 'label' column.
X = data.drop(columns='label')  # Features: every remaining column except 'label'.

# 5-fold stratified cross-validation (shuffled, fixed seed)
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Per-fold metric accumulators: AUC, accuracy, sensitivity, specificity,
# precision and F1-Score (six independent lists)
auc_scores, accuracies, sensitivities, specificities, precisions, f1_scores = (
    [] for _ in range(6)
)
model_net_benefits = []  # One net-benefit curve per fold
total_conf_matrix = np.zeros(shape=(2, 2))  # Running sum of the per-fold 2x2 confusion matrices

# Hyperparameter grid handed to GridSearchCV (one candidate value per parameter)
param_grid = dict(
    C=[1],  # Regularization strength
    solver=['lbfgs'],  # Optimization algorithm
    penalty=['l2'],  # Regularization type
    max_iter=[100],  # Maximum number of iterations
)

# Decision Curve Analysis (DCA) setup. The threshold grid and the net-benefit
# helper do not depend on the fold, so they are created once, before the loop.
thresholds = np.linspace(0.01, 0.99, 100)  # 100 evenly spaced thresholds between 0.01 and 0.99.


def calculate_net_benefit(thresholds, y_true, y_proba):
    """Compute the decision-curve net benefit at each probability threshold.

    For a threshold ``t`` a sample is called positive when its predicted
    probability is >= ``t``; the net benefit is ``(TP - FP * t / (1 - t)) / N``.

    Args:
        thresholds: Iterable of probability thresholds. Every value must be
            strictly below 1 because of the ``t / (1 - t)`` weight.
        y_true: True labels, compared against 1 (positive) and 0 (negative).
        y_proba: Predicted positive-class probabilities, in the same order as
            ``y_true``.

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays keep the element-wise comparisons purely positional, so the
    # result never depends on a pandas index.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    n_samples = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight for false positives
        predictions = y_proba >= threshold  # Predictions based on the current threshold
        tp = np.sum(predictions & (y_true == 1))  # True positives
        fp = np.sum(predictions & (y_true == 0))  # False positives
        net_benefits.append((tp - fp * w) / n_samples)
    return net_benefits


# Perform cross-validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):
    print(f"Fold {fold + 1}")

    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Data normalization: the scaler is fitted on the training fold only and
    # then applied to the test fold
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Hyperparameter tuning with GridSearchCV: inner 5-fold CV over
    # `param_grid`, scored by ROC AUC, using all available CPU cores
    model = LogisticRegression()
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='roc_auc', n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)
    best_model = grid_search.best_estimator_

    # Predict positive-class probabilities and hard class labels
    y_pred_proba = best_model.predict_proba(X_test_scaled)[:, 1]
    y_pred = best_model.predict(X_test_scaled)

    # Compute evaluation metrics
    auc_score = roc_auc_score(y_test, y_pred_proba)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    # If the model predicts no positives, tp + fp is 0 and the plain ratio is
    # NaN, which would poison the cross-validated mean. Report 0.0 instead,
    # which is what sklearn's precision_score returns by default.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    f1 = f1_score(y_test, y_pred)

    # Accumulate the confusion matrix
    total_conf_matrix += conf_matrix

    # Save the results
    auc_scores.append(auc_score)
    accuracies.append(accuracy)
    sensitivities.append(sensitivity)
    specificities.append(specificity)
    precisions.append(precision)
    f1_scores.append(f1)

    # Plot the ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=36)
    plt.ylabel('True Positive Rate', fontsize=36)
    plt.title(f'ROC Curve (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.legend(loc="lower right", prop={'size': 30})
    plt.show()

    # Plot the confusion matrix
    plt.figure(figsize=(12, 8))
    # sns.set() re-applies the seaborn theme and overwrites matplotlib's
    # 'font.family'; pass the font explicitly so later figures keep it.
    sns.set(font='Times New Roman', font_scale=4)
    heatmap = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 60})
    cbar = heatmap.collections[0].colorbar
    cbar.ax.tick_params(labelsize=36)
    plt.xlabel('Predicted labels', fontsize=36)
    plt.ylabel('True labels', fontsize=36)
    plt.title(f'Confusion Matrix (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.show()

    # Decision Curve Analysis (DCA): net-benefit curve of the current fold
    fold_net_benefits = calculate_net_benefit(thresholds, y_test, y_pred_proba)
    model_net_benefits.append(fold_net_benefits)

# Plot the average DCA curve (net benefit averaged over the folds, per threshold)
avg_net_benefits = np.mean(model_net_benefits, axis=0)
plt.figure(figsize=(12, 8))
plt.plot(thresholds, avg_net_benefits, label='Logistic Regression (Average)', color='red')
plt.xlabel('Probability Threshold', fontsize=16)
plt.ylabel('Net Benefit', fontsize=16)
plt.title('Decision Curve Analysis (Average)', fontsize=18)
plt.legend(loc='lower left', fontsize=14)
plt.grid(True)
plt.xlim([0, 1])
plt.ylim([-0.1, 1])
plt.show()

# Compute and plot the average confusion matrix: the cumulative matrix divided
# by the number of folds
avg_conf_matrix = total_conf_matrix / kf.get_n_splits()
plt.figure(figsize=(12, 8))
# sns.set() re-applies the seaborn theme and overwrites matplotlib's
# 'font.family'; pass the font explicitly so the figure keeps Times New Roman.
sns.set(font='Times New Roman', font_scale=4)
heatmap = sns.heatmap(avg_conf_matrix, annot=True, fmt='.2f', cmap='Blues', annot_kws={"size": 60})
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=36)
plt.xlabel('Predicted labels', fontsize=36)
plt.ylabel('True labels', fontsize=36)
plt.title('Average Confusion Matrix', fontsize=36, y=1.02)
plt.show()

# Output the mean and (population) standard deviation of every per-fold metric
print("\nCross-Validation Results:")
for metric_name, fold_values in (
    ("AUC", auc_scores),
    ("Accuracy", accuracies),
    ("Sensitivity", sensitivities),
    ("Specificity", specificities),
    ("Precision", precisions),
    ("F1-Score", f1_scores),
):
    print(f"Mean {metric_name}: {np.mean(fold_values):.8f} ± {np.std(fold_values):.8f}")

# In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams

# Use Times New Roman as the default font family for matplotlib figures
rcParams['font.family'] = 'Times New Roman'

# 1. Load the dataset and discard the columns that are not used as model inputs
data = pd.read_csv(
    "D:/Apple/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/t1+t2+t1Gd+flair_all/s_combined_modalities_965.csv"
).drop(columns=['index', 'gender', 'age_at_index', 'OS', "OS.time"])

# Split into the target vector and the feature matrix
y = data['label']  # Target: the 'label' column.
X = data.drop(columns='label')  # Features: every remaining column except 'label'.

# 5-fold stratified cross-validation (shuffled, fixed seed)
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Per-fold metric accumulators: AUC, accuracy, sensitivity, specificity,
# precision and F1-Score (six independent lists)
auc_scores, accuracies, sensitivities, specificities, precisions, f1_scores = (
    [] for _ in range(6)
)
model_net_benefits = []  # One net-benefit curve per fold
total_conf_matrix = np.zeros(shape=(2, 2))  # Running sum of the per-fold 2x2 confusion matrices

# Hyperparameter grid handed to GridSearchCV (one candidate value per parameter)
param_grid = dict(C=[1], kernel=['linear'], class_weight=[None])

# Decision Curve Analysis (DCA) setup. The threshold grid and the net-benefit
# helper do not depend on the fold, so they are created once, before the loop.
thresholds = np.linspace(0.01, 0.99, 100)  # 100 evenly spaced thresholds between 0.01 and 0.99.


def calculate_net_benefit(thresholds, y_true, y_proba):
    """Compute the decision-curve net benefit at each probability threshold.

    For a threshold ``t`` a sample is called positive when its predicted
    probability is >= ``t``; the net benefit is ``(TP - FP * t / (1 - t)) / N``.

    Args:
        thresholds: Iterable of probability thresholds. Every value must be
            strictly below 1 because of the ``t / (1 - t)`` weight.
        y_true: True labels, compared against 1 (positive) and 0 (negative).
        y_proba: Predicted positive-class probabilities, in the same order as
            ``y_true``.

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays keep the element-wise comparisons purely positional, so the
    # result never depends on a pandas index.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    n_samples = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight for false positives
        predictions = y_proba >= threshold  # Predictions based on the current threshold
        tp = np.sum(predictions & (y_true == 1))  # True positives
        fp = np.sum(predictions & (y_true == 0))  # False positives
        net_benefits.append((tp - fp * w) / n_samples)
    return net_benefits


# Perform cross-validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):
    print(f"Fold {fold + 1}")

    # Split the data into training and testing sets
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Data normalization: the scaler is fitted on the training fold only and
    # then applied to the test fold
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Hyperparameter tuning with GridSearchCV (inner 5-fold CV, scored by ROC AUC).
    # With probability=True, SVC fits its probability estimates on an internal
    # random shuffle of the data, so the seed is fixed to make the
    # probabilities (and the AUC / DCA built on them) reproducible, matching
    # the seeded Random Forest and XGBoost models.
    model = GridSearchCV(
        SVC(probability=True, random_state=42),
        param_grid,
        cv=5,
        scoring='roc_auc',
    )
    model.fit(X_train_scaled, y_train)
    best_model = model.best_estimator_
    print("Best parameters found: ", model.best_params_)

    # Predict positive-class probabilities and hard class labels
    y_pred_proba = best_model.predict_proba(X_test_scaled)[:, 1]
    y_pred = best_model.predict(X_test_scaled)

    # Compute evaluation metrics
    auc_score = roc_auc_score(y_test, y_pred_proba)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = conf_matrix.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    # If the model predicts no positives, tp + fp is 0 and the plain ratio is
    # NaN, which would poison the cross-validated mean. Report 0.0 instead,
    # which is what sklearn's precision_score returns by default.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    f1 = f1_score(y_test, y_pred)

    # Accumulate the confusion matrix
    total_conf_matrix += conf_matrix

    # Save the results
    auc_scores.append(auc_score)
    accuracies.append(accuracy)
    sensitivities.append(sensitivity)
    specificities.append(specificity)
    precisions.append(precision)
    f1_scores.append(f1)

    # Plot the ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=36)
    plt.ylabel('True Positive Rate', fontsize=36)
    plt.title(f'ROC Curve (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.legend(loc="lower right", prop={'size': 30})
    plt.show()

    # Plot the confusion matrix
    plt.figure(figsize=(12, 8))
    # sns.set() re-applies the seaborn theme and overwrites matplotlib's
    # 'font.family'; pass the font explicitly so later figures keep it.
    sns.set(font='Times New Roman', font_scale=4)
    heatmap = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 60})
    cbar = heatmap.collections[0].colorbar
    cbar.ax.tick_params(labelsize=36)
    plt.xlabel('Predicted labels', fontsize=36)
    plt.ylabel('True labels', fontsize=36)
    plt.title(f'Confusion Matrix (Fold {fold + 1})', fontsize=36, y=1.02)
    plt.show()

    # Decision Curve Analysis (DCA): net-benefit curve of the current fold
    fold_net_benefits = calculate_net_benefit(thresholds, y_test, y_pred_proba)
    model_net_benefits.append(fold_net_benefits)

# Plot the average DCA curve (net benefit averaged over the folds, per threshold)
avg_net_benefits = np.mean(model_net_benefits, axis=0)
plt.figure(figsize=(12, 8))
plt.plot(thresholds, avg_net_benefits, label='SVM (Average)', color='red')
plt.xlabel('Probability Threshold', fontsize=16)
plt.ylabel('Net Benefit', fontsize=16)
plt.title('Decision Curve Analysis (Average)', fontsize=18)
plt.legend(loc='lower left', fontsize=14)
plt.grid(True)
plt.xlim([0, 1])
plt.ylim([-0.1, 1])
plt.show()

# Compute and plot the average confusion matrix: the cumulative matrix divided
# by the number of folds
avg_conf_matrix = total_conf_matrix / kf.get_n_splits()
plt.figure(figsize=(12, 8))
# sns.set() re-applies the seaborn theme and overwrites matplotlib's
# 'font.family'; pass the font explicitly so the figure keeps Times New Roman.
sns.set(font='Times New Roman', font_scale=4)
heatmap = sns.heatmap(avg_conf_matrix, annot=True, fmt='.2f', cmap='Blues', annot_kws={"size": 60})
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=36)
plt.xlabel('Predicted labels', fontsize=36)
plt.ylabel('True labels', fontsize=36)
plt.title('Average Confusion Matrix', fontsize=36, y=1.02)
plt.show()

# Output the mean and (population) standard deviation of every per-fold metric
print("\nCross-Validation Results:")
for metric_name, fold_values in (
    ("AUC", auc_scores),
    ("Accuracy", accuracies),
    ("Sensitivity", sensitivities),
    ("Specificity", specificities),
    ("Precision", precisions),
    ("F1-Score", f1_scores),
):
    print(f"Mean {metric_name}: {np.mean(fold_values):.8f} ± {np.std(fold_values):.8f}")