In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
import numpy as np

# Function to calculate net benefit for the model
def calculate_net_benefit(thresholds, y_true, y_proba):
    """Return the model's net benefit at each probability threshold.

    For a threshold ``pt`` every case with ``y_proba >= pt`` is treated as
    positive and the net benefit is ``(TP - FP * pt / (1 - pt)) / N``.

    Args:
        thresholds: Iterable of threshold probabilities. Each value must be
            below 1 because the false-positive weight divides by ``1 - pt``.
        y_true: Array-like of outcomes; entries equal to 1 count as events
            and entries equal to 0 as non-events.
        y_proba: Array-like of predicted probabilities, in the same order
            and with the same shape as ``y_true``.

    Returns:
        A list holding one net-benefit value per threshold.

    Raises:
        ValueError: If ``y_true`` and ``y_proba`` do not have the same shape.
    """
    # Coerce to plain arrays so that lists are accepted and pandas Series are
    # compared by position instead of being re-aligned on their index labels.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    if y_true.shape != y_proba.shape:
        raise ValueError(
            f"y_true and y_proba must have the same shape, "
            f"got {y_true.shape} and {y_proba.shape}"
        )

    # These masks do not depend on the threshold, so build them once.
    is_event = y_true == 1
    is_non_event = y_true == 0
    n_cases = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight applied to false positives
        predicted_positive = y_proba >= threshold
        tp = np.sum(predicted_positive & is_event)  # True positives
        fp = np.sum(predicted_positive & is_non_event)  # False positives
        net_benefits.append((tp - fp * w) / n_cases)
    return net_benefits

# Calculate net benefit for "All" treated (assumes all cases are positive)
def net_benefit_all(thresholds, y_true):
    """Return the net benefit of treating every case as positive.

    With ``p = mean(y_true)`` the value at threshold ``pt`` is
    ``p - (1 - p) * pt / (1 - pt)``.

    Args:
        thresholds: Iterable of threshold probabilities (each below 1).
        y_true: Array-like of outcomes whose mean is used as the prevalence.

    Returns:
        A list with one net-benefit value per threshold.
    """
    event_rate = np.mean(y_true)
    treat_all = []
    for pt in thresholds:
        odds = pt / (1 - pt)  # Weight applied to the non-event fraction
        treat_all.append(event_rate - (1 - event_rate) * odds)
    return treat_all

# Calculate net benefit for "None" treated (assumes no cases are positive)
def net_benefit_none(thresholds):
    """Return the treat-none reference curve: a 0 for every threshold.

    Args:
        thresholds: Iterable of thresholds; only its length matters.

    Returns:
        A list of integer zeros, one per threshold.
    """
    baseline = []
    for _ in thresholds:
        baseline.append(0)
    return baseline

# Decision Curve Analysis
def decision_curve_analysis(y_true, y_pred_proba, thresholds):
    """Perform Decision Curve Analysis (DCA) for a model across thresholds.

    For each threshold ``pt`` the cases with ``y_pred_proba >= pt`` are
    classified as positive and the net benefit
    ``(TP - FP * pt / (1 - pt)) / N`` is recorded.

    Args:
        y_true: Array-like of outcomes; 1 marks an event, 0 a non-event.
        y_pred_proba: Array-like of predicted probabilities, in the same
            order as ``y_true``.
        thresholds: Iterable of threshold probabilities (each below 1).

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays: accepts lists and avoids pandas index re-alignment.
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    num_cases = len(y_true)  # Total number of cases in the dataset

    net_benefits = []
    for threshold in thresholds:
        # Binary predictions at the current threshold.
        y_pred = (y_pred_proba >= threshold).astype(int)
        tp = np.sum((y_pred == 1) & (y_true == 1))  # True positives
        fp = np.sum((y_pred == 1) & (y_true == 0))  # False positives
        # Computed inline rather than via calculate_net_benefit, whose
        # signature is (thresholds, y_true, y_proba) and therefore cannot
        # accept per-threshold counts.
        net_benefit = (tp - fp * (threshold / (1 - threshold))) / num_cases
        net_benefits.append(net_benefit)
    return net_benefits

# Read datasets: pre-split train/test CSVs for the three feature sets compared
# below (1 = LASSO-selected, 2 = all features, 3 = ICC-filtered features).
train_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_train_selsect_lasso.csv")
test_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_test_selsect_lasso.csv")
train_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train.csv")
test_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test.csv")
train_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train_icc.csv")
test_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test_icc.csv")
# Drop the columns that are not used as predictors (sets 2 and 3 also carry an
# 'index' column). Presumably clinical/survival fields -- TODO confirm.
train_data1 = train_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
test_data1 = test_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
train_data2 = train_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data2 = test_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
train_data3 = train_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data3 = test_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
# Prepare data for the three models: features X and target column 'label'
X_train1 = train_data1.drop('label', axis=1)
y_train1 = train_data1['label']
X_test1 = test_data1.drop('label', axis=1)
y_test1 = test_data1['label']

X_train2 = train_data2.drop('label', axis=1)
y_train2 = train_data2['label']
X_test2 = test_data2.drop('label', axis=1)
y_test2 = test_data2['label']

X_train3 = train_data3.drop('label', axis=1)
y_train3 = train_data3['label']
X_test3 = test_data3.drop('label', axis=1)
y_test3 = test_data3['label']

# Initialize and train one random forest (300 trees, fixed seed) per feature
# set; keep the second predict_proba column as the positive-class score
# (assumes labels are 0/1 -- TODO confirm).
model1 = RandomForestClassifier(n_estimators=300, random_state=42)
model1.fit(X_train1, y_train1)
y_pred_proba1 = model1.predict_proba(X_test1)[:, 1]
print(f"Model 1 (LASSO features) uses {X_train1.shape[1]} features.")

model2 = RandomForestClassifier(n_estimators=300, random_state=42)
model2.fit(X_train2, y_train2)
y_pred_proba2 = model2.predict_proba(X_test2)[:, 1]
print(f"Model 2 (ALL features) uses {X_train2.shape[1]} features.")

model3 = RandomForestClassifier(n_estimators=300, random_state=42)
model3.fit(X_train3, y_train3)
y_pred_proba3 = model3.predict_proba(X_test3)[:, 1]
print(f"Model 3 (ICC features) uses {X_train3.shape[1]} features.")

# Compute the test-set AUC of each model
auc_score1 = roc_auc_score(y_test1, y_pred_proba1)
auc_score2 = roc_auc_score(y_test2, y_pred_proba2)
auc_score3 = roc_auc_score(y_test3, y_pred_proba3)

# 100 evenly spaced thresholds from 0.01 to 0.99
thresholds = np.linspace(0.01, 0.99, 100)

# Calculate net benefits for each model, plus the treat-all and treat-none
# reference curves computed from the matching test labels
model1_net_benefits = calculate_net_benefit(thresholds, y_test1, y_pred_proba1)
all_net_benefits1 = net_benefit_all(thresholds, y_test1)
none_net_benefits1 = net_benefit_none(thresholds)

model2_net_benefits = calculate_net_benefit(thresholds, y_test2, y_pred_proba2)
all_net_benefits2 = net_benefit_all(thresholds, y_test2)
none_net_benefits2 = net_benefit_none(thresholds)

model3_net_benefits = calculate_net_benefit(thresholds, y_test3, y_pred_proba3)
all_net_benefits3 = net_benefit_all(thresholds, y_test3)
none_net_benefits3 = net_benefit_none(thresholds)

# NOTE(review): these names are already imported at the top of the cell, and
# `auc` is not used anywhere in this script.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
plt.rcParams['font.family'] = 'Times New Roman'
# Relies on y_test*, y_pred_proba* and auc_score* computed above.

# ---- Figure 1: ROC curves of the three models ----
plt.figure(figsize=(10, 8))

# NOTE(review): the feature counts in the legend labels are hard-coded; the
# actual counts are printed above from X_train*.shape[1] -- confirm they match.
# ROC Curve for Model 1
fpr1, tpr1, _ = roc_curve(y_test1, y_pred_proba1)
plt.plot(fpr1, tpr1, color='blue', lw=2, label='LASSO features(286 features)(AUC = %0.3f)' % auc_score1)

# ROC Curve for Model 2
fpr2, tpr2, _ = roc_curve(y_test2, y_pred_proba2)
plt.plot(fpr2, tpr2, color='green', lw=2, label='ALL features(3860 features)(AUC = %0.3f)' % auc_score2)

# ROC Curve for Model 3
fpr3, tpr3, _ = roc_curve(y_test3, y_pred_proba3)
plt.plot(fpr3, tpr3, color='red', lw=2, label='ICC features(1507 features)(AUC = %0.3f)' % auc_score3)

# Diagonal chance line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([-0.05, 1.0])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate', fontsize=24)
plt.ylabel('True Positive Rate', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc="lower right", fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.show()

# ---- Figure 2: decision curves of the three models ----
plt.figure(figsize=(10, 8))
# DCA for Model 1
plt.plot(thresholds, model1_net_benefits, label='LASSO features(286 features)', color='blue', linestyle='-')

# DCA for Model 2
plt.plot(thresholds, model2_net_benefits, label='ALL features(3860 features)', color='green', linestyle='-')
# DCA for Model 3
plt.plot(thresholds, model3_net_benefits, label='ICC features(1507 features)', color='red', linestyle='-')
# Only the reference curves of test set 1 are drawn; this assumes all three
# test sets share the same labels -- TODO confirm.
plt.plot(thresholds, all_net_benefits1, label=' All', color='gray', linestyle='--')
plt.plot(thresholds, none_net_benefits1, label=' None', color='black', linestyle=':')
plt.xlabel('Threshold Probability', fontsize=24)
plt.ylabel('Net Benefit', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper right', fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.xlim([0, 1])
# The lower limit clips the steeply negative tail of the treat-all curve.
plt.ylim([-0.1, 1])
plt.show()


In [None]:
#xgboost
import pandas as pd
from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
import numpy as np

# Function to calculate net benefit for the model
def calculate_net_benefit(thresholds, y_true, y_proba):
    """Return the model's net benefit at each probability threshold.

    For a threshold ``pt`` every case with ``y_proba >= pt`` is treated as
    positive and the net benefit is ``(TP - FP * pt / (1 - pt)) / N``.

    Args:
        thresholds: Iterable of threshold probabilities. Each value must be
            below 1 because the false-positive weight divides by ``1 - pt``.
        y_true: Array-like of outcomes; entries equal to 1 count as events
            and entries equal to 0 as non-events.
        y_proba: Array-like of predicted probabilities, in the same order
            and with the same shape as ``y_true``.

    Returns:
        A list holding one net-benefit value per threshold.

    Raises:
        ValueError: If ``y_true`` and ``y_proba`` do not have the same shape.
    """
    # Coerce to plain arrays so that lists are accepted and pandas Series are
    # compared by position instead of being re-aligned on their index labels.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    if y_true.shape != y_proba.shape:
        raise ValueError(
            f"y_true and y_proba must have the same shape, "
            f"got {y_true.shape} and {y_proba.shape}"
        )

    # These masks do not depend on the threshold, so build them once.
    is_event = y_true == 1
    is_non_event = y_true == 0
    n_cases = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight applied to false positives
        predicted_positive = y_proba >= threshold
        tp = np.sum(predicted_positive & is_event)  # True positives
        fp = np.sum(predicted_positive & is_non_event)  # False positives
        net_benefits.append((tp - fp * w) / n_cases)
    return net_benefits

# Calculate net benefit for "All" treated (assumes all cases are positive)
def net_benefit_all(thresholds, y_true):
    """Return the net benefit of treating every case as positive.

    With ``p = mean(y_true)`` the value at threshold ``pt`` is
    ``p - (1 - p) * pt / (1 - pt)``.

    Args:
        thresholds: Iterable of threshold probabilities (each below 1).
        y_true: Array-like of outcomes whose mean is used as the prevalence.

    Returns:
        A list with one net-benefit value per threshold.
    """
    event_rate = np.mean(y_true)
    treat_all = []
    for pt in thresholds:
        odds = pt / (1 - pt)  # Weight applied to the non-event fraction
        treat_all.append(event_rate - (1 - event_rate) * odds)
    return treat_all

# Calculate net benefit for "None" treated (assumes no cases are positive)
def net_benefit_none(thresholds):
    """Return the treat-none reference curve: a 0 for every threshold.

    Args:
        thresholds: Iterable of thresholds; only its length matters.

    Returns:
        A list of integer zeros, one per threshold.
    """
    baseline = []
    for _ in thresholds:
        baseline.append(0)
    return baseline

# Decision Curve Analysis. This is the same as the previous one (RF)
def decision_curve_analysis(y_true, y_pred_proba, thresholds):
    """Perform Decision Curve Analysis (DCA) for a model across thresholds.

    For each threshold ``pt`` the cases with ``y_pred_proba >= pt`` are
    classified as positive and the net benefit
    ``(TP - FP * pt / (1 - pt)) / N`` is recorded.

    Args:
        y_true: Array-like of outcomes; 1 marks an event, 0 a non-event.
        y_pred_proba: Array-like of predicted probabilities, in the same
            order as ``y_true``.
        thresholds: Iterable of threshold probabilities (each below 1).

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays: accepts lists and avoids pandas index re-alignment.
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    num_cases = len(y_true)  # Total number of cases in the dataset

    net_benefits = []
    for threshold in thresholds:
        # Binary predictions at the current threshold.
        y_pred = (y_pred_proba >= threshold).astype(int)
        tp = np.sum((y_pred == 1) & (y_true == 1))  # True positives
        fp = np.sum((y_pred == 1) & (y_true == 0))  # False positives
        # Computed inline rather than via calculate_net_benefit, whose
        # signature is (thresholds, y_true, y_proba) and therefore cannot
        # accept per-threshold counts.
        net_benefit = (tp - fp * (threshold / (1 - threshold))) / num_cases
        net_benefits.append(net_benefit)
    return net_benefits

# Read datasets: pre-split train/test CSVs for the three feature sets compared
# below (1 = LASSO-selected, 2 = all features, 3 = ICC-filtered features).
train_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_train_selsect_lasso.csv")
test_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_test_selsect_lasso.csv")
train_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train.csv")
test_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test.csv")
train_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train_icc.csv")
test_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test_icc.csv")
# Drop the columns that are not used as predictors (sets 2 and 3 also carry an
# 'index' column). Presumably clinical/survival fields -- TODO confirm.
train_data1 = train_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
test_data1 = test_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
train_data2 = train_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data2 = test_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
train_data3 = train_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data3 = test_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
# Prepare data for the three models: features X and target column 'label'
X_train1 = train_data1.drop('label', axis=1)
y_train1 = train_data1['label']
X_test1 = test_data1.drop('label', axis=1)
y_test1 = test_data1['label']

X_train2 = train_data2.drop('label', axis=1)
y_train2 = train_data2['label']
X_test2 = test_data2.drop('label', axis=1)
y_test2 = test_data2['label']

X_train3 = train_data3.drop('label', axis=1)
y_train3 = train_data3['label']
X_test3 = test_data3.drop('label', axis=1)
y_test3 = test_data3['label']

# Initialize and train one XGBoost classifier per feature set (library
# defaults apart from use_label_encoder=False); keep the second predict_proba
# column as the positive-class score (assumes labels are 0/1 -- TODO confirm).
model1 = XGBClassifier(use_label_encoder=False)
model1.fit(X_train1, y_train1)
y_pred_proba1 = model1.predict_proba(X_test1)[:, 1]
print(f"Model 1 (LASSO features) uses {X_train1.shape[1]} features.")

model2 = XGBClassifier(use_label_encoder=False)
model2.fit(X_train2, y_train2)
y_pred_proba2 = model2.predict_proba(X_test2)[:, 1]
print(f"Model 2 (ALL features) uses {X_train2.shape[1]} features.")

model3 = XGBClassifier(use_label_encoder=False)
model3.fit(X_train3, y_train3)
y_pred_proba3 = model3.predict_proba(X_test3)[:, 1]
print(f"Model 3 (ICC features) uses {X_train3.shape[1]} features.")

# Compute the test-set AUC of each model
auc_score1 = roc_auc_score(y_test1, y_pred_proba1)
auc_score2 = roc_auc_score(y_test2, y_pred_proba2)
auc_score3 = roc_auc_score(y_test3, y_pred_proba3)

# 100 evenly spaced thresholds from 0.01 to 0.99
thresholds = np.linspace(0.01, 0.99, 100)

# Calculate net benefits for each model, plus the treat-all and treat-none
# reference curves computed from the matching test labels
model1_net_benefits = calculate_net_benefit(thresholds, y_test1, y_pred_proba1)
all_net_benefits1 = net_benefit_all(thresholds, y_test1)
none_net_benefits1 = net_benefit_none(thresholds)

model2_net_benefits = calculate_net_benefit(thresholds, y_test2, y_pred_proba2)
all_net_benefits2 = net_benefit_all(thresholds, y_test2)
none_net_benefits2 = net_benefit_none(thresholds)

model3_net_benefits = calculate_net_benefit(thresholds, y_test3, y_pred_proba3)
all_net_benefits3 = net_benefit_all(thresholds, y_test3)
none_net_benefits3 = net_benefit_none(thresholds)

# NOTE(review): these names are already imported at the top of the cell, and
# `auc` is not used anywhere in this script.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
plt.rcParams['font.family'] = 'Times New Roman'
# Relies on y_test*, y_pred_proba* and auc_score* computed above.

# ---- Figure 1: ROC curves of the three models ----
plt.figure(figsize=(10, 8))

# NOTE(review): the feature counts in the legend labels are hard-coded; the
# actual counts are printed above from X_train*.shape[1] -- confirm they match.
# ROC Curve for Model 1
fpr1, tpr1, _ = roc_curve(y_test1, y_pred_proba1)
plt.plot(fpr1, tpr1, color='blue', lw=2, label='LASSO features(286 features)(AUC = %0.3f)' % auc_score1)

# ROC Curve for Model 2
fpr2, tpr2, _ = roc_curve(y_test2, y_pred_proba2)
plt.plot(fpr2, tpr2, color='green', lw=2, label='ALL features(3860 features)(AUC = %0.3f)' % auc_score2)

# ROC Curve for Model 3
fpr3, tpr3, _ = roc_curve(y_test3, y_pred_proba3)
plt.plot(fpr3, tpr3, color='red', lw=2, label='ICC features(1507 features)(AUC = %0.3f)' % auc_score3)

# Diagonal chance line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([-0.05, 1.0])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate', fontsize=24)
plt.ylabel('True Positive Rate', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc="lower right", fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.show()

# ---- Figure 2: decision curves of the three models ----
plt.figure(figsize=(10, 8))
# DCA for Model 1
plt.plot(thresholds, model1_net_benefits, label='LASSO features(286 features)', color='blue', linestyle='-')

# DCA for Model 2
plt.plot(thresholds, model2_net_benefits, label='ALL features(3860 features)', color='green', linestyle='-')
# DCA for Model 3
plt.plot(thresholds, model3_net_benefits, label='ICC features(1507 features)', color='red', linestyle='-')
# Only the reference curves of test set 1 are drawn; this assumes all three
# test sets share the same labels -- TODO confirm.
plt.plot(thresholds, all_net_benefits1, label=' All', color='gray', linestyle='--')
plt.plot(thresholds, none_net_benefits1, label=' None', color='black', linestyle=':')
plt.xlabel('Threshold Probability', fontsize=24)
plt.ylabel('Net Benefit', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper right', fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.xlim([0, 1])
# The lower limit clips the steeply negative tail of the treat-all curve.
plt.ylim([-0.1, 1])
plt.show()



In [None]:
#LR
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
import numpy as np

# Function to calculate net benefit for the model
def calculate_net_benefit(thresholds, y_true, y_proba):
    """Return the model's net benefit at each probability threshold.

    For a threshold ``pt`` every case with ``y_proba >= pt`` is treated as
    positive and the net benefit is ``(TP - FP * pt / (1 - pt)) / N``.

    Args:
        thresholds: Iterable of threshold probabilities. Each value must be
            below 1 because the false-positive weight divides by ``1 - pt``.
        y_true: Array-like of outcomes; entries equal to 1 count as events
            and entries equal to 0 as non-events.
        y_proba: Array-like of predicted probabilities, in the same order
            and with the same shape as ``y_true``.

    Returns:
        A list holding one net-benefit value per threshold.

    Raises:
        ValueError: If ``y_true`` and ``y_proba`` do not have the same shape.
    """
    # Coerce to plain arrays so that lists are accepted and pandas Series are
    # compared by position instead of being re-aligned on their index labels.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    if y_true.shape != y_proba.shape:
        raise ValueError(
            f"y_true and y_proba must have the same shape, "
            f"got {y_true.shape} and {y_proba.shape}"
        )

    # These masks do not depend on the threshold, so build them once.
    is_event = y_true == 1
    is_non_event = y_true == 0
    n_cases = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight applied to false positives
        predicted_positive = y_proba >= threshold
        tp = np.sum(predicted_positive & is_event)  # True positives
        fp = np.sum(predicted_positive & is_non_event)  # False positives
        net_benefits.append((tp - fp * w) / n_cases)
    return net_benefits

# Calculate net benefit for "All" treated (assumes all cases are positive)
def net_benefit_all(thresholds, y_true):
    """Return the net benefit of treating every case as positive.

    With ``p = mean(y_true)`` the value at threshold ``pt`` is
    ``p - (1 - p) * pt / (1 - pt)``.

    Args:
        thresholds: Iterable of threshold probabilities (each below 1).
        y_true: Array-like of outcomes whose mean is used as the prevalence.

    Returns:
        A list with one net-benefit value per threshold.
    """
    event_rate = np.mean(y_true)
    treat_all = []
    for pt in thresholds:
        odds = pt / (1 - pt)  # Weight applied to the non-event fraction
        treat_all.append(event_rate - (1 - event_rate) * odds)
    return treat_all

# Calculate net benefit for "None" treated (assumes no cases are positive)
def net_benefit_none(thresholds):
    """Return the treat-none reference curve: a 0 for every threshold.

    Args:
        thresholds: Iterable of thresholds; only its length matters.

    Returns:
        A list of integer zeros, one per threshold.
    """
    baseline = []
    for _ in thresholds:
        baseline.append(0)
    return baseline

# Decision Curve Analysis. This is the same as the previous one (RF)
def decision_curve_analysis(y_true, y_pred_proba, thresholds):
    """Perform Decision Curve Analysis (DCA) for a model across thresholds.

    For each threshold ``pt`` the cases with ``y_pred_proba >= pt`` are
    classified as positive and the net benefit
    ``(TP - FP * pt / (1 - pt)) / N`` is recorded.

    Args:
        y_true: Array-like of outcomes; 1 marks an event, 0 a non-event.
        y_pred_proba: Array-like of predicted probabilities, in the same
            order as ``y_true``.
        thresholds: Iterable of threshold probabilities (each below 1).

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays: accepts lists and avoids pandas index re-alignment.
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    num_cases = len(y_true)  # Total number of cases in the dataset

    net_benefits = []
    for threshold in thresholds:
        # Binary predictions at the current threshold.
        y_pred = (y_pred_proba >= threshold).astype(int)
        tp = np.sum((y_pred == 1) & (y_true == 1))  # True positives
        fp = np.sum((y_pred == 1) & (y_true == 0))  # False positives
        # Computed inline rather than via calculate_net_benefit, whose
        # signature is (thresholds, y_true, y_proba) and therefore cannot
        # accept per-threshold counts.
        net_benefit = (tp - fp * (threshold / (1 - threshold))) / num_cases
        net_benefits.append(net_benefit)
    return net_benefits

# Read datasets: pre-split train/test CSVs for the three feature sets compared
# below (1 = LASSO-selected, 2 = all features, 3 = ICC-filtered features).
train_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_train_selsect_lasso.csv")
test_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/CHANGE_OSNAME/Total_GBM+LGG_T1+T2+T1GD+flair_s1_add_os_age_gender_label_test_selsect_lasso.csv")
train_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train.csv")
test_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test.csv")
train_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_train_icc.csv")
test_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s1_all_feature_test_icc.csv")
# Drop the columns that are not used as predictors (sets 2 and 3 also carry an
# 'index' column). Presumably clinical/survival fields -- TODO confirm.
train_data1 = train_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
test_data1 = test_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
train_data2 = train_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data2 = test_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
train_data3 = train_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data3 = test_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
# Prepare data for the three models: features X and target column 'label'
X_train1 = train_data1.drop('label', axis=1)
y_train1 = train_data1['label']
X_test1 = test_data1.drop('label', axis=1)
y_test1 = test_data1['label']

X_train2 = train_data2.drop('label', axis=1)
y_train2 = train_data2['label']
X_test2 = test_data2.drop('label', axis=1)
y_test2 = test_data2['label']

X_train3 = train_data3.drop('label', axis=1)
y_train3 = train_data3['label']
X_test3 = test_data3.drop('label', axis=1)
y_test3 = test_data3['label']

# Initialize and train one L2-penalised logistic regression (liblinear solver,
# C=1.0, max_iter=100) per feature set; keep the second predict_proba column
# as the positive-class score (assumes labels are 0/1 -- TODO confirm).
# NOTE(review): features are passed in unscaled here, unlike the SVM cell.
model1 = LogisticRegression(penalty="l2",solver="liblinear",C=1.0,max_iter=100)
model1.fit(X_train1, y_train1)
y_pred_proba1 = model1.predict_proba(X_test1)[:, 1]
print(f"Model 1 (LASSO features) uses {X_train1.shape[1]} features.")

model2 = LogisticRegression(penalty="l2",solver="liblinear",C=1.0,max_iter=100)
model2.fit(X_train2, y_train2)
y_pred_proba2 = model2.predict_proba(X_test2)[:, 1]
print(f"Model 2 (ALL features) uses {X_train2.shape[1]} features.")

model3 =LogisticRegression(penalty="l2",solver="liblinear",C=1.0,max_iter=100)
model3.fit(X_train3, y_train3)
y_pred_proba3 = model3.predict_proba(X_test3)[:, 1]
print(f"Model 3 (ICC features) uses {X_train3.shape[1]} features.")

# Compute the test-set AUC of each model
auc_score1 = roc_auc_score(y_test1, y_pred_proba1)
auc_score2 = roc_auc_score(y_test2, y_pred_proba2)
auc_score3 = roc_auc_score(y_test3, y_pred_proba3)

# 100 evenly spaced thresholds from 0.01 to 0.99
thresholds = np.linspace(0.01, 0.99, 100)

# Calculate net benefits for each model, plus the treat-all and treat-none
# reference curves computed from the matching test labels
model1_net_benefits = calculate_net_benefit(thresholds, y_test1, y_pred_proba1)
all_net_benefits1 = net_benefit_all(thresholds, y_test1)
none_net_benefits1 = net_benefit_none(thresholds)

model2_net_benefits = calculate_net_benefit(thresholds, y_test2, y_pred_proba2)
all_net_benefits2 = net_benefit_all(thresholds, y_test2)
none_net_benefits2 = net_benefit_none(thresholds)

model3_net_benefits = calculate_net_benefit(thresholds, y_test3, y_pred_proba3)
all_net_benefits3 = net_benefit_all(thresholds, y_test3)
none_net_benefits3 = net_benefit_none(thresholds)

# NOTE(review): these names are already imported at the top of the cell, and
# `auc` is not used anywhere in this script.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
plt.rcParams['font.family'] = 'Times New Roman'
# Relies on y_test*, y_pred_proba* and auc_score* computed above.

# ---- Figure 1: ROC curves of the three models ----
plt.figure(figsize=(10, 8))

# NOTE(review): the feature counts in the legend labels are hard-coded; the
# actual counts are printed above from X_train*.shape[1] -- confirm they match.
# ROC Curve for Model 1
fpr1, tpr1, _ = roc_curve(y_test1, y_pred_proba1)
plt.plot(fpr1, tpr1, color='blue', lw=2, label='LASSO features(286 features)(AUC = %0.3f)' % auc_score1)

# ROC Curve for Model 2
fpr2, tpr2, _ = roc_curve(y_test2, y_pred_proba2)
plt.plot(fpr2, tpr2, color='green', lw=2, label='ALL features(3860 features)(AUC = %0.3f)' % auc_score2)

# ROC Curve for Model 3
fpr3, tpr3, _ = roc_curve(y_test3, y_pred_proba3)
plt.plot(fpr3, tpr3, color='red', lw=2, label='ICC features(1507 features)(AUC = %0.3f)' % auc_score3)

# Diagonal chance line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([-0.05, 1.0])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate', fontsize=24)
plt.ylabel('True Positive Rate', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc="lower right", fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.show()

# ---- Figure 2: decision curves of the three models ----
plt.figure(figsize=(10, 8))
# DCA for Model 1
plt.plot(thresholds, model1_net_benefits, label='LASSO features(286 features)', color='blue', linestyle='-')

# DCA for Model 2
plt.plot(thresholds, model2_net_benefits, label='ALL features(3860 features)', color='green', linestyle='-')
# DCA for Model 3
plt.plot(thresholds, model3_net_benefits, label='ICC features(1507 features)', color='red', linestyle='-')
# Only the reference curves of test set 1 are drawn; this assumes all three
# test sets share the same labels -- TODO confirm.
plt.plot(thresholds, all_net_benefits1, label=' All', color='gray', linestyle='--')
plt.plot(thresholds, none_net_benefits1, label=' None', color='black', linestyle=':')
plt.xlabel('Threshold Probability', fontsize=24)
plt.ylabel('Net Benefit', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper right', fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.xlim([0, 1])
# The lower limit clips the steeply negative tail of the treat-all curve.
plt.ylim([-0.1, 1])
plt.show()

In [None]:
#svm
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# Function to calculate net benefit for the model
def calculate_net_benefit(thresholds, y_true, y_proba):
    """Return the model's net benefit at each probability threshold.

    For a threshold ``pt`` every case with ``y_proba >= pt`` is treated as
    positive and the net benefit is ``(TP - FP * pt / (1 - pt)) / N``.

    Args:
        thresholds: Iterable of threshold probabilities. Each value must be
            below 1 because the false-positive weight divides by ``1 - pt``.
        y_true: Array-like of outcomes; entries equal to 1 count as events
            and entries equal to 0 as non-events.
        y_proba: Array-like of predicted probabilities, in the same order
            and with the same shape as ``y_true``.

    Returns:
        A list holding one net-benefit value per threshold.

    Raises:
        ValueError: If ``y_true`` and ``y_proba`` do not have the same shape.
    """
    # Coerce to plain arrays so that lists are accepted and pandas Series are
    # compared by position instead of being re-aligned on their index labels.
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    if y_true.shape != y_proba.shape:
        raise ValueError(
            f"y_true and y_proba must have the same shape, "
            f"got {y_true.shape} and {y_proba.shape}"
        )

    # These masks do not depend on the threshold, so build them once.
    is_event = y_true == 1
    is_non_event = y_true == 0
    n_cases = len(y_true)

    net_benefits = []
    for threshold in thresholds:
        w = threshold / (1 - threshold)  # Weight applied to false positives
        predicted_positive = y_proba >= threshold
        tp = np.sum(predicted_positive & is_event)  # True positives
        fp = np.sum(predicted_positive & is_non_event)  # False positives
        net_benefits.append((tp - fp * w) / n_cases)
    return net_benefits

# Calculate net benefit for "All" treated (assumes all cases are positive)
def net_benefit_all(thresholds, y_true):
    """Return the net benefit of treating every case as positive.

    With ``p = mean(y_true)`` the value at threshold ``pt`` is
    ``p - (1 - p) * pt / (1 - pt)``.

    Args:
        thresholds: Iterable of threshold probabilities (each below 1).
        y_true: Array-like of outcomes whose mean is used as the prevalence.

    Returns:
        A list with one net-benefit value per threshold.
    """
    event_rate = np.mean(y_true)
    treat_all = []
    for pt in thresholds:
        odds = pt / (1 - pt)  # Weight applied to the non-event fraction
        treat_all.append(event_rate - (1 - event_rate) * odds)
    return treat_all

# Calculate net benefit for "None" treated (assumes no cases are positive)
def net_benefit_none(thresholds):
    """Return the treat-none reference curve: a 0 for every threshold.

    Args:
        thresholds: Iterable of thresholds; only its length matters.

    Returns:
        A list of integer zeros, one per threshold.
    """
    baseline = []
    for _ in thresholds:
        baseline.append(0)
    return baseline

# Decision Curve Analysis. This is the same as the previous one (RF)
def decision_curve_analysis(y_true, y_pred_proba, thresholds):
    """Perform Decision Curve Analysis (DCA) for a model across thresholds.

    For each threshold ``pt`` the cases with ``y_pred_proba >= pt`` are
    classified as positive and the net benefit
    ``(TP - FP * pt / (1 - pt)) / N`` is recorded.

    Args:
        y_true: Array-like of outcomes; 1 marks an event, 0 a non-event.
        y_pred_proba: Array-like of predicted probabilities, in the same
            order as ``y_true``.
        thresholds: Iterable of threshold probabilities (each below 1).

    Returns:
        A list with one net-benefit value per threshold.
    """
    # Plain arrays: accepts lists and avoids pandas index re-alignment.
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    num_cases = len(y_true)  # Total number of cases in the dataset

    net_benefits = []
    for threshold in thresholds:
        # Binary predictions at the current threshold.
        y_pred = (y_pred_proba >= threshold).astype(int)
        tp = np.sum((y_pred == 1) & (y_true == 1))  # True positives
        fp = np.sum((y_pred == 1) & (y_true == 0))  # False positives
        # Computed inline rather than via calculate_net_benefit, whose
        # signature is (thresholds, y_true, y_proba) and therefore cannot
        # accept per-threshold counts.
        net_benefit = (tp - fp * (threshold / (1 - threshold))) / num_cases
        net_benefits.append(net_benefit)
    return net_benefits

# Read datasets: pre-split train/test CSVs for the three feature sets compared
# below (1 = LASSO-selected, 2 = all features, 3 = ICC-filtered features).
# NOTE(review): these file names differ from the ones read by the other cells
# ("_s_" instead of "_s1_", and a different LASSO folder) -- confirm intended.
train_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/classification_lasso/drop_qianzhui/Total_GBM+LGG_T1+T2+T1GD+flair_s_add_os_age_gender_label_train_selsect_lasso.csv")
test_data1 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_3_lasso_feature_divide_train_test/classification_lasso/drop_qianzhui/Total_GBM+LGG_T1+T2+T1GD+flair_s_add_os_age_gender_label_test_selsect_lasso.csv")
train_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s_all_feature_train.csv")
test_data2 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_2_all_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s_all_feature_test.csv")
train_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s_all_feature_train_icc.csv")
test_data3 = pd.read_csv("D:/Apple-paper/Radiomics/survival analysis/survival analysis/APPLE/t1+t1Gd+t2+flair/2_1_icc_feature_divide_train_test/Total_GBM+LGG_t1+t2+t2Gd+flair_s_all_feature_test_icc.csv")
# Drop the columns that are not used as predictors (sets 2 and 3 also carry an
# 'index' column). Presumably clinical/survival fields -- TODO confirm.
train_data1 = train_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
test_data1 = test_data1.drop(['gender','age_at_index','OS','OS.time'], axis=1)
train_data2 = train_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data2 = test_data2.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
train_data3 = train_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
test_data3 = test_data3.drop(['index','gender','age_at_index','OS','OS.time'], axis=1)
# Prepare data for the three models: features X and target column 'label'
X_train1 = train_data1.drop('label', axis=1)
y_train1 = train_data1['label']
X_test1 = test_data1.drop('label', axis=1)
y_test1 = test_data1['label']

X_train2 = train_data2.drop('label', axis=1)
y_train2 = train_data2['label']
X_test2 = test_data2.drop('label', axis=1)
y_test2 = test_data2['label']

X_train3 = train_data3.drop('label', axis=1)
y_train3 = train_data3['label']
X_test3 = test_data3.drop('label', axis=1)
y_test3 = test_data3['label']

# Standardise each feature set: the scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler1 = StandardScaler()
X_train1 = scaler1.fit_transform(X_train1)
X_test1 = scaler1.transform(X_test1)

scaler2 = StandardScaler()
X_train2 = scaler2.fit_transform(X_train2)
X_test2 = scaler2.transform(X_test2)

scaler3 = StandardScaler()
X_train3 = scaler3.fit_transform(X_train3)
X_test3 = scaler3.transform(X_test3)

# Initialize and train one linear-kernel SVC (C=1.0, probability=True so that
# predict_proba is available) per feature set; keep the second predict_proba
# column as the positive-class score (assumes labels are 0/1 -- TODO confirm).
model1 = SVC(kernel='linear', C=1.0,probability=True,class_weight=None)
model1.fit(X_train1, y_train1)
y_pred_proba1 = model1.predict_proba(X_test1)[:, 1]
print(f"Model 1 (LASSO features) uses {X_train1.shape[1]} features.")

model2 = SVC(kernel='linear', C=1.0,probability=True,class_weight=None)
model2.fit(X_train2, y_train2)
y_pred_proba2 = model2.predict_proba(X_test2)[:, 1]
print(f"Model 2 (ALL features) uses {X_train2.shape[1]} features.")

model3 =SVC(kernel='linear', C=1.0,probability=True,class_weight=None)
model3.fit(X_train3, y_train3)
y_pred_proba3 = model3.predict_proba(X_test3)[:, 1]
print(f"Model 3 (ICC features) uses {X_train3.shape[1]} features.")
# ---- Disabled alternative: grid search over SVC hyperparameters ----
# Kept for reference only; nothing below runs until the AUC section.
#Define the parameter grid for grid search
#param_grid = {'C': [1], 'kernel': ['linear', 'rbf'], 'class_weight': [None, 'balanced']}

# Model 1 with Grid Search on LASSO features
# grid_search1 = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='roc_auc')
# grid_search1.fit(X_train1, y_train1)
# best_model1 = grid_search1.best_estimator_
# y_pred_proba1 = best_model1.predict_proba(X_test1)[:, 1]
# print(f"Best parameters for Model 1 (LASSO features): {grid_search1.best_params_}")
# print(f"Model 1 (LASSO features) uses {X_train1.shape[1]} features.")

# # Model 2 with Grid Search on ALL features
# grid_search2 = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='roc_auc')
# grid_search2.fit(X_train2, y_train2)
# best_model2 = grid_search2.best_estimator_
# y_pred_proba2 = best_model2.predict_proba(X_test2)[:, 1]
# print(f"Best parameters for Model 2 (ALL features): {grid_search2.best_params_}")
# print(f"Model 2 (ALL features) uses {X_train2.shape[1]} features.")

# # Model 3 with Grid Search on ICC features
# grid_search3 = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='roc_auc')
# grid_search3.fit(X_train3, y_train3)
# best_model3 = grid_search3.best_estimator_
# y_pred_proba3 = best_model3.predict_proba(X_test3)[:, 1]
# print(f"Best parameters for Model 3 (ICC features): {grid_search3.best_params_}")
# print(f"Model 3 (ICC features) uses {X_train3.shape[1]} features.")


# Compute the test-set AUC of each model
auc_score1 = roc_auc_score(y_test1, y_pred_proba1)
auc_score2 = roc_auc_score(y_test2, y_pred_proba2)
auc_score3 = roc_auc_score(y_test3, y_pred_proba3)

# 100 evenly spaced thresholds from 0.01 to 0.99
thresholds = np.linspace(0.01, 0.99, 100)

# Calculate net benefits for each model, plus the treat-all and treat-none
# reference curves computed from the matching test labels
model1_net_benefits = calculate_net_benefit(thresholds, y_test1, y_pred_proba1)
all_net_benefits1 = net_benefit_all(thresholds, y_test1)
none_net_benefits1 = net_benefit_none(thresholds)

model2_net_benefits = calculate_net_benefit(thresholds, y_test2, y_pred_proba2)
all_net_benefits2 = net_benefit_all(thresholds, y_test2)
none_net_benefits2 = net_benefit_none(thresholds)

model3_net_benefits = calculate_net_benefit(thresholds, y_test3, y_pred_proba3)
all_net_benefits3 = net_benefit_all(thresholds, y_test3)
none_net_benefits3 = net_benefit_none(thresholds)

# NOTE(review): these names are already imported at the top of the cell, and
# `auc` is not used anywhere in this script.
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
plt.rcParams['font.family'] = 'Times New Roman'
# Relies on y_test*, y_pred_proba* and auc_score* computed above.

# ---- Figure 1: ROC curves of the three models ----
plt.figure(figsize=(10, 8))

# NOTE(review): the feature counts in the legend labels are hard-coded (the
# LASSO label says 290 here but 286 in the other cells); the actual counts are
# printed above from X_train*.shape[1] -- confirm they match.
# ROC Curve for Model 1
fpr1, tpr1, _ = roc_curve(y_test1, y_pred_proba1)
plt.plot(fpr1, tpr1, color='blue', lw=2, label='LASSO features(290 features)(AUC = %0.3f)' % auc_score1)

# ROC Curve for Model 2
fpr2, tpr2, _ = roc_curve(y_test2, y_pred_proba2)
plt.plot(fpr2, tpr2, color='green', lw=2, label='ALL features(3860 features)(AUC = %0.3f)' % auc_score2)

# ROC Curve for Model 3
fpr3, tpr3, _ = roc_curve(y_test3, y_pred_proba3)
plt.plot(fpr3, tpr3, color='red', lw=2, label='ICC features(1507 features)(AUC = %0.3f)' % auc_score3)

# Diagonal chance line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([-0.05, 1.0])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate', fontsize=24)
plt.ylabel('True Positive Rate', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc="lower right", fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.show()

# ---- Figure 2: decision curves of the three models ----
plt.figure(figsize=(10, 8))
# DCA for Model 1
plt.plot(thresholds, model1_net_benefits, label='LASSO features(290 features)', color='blue', linestyle='-')

# DCA for Model 2
plt.plot(thresholds, model2_net_benefits, label='ALL features(3860 features)', color='green', linestyle='-')
# DCA for Model 3
plt.plot(thresholds, model3_net_benefits, label='ICC features(1507 features)', color='red', linestyle='-')
# Only the reference curves of test set 1 are drawn; this assumes all three
# test sets share the same labels -- TODO confirm.
plt.plot(thresholds, all_net_benefits1, label=' All', color='gray', linestyle='--')
plt.plot(thresholds, none_net_benefits1, label=' None', color='black', linestyle=':')
plt.xlabel('Threshold Probability', fontsize=24)
plt.ylabel('Net Benefit', fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.legend(loc='upper right', fontsize=18)
plt.grid(True, linestyle='-', linewidth=0.5)
plt.minorticks_on()
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black') 
plt.xlim([0, 1])
# The lower limit clips the steeply negative tail of the treat-all curve.
plt.ylim([-0.1, 1])
plt.show()