In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import os

# Folder to use as the working directory, so that relative paths (for example
# the figures saved later in this notebook) resolve against it.
# Leave it empty to keep the current working directory.
default_folder = r''

# os.chdir('') raises FileNotFoundError, so only switch directories when a
# folder has actually been configured.
if default_folder:
    os.chdir(default_folder)

# Load the three splits from Excel (paths are placeholders to be filled in).
traindata = pd.read_excel("")
valdata = pd.read_excel("")
testdata = pd.read_excel("")

# Take the "grade" column of each split as its label vector.
# The values are used as-is; no encoding is applied here.
train_labels, val_labels, test_labels = (
    frame["grade"] for frame in (traindata, valdata, testdata)
)

# Discard the first column of every split.
traindata, valdata, testdata = (
    frame.iloc[:, 1:] for frame in (traindata, valdata, testdata)
)

# Everything except "grade" is treated as a feature.
train_features, val_features, test_features = (
    frame.drop(columns=["grade"]) for frame in (traindata, valdata, testdata)
)

# Z-score standardisation: the scaler is fitted on the training features only,
# and the same fitted scaler is then applied to all three splits.
scaler = StandardScaler().fit(train_features)
train_features_scaled, val_features_scaled, test_features_scaled = (
    scaler.transform(split)
    for split in (train_features, val_features, test_features)
)

# Fit an RBF-kernel SVM on the scaled training features.
# gamma is pinned to 0.01 and probability=True enables predict_proba.
svm_model = SVC(
    kernel='rbf',
    C=1,
    gamma=0.01,
    probability=True,
    random_state=42,
)
svm_model.fit(train_features_scaled, train_labels)

# Class-probability predictions for the train, validation and test splits.
train_pred_prob, val_pred_prob, test_pred_prob = (
    svm_model.predict_proba(split)
    for split in (train_features_scaled, val_features_scaled, test_features_scaled)
)

# ROC AUC per split, scored on the probability in column 1.
train_auc, val_auc, test_auc = (
    roc_auc_score(labels, probs[:, 1])
    for labels, probs in (
        (train_labels, train_pred_prob),
        (val_labels, val_pred_prob),
        (test_labels, test_pred_prob),
    )
)

# Report the AUC of all three splits.
print("Train AUC:", train_auc)
print("Validation AUC:", val_auc)
print("Test AUC:", test_auc)



In [None]:
import shap
import matplotlib.pyplot as plt

In [None]:
def svm_predict_func(input_data):
    """Return column 1 of ``svm_model.predict_proba(input_data)``.

    Wraps the module-level ``svm_model`` so that callers receive a single
    value per input row instead of the full class-probability matrix.
    """
    class_probabilities = svm_model.predict_proba(input_data)
    return class_probabilities[:, 1]

# Build a SHAP explainer around the probability wrapper, using the scaled
# training features as the masker/background data and the original column
# labels as feature names.
explainer = shap.Explainer(
    model=svm_predict_func,
    masker=train_features_scaled,
    feature_names=train_features.columns,
)

# SHAP values for every training sample.
shap_values_train = explainer(train_features_scaled)

In [None]:
# Beeswarm summary of the SHAP values computed on the training set.
# A new figure is created first; show=False is passed to beeswarm, presumably
# so the figure can be written to disk before plt.show() is called.
fig = plt.figure()
shap.plots.beeswarm(shap_values_train, show=False)
# NOTE(review): this file name says "fold1" while the validation outputs in
# this notebook say "fold5" — confirm which fold this run corresponds to.
plt.savefig('train_fold1_shap.tiff', dpi=300, format='tiff', bbox_inches='tight')
plt.show()

In [None]:
# Compute SHAP values for the validation set with the explainer built above,
# then draw and save their beeswarm summary.
shap_values_val = explainer(val_features_scaled)
# NOTE(review): redundant re-import — pyplot is already imported in an earlier cell.
import matplotlib.pyplot as plt
fig = plt.figure()
shap.plots.beeswarm(shap_values_val,show=False)
# Saved as a 300 dpi TIFF with a tight bounding box.
plt.savefig('val_fold5_shap.tiff', dpi=300, format='tiff', bbox_inches='tight')
plt.show()

In [None]:
# Waterfall plot explaining the first validation sample (index 0).
shap.plots.waterfall(shap_values_val[0])

In [None]:
# Force plot over the validation-set SHAP values (not the training set),
# written out as an HTML file.
force_plot_val=shap.plots.force(shap_values_val)
shap.save_html('forceplot_val_fold5.html', force_plot_val)

In [None]:
# Compute SHAP values for the test set with the explainer built above and
# display their beeswarm summary (this plot is shown only, not saved to disk).
shap_values_test = explainer(test_features_scaled)
# NOTE(review): redundant re-import — pyplot is already imported in an earlier cell.
import matplotlib.pyplot as plt
shap.plots.beeswarm(shap_values_test)

In [None]:
# Waterfall plot explaining the first test sample (index 0).
shap.plots.waterfall(shap_values_test[0])

In [None]:
# Force plot over the test-set SHAP values (not the training set),
# written out as an HTML file.
force_plot_test=shap.plots.force(shap_values_test)
# NOTE(review): the output path is an empty placeholder — set a real file
# name before running this cell.
shap.save_html('', force_plot_test)