In [None]:
# Working directory: the relative data paths used further down are resolved against it.
import os

# Raw string literal: in a normal string '\w' and '\m' are invalid escape
# sequences that Python warns about; the resulting path value is unchanged.
PROJECT_DIR = r'E:\work\mine'
os.chdir(PROJECT_DIR)

### 导入数据

In [None]:
import numpy as np
import pandas as pd

# Read the feature table and the label table; neither CSV has a header row.
features_df = pd.read_csv(r"extract\All_feature\features\TPC.csv", header=None)
labels_df = pd.read_csv(r'extract\label.csv', header=None)

# Show the raw table shapes before converting to arrays.
print(features_df.shape)
print(labels_df.shape)

# Downstream cells work on plain NumPy arrays; labels are flattened to 1-D.
X_new = np.array(features_df)
y_new = np.array(labels_df).ravel()

### 数据标准化

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature matrix with StandardScaler (default settings).
# NOTE(review): the scaler is fitted on the complete data set, i.e. before the
# train/test split and the cross-validation performed later, so held-out
# samples contribute to the scaling statistics. Consider fitting it on the
# training portion only (for example inside a Pipeline).
scaler = StandardScaler()
X_new = scaler.fit(X_new).transform(X_new)

### 划分训练集与独立测试集

In [None]:

# dataset splitting
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as the independent test set (fixed seed 1111).
X_train_whole, X_ind_test, y_train_whole, y_ind_test = train_test_split(
    X_new,
    y_new,
    test_size=0.2,
    random_state=1111,
)

# Show the size of the training portion and of the held-out portion.
for split_part in (X_train_whole, X_ind_test):
    print(split_part.shape)

### 参数寻优

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score


# Hyper-parameter search space.
# `degree` is only used by the polynomial kernel, so it is varied for 'poly'
# alone. Crossing it with every kernel (as a single dict would) refits
# identical linear/rbf/sigmoid models five times each: 180 candidates instead
# of the 72 distinct ones searched here.
# NOTE(review): best_params_ no longer contains a 'degree' entry when a
# non-polynomial kernel wins.
C_VALUES = [2 ** exp for exp in range(-4, 5)]
param_grid = [
    {
        'C': C_VALUES,
        'kernel': ['linear', 'rbf', 'sigmoid'],
        'tol': [1e-5],
    },
    {
        'C': C_VALUES,
        'kernel': ['poly'],
        'degree': [1, 3, 5, 7, 9],
        'tol': [1e-5],
    },
]

# Base SVC estimator; the grid supplies C, kernel, degree and tol.
svm_clf = SVC()

# Exhaustive search scored by balanced accuracy with stratified 10-fold CV.
grid_search = GridSearchCV(
    estimator=svm_clf,
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=10),
    scoring='balanced_accuracy',
)

# Run the search on the training portion only.
grid_search.fit(X_train_whole, y_train_whole)

# Best parameter combination found.
print("Best Parameters: ", grid_search.best_params_)

# Mean cross-validated balanced accuracy of that combination.
print("Best Balanced Accuracy:", round(grid_search.best_score_, 3))


### 测试

### 10折交叉验证

In [None]:
# 10折交叉验证测试
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import  confusion_matrix, roc_auc_score
import numpy as np
import pandas as pd
# Re-create the hold-out split with the same arguments (test_size=0.2, seed 1111).
X_train_whole, X_ind_test, y_train_whole, y_ind_test = train_test_split(
    X_new, y_new, test_size=0.2, random_state=1111
)

# Per-fold metric accumulators.
BACC_collection, ACC_collection, Sn_collection = [], [], []
Sp_collection, MCC_collection, AUC_collection = [], [], []

# Pooled per-sample values gathered over all folds:
# true labels, predicted labels and the probability column used for AUC.
y_true_all, y_pred_all, y_pred_proba_all = [], [], []

# Grids and accumulators for curve averaging; not used within this cell.
base_fpr = np.linspace(0, 1, 100)
mean_recall = np.linspace(0, 1, 100)
all_precision = []
interp_tpr_collection = []
mean_tpr = 0.0


def categorical_probas_to_classes(p):
    """Return, for each row of `p`, the column index holding the largest value.

    Parameters
    ----------
    p : 2-D array-like
        One row per sample, one column per class score/probability.

    Returns
    -------
    numpy.ndarray
        1-D array of column indices (ties resolve to the first maximum).
    """
    return np.asarray(p).argmax(axis=1)

# Stratified 10-fold cross-validation of a fixed SVC configuration on the
# training portion.
# random_state pins the internal shuffling SVC performs when probability=True,
# so the probability estimates (and every metric derived from them) are
# reproducible between runs.
# NOTE(review): these hyper-parameters differ from the ones used in the
# independent-test cell (C=4, rbf) - confirm which set is intended.
clf = SVC(C=1, degree=3, kernel='poly', tol=1e-05, probability=True, random_state=1111)
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(X_train_whole, y_train_whole):
    X_train, X_valid = X_train_whole[train], X_train_whole[test]
    y_train, y_valid = y_train_whole[train], y_train_whole[test]
    clf.fit(X_train, y_train)

    # Predicted labels are the argmax column index of the probability matrix
    # (not clf.predict), which assumes the classes are encoded as 0 and 1.
    y_pred_proba = clf.predict_proba(X_valid)
    y_valid_pred = categorical_probas_to_classes(y_pred_proba)

    # Keep the pooled out-of-fold values for the per-sample results table.
    y_true_all.extend(y_valid.ravel().astype(int))
    y_pred_all.extend(y_valid_pred.astype(int))
    y_pred_proba_all.extend(y_pred_proba[:, 1])

    # For a binary problem confusion_matrix(...).ravel() yields
    # tn, fp, fn, tp - in that order. Unpacking it as TP, FP, FN, TN swaps
    # TP and TN, which turns "Sn" into NPV and "Sp" into precision.
    TN, FP, FN, TP = confusion_matrix(y_valid, y_valid_pred).ravel()
    Sn_collection.append(TP / (TP + FN))
    Sp_collection.append(TN / (TN + FP))
    MCC = (TP * TN - FP * FN) / np.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
    MCC_collection.append(MCC)
    ACC_collection.append((TP + TN) / (TP + TN + FP + FN))
    BACC_collection.append(0.5 * (TP / (TP + FN)) + 0.5 * (TN / (TN + FP)))
    auc = roc_auc_score(y_valid, y_pred_proba[:, 1])
    AUC_collection.append(auc)

# Per-sample table of the pooled out-of-fold predictions.
results_df = pd.DataFrame(
    {
        'Predicted_Proba': np.round(y_pred_proba_all, 8),
        'True_Label': y_true_all,
        'Predicted_Label': y_pred_all,
    }
)

# Optional export of the per-sample results (disabled):
# results_df.to_csv(r'Result\SVM\10折/SVM_Group4.csv', index=False)

# Mean ± standard deviation (np.std) over the folds, printed in the order
# ACC, Sn, Sp, MCC, AUC. BACC_collection is collected but not printed here.
for fold_scores in (
    ACC_collection,
    Sn_collection,
    Sp_collection,
    MCC_collection,
    AUC_collection,
):
    print(round(np.mean(fold_scores), 3), '±', round(np.std(fold_scores), 3))


### 独立测试

In [None]:
# Independence test
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import statistics
from sklearn.metrics import confusion_matrix
import math
from sklearn.metrics import roc_auc_score

# Metric accumulators, one entry appended per repeated split.
BACC_collecton, Sn_collecton, Sp_collecton = [], [], []
MCC_collecton, AUC_collecton = [], []
ACC_collection = []
AP = []

# Grids and accumulators for curve averaging; not used within this cell.
base_fpr = np.linspace(0, 1, 100)
mean_recall = np.linspace(0, 1, 100)
all_precision = []
interp_tpr_collection = []
mean_tpr = 0.0

def categorical_probas_to_classes(p):
    """Return, for each row of `p`, the column index holding the largest value.

    Parameters
    ----------
    p : 2-D array-like
        One row per sample, one column per class score/probability.

    Returns
    -------
    numpy.ndarray
        1-D array of column indices (ties resolve to the first maximum).
    """
    return np.asarray(p).argmax(axis=1)

# Repeated hold-out evaluation: ten 80/20 splits with seeds 0..9. Each round
# trains on the 80% part and scores the held-out 20%.
# Note that the loop overwrites X_train_whole / X_ind_test / y_train_whole /
# y_ind_test from the earlier cells.
for i in range(10):

    X_train_whole, X_ind_test, y_train_whole, y_ind_test = train_test_split(X_new, y_new, test_size=0.2, random_state=i)
    # `degree` is ignored by the rbf kernel; it is kept only so the call
    # mirrors the searched parameter set. random_state pins the internal
    # shuffling SVC performs when probability=True, so each round is
    # reproducible.
    clf = SVC(C=4, degree=1, kernel='rbf', tol=1e-05, probability=True, random_state=i)
    clf.fit(X_train_whole, y_train_whole)
    y_pred_score = clf.predict_proba(X_ind_test)
    # Predicted labels are the argmax column index of the probability matrix,
    # which assumes the classes are encoded as 0 and 1.
    y_pred = categorical_probas_to_classes(y_pred_score)
    y_true = y_ind_test
    # For a binary problem confusion_matrix(...).ravel() yields
    # tn, fp, fn, tp - in that order. Unpacking it as TP, FP, FN, TN swaps
    # TP and TN, which turns "Sn" into NPV and "Sp" into precision.
    TN, FP, FN, TP = confusion_matrix(y_true, y_pred).ravel()
    Sn_collecton.append(TP / (TP + FN))
    Sp_collecton.append(TN / (TN + FP))
    ACC_collection.append((TP + TN) / (TP + TN + FP + FN))
    MCC = (TP * TN - FP * FN) / math.pow(((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)), 0.5)
    MCC_collecton.append(MCC)
    BACC_collecton.append(0.5 * TP / (TP + FN) + 0.5 * TN / (TN + FP))
    auc = roc_auc_score(y_true, y_pred_score[:, 1])
    AUC_collecton.append(auc)
  
    
# Mean ± sample standard deviation (statistics.stdev) over the repeated splits,
# printed in the order ACC, Sn, Sp, MCC, AUC. BACC_collecton is not printed.
for split_scores in (
    ACC_collection,
    Sn_collecton,
    Sp_collecton,
    MCC_collecton,
    AUC_collecton,
):
    print(round(statistics.mean(split_scores), 3), '±', round(statistics.stdev(split_scores), 3))

