In [1]:
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import re, os, sys
import math
import torch
import warnings
warnings.filterwarnings("ignore")


Cross-Validation on the train dataset

In [2]:
def cv(clf, X, y, nr_fold):
    ix = []
    for i in range(0, len(y)):
        ix.append(i)
    ix = np.array(ix)
    
    allACC = []
    allSENS = []
    allSPEC = []
    allMCC = []
    allAUC = []
    for j in range(0, nr_fold):
        train_ix = ((ix % nr_fold) != j)
        test_ix = ((ix % nr_fold) == j)
        train_X, test_X = X[train_ix], X[test_ix]
        train_y, test_y = y[train_ix], y[test_ix]
        clf.fit(train_X, train_y)        
        p = clf.predict(test_X)
        pr = clf.predict_proba(test_X)[:,1]   
        TP=0   
        FP=0
        TN=0
        FN=0
        for i in range(0,len(test_y)):
            if test_y[i]==1 and p[i]==1:
                TP+= 1
            elif test_y[i]==1 and p[i]==0:
                FN+= 1
            elif test_y[i]==0 and p[i]==1:
                FP+= 1
            elif test_y[i]==0 and p[i]==0:
                TN+= 1
        ACC = (TP+TN)/(TP+FP+TN+FN)
        SENS = TP/(TP+FN)
        SPEC = TN/(TN+FP)
        det = math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
        if (det == 0):            
            MCC = 0                
        else:
            MCC = ((TP*TN)-(FP*FN))/det
        AUC = roc_auc_score(test_y, pr)
      
        allACC.append(ACC)
        allSENS.append(SENS)
        allSPEC.append(SPEC)
        allMCC.append(MCC)
        allAUC.append(AUC)
      
    return np.mean(allACC), np.mean(allSENS), np.mean(allSPEC), np.mean(allMCC), np.mean(allAUC)

Independent test on the test dataset

In [3]:
def test(clf, X, y, Xt, yt):
    train_X, test_X = X, Xt
    train_y, test_y = y, yt       
    p = clf.predict(test_X)
    pr = clf.predict_proba(test_X)[:,1]   
    TP=0   
    FP=0
    TN=0
    FN=0
    for i in range(0,len(test_y)):
        if test_y[i]==1 and p[i]==1:
            TP+= 1
        elif test_y[i]==1 and p[i]==0:
            FN+= 1
        elif test_y[i]==0 and p[i]==1:
            FP+= 1
        elif test_y[i]==0 and p[i]==0:
            TN+= 1
    ACC = (TP+TN)/(TP+FP+TN+FN)
    SENS = TP/(TP+FN)
    SPEC = TN/(TN+FP)
    det = math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
    if (det == 0):            
        MCC = 0                
    else:
        MCC = ((TP*TN)-(FP*FN))/det
    AUC = roc_auc_score(test_y, pr)
    
    return ACC, SENS, SPEC, MCC, AUC

Load dataset and split the dataset(Please change your paths)

In [4]:
pos_ade = torch.load("C:\\Windows\\System32\\PLMTHP\\data\\Feature\\pos_ade.pt")
neg_ade = torch.load("C:\\Windows\\System32\\PLMTHP\\data\\Feature\\neg_ade.pt")

pos = pos_ade.numpy()
neg = neg_ade.numpy()

all_data = np.concatenate((pos, neg), axis=0)
X = all_data
y=np.zeros(1302,dtype=int)
for i in range(1302):
    if i<651:
        y[i]=1
    else:
        y[i]=0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X=X_train
y=y_train
Xt=X_test
yt=y_test

del pos_ade,neg_ade,pos,neg,all_data

Tuning the best parameters of SVMRBF classifier

In [5]:
from sklearn.model_selection import GridSearchCV
clf = SVC(kernel='rbf', probability=True, random_state=0)
grid = GridSearchCV(clf, param_grid={"C":[0.1, 1, 10], "gamma": [1, 0.1, 0.01]}, scoring='roc_auc', cv=10) # 总共有9种参数组合的搜索空间
grid.fit(X, y)
print("The best parameters are %s"
      % (grid.best_params_))

The best parameters are {'C': 10, 'gamma': 0.1}


Train the model

In [9]:
clf = SVC(C=10, kernel='rbf', gamma=0.1, probability=True, random_state=0).fit(X,y)
clf = clf.fit(X, y)
p = clf.predict(Xt)
pr = clf.predict_proba(Xt)[:,1]

Cross-validation evaluation

In [10]:
acc, sens, spec, mcc, auc = cv(clf, X, y, 10) 
print("ACC:"+str(acc)+"\n"+"SENS:"+str(sens)+"\n"+"SPEC:"+str(spec)+"\n"+"MCC:"+str(mcc)+"\n"+"AUC:"+str(auc)+"\n")


ACC:0.8427108927108927
SENS:0.8325814927646558
SPEC:0.8490179324581163
MCC:0.6837289836244743
AUC:0.917464412042483



Independent test evaluation

In [11]:
acc, sens, spec, mcc, auc= test(clf, X, y, Xt, yt) 
print("ACC:"+str(acc)+"\n"+"SENS:"+str(sens)+"\n"+"SPEC:"+str(spec)+"\n"+"MCC:"+str(mcc)+"\n"+"AUC:"+str(auc)+"\n")


ACC:0.8520408163265306
SENS:0.8627450980392157
SPEC:0.8404255319148937
MCC:0.7035008862368752
AUC:0.9187526074259491

