In [2]:
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
import math
import torch
import warnings
warnings.filterwarnings("ignore")


Cross-validation on the training dataset

In [2]:
def cv(clf, X, y, nr_fold):
    """Evaluate ``clf`` with interleaved k-fold cross-validation.

    Sample ``i`` is assigned to fold ``i % nr_fold`` (no shuffling, no
    stratification). For every fold the classifier is re-fitted on the other
    folds and scored on the held-out one; the per-fold metrics are averaged.

    Parameters
    ----------
    clf : estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        It is fitted in place, so after the call it holds the model trained
        on the last fold's training subset.
    X : feature container that supports boolean-mask row selection
        (``X[mask]``), e.g. a 2-D ndarray.
    y : binary labels (0 = negative, 1 = positive), one per row of ``X``.
        Any array-like is accepted; it is converted to an ndarray so that
        labels are always matched to predictions by position.
    nr_fold : int, number of folds.

    Returns
    -------
    tuple
        ``(ACC, SENS, SPEC, MCC, AUC)``, each the mean over all folds.

    Raises
    ------
    ZeroDivisionError
        If a held-out fold is empty or contains only one class, because
        sensitivity or specificity is then undefined.
    """
    # A pandas Series indexed with an integer does a *label* lookup, which
    # misaligns (or raises KeyError) once the index is not 0..n-1. Working on
    # a plain ndarray keeps every comparison positional.
    y = np.asarray(y)
    fold_of = np.arange(len(y)) % nr_fold

    allACC = []
    allSENS = []
    allSPEC = []
    allMCC = []
    allAUC = []
    for j in range(nr_fold):
        test_ix = fold_of == j
        train_ix = ~test_ix
        train_X, test_X = X[train_ix], X[test_ix]
        train_y, test_y = y[train_ix], y[test_ix]

        clf.fit(train_X, train_y)
        p = np.asarray(clf.predict(test_X)).ravel()
        # Column 1 of predict_proba is used as the positive-class score.
        pr = clf.predict_proba(test_X)[:, 1]

        # Confusion-matrix counts; converted to Python ints so the products
        # below cannot overflow and division by zero still raises.
        truth = test_y.ravel()
        TP = int(np.sum((truth == 1) & (p == 1)))
        FN = int(np.sum((truth == 1) & (p == 0)))
        FP = int(np.sum((truth == 0) & (p == 1)))
        TN = int(np.sum((truth == 0) & (p == 0)))

        ACC = (TP + TN) / (TP + FP + TN + FN)
        SENS = TP / (TP + FN)
        SPEC = TN / (TN + FP)
        # MCC is reported as 0 when its denominator vanishes (a whole row or
        # column of the confusion matrix is empty).
        det = math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
        MCC = 0 if det == 0 else ((TP * TN) - (FP * FN)) / det
        AUC = roc_auc_score(test_y, pr)

        allACC.append(ACC)
        allSENS.append(SENS)
        allSPEC.append(SPEC)
        allMCC.append(MCC)
        allAUC.append(AUC)

    return np.mean(allACC), np.mean(allSENS), np.mean(allSPEC), np.mean(allMCC), np.mean(allAUC)

Independent test on the test dataset

In [3]:
def test(clf, X, y, Xt, yt):
    train_X, test_X = X, Xt
    train_y, test_y = y, yt        
    p = clf.predict(test_X)
    pr = clf.predict_proba(test_X)[:,1]   
    TP=0   
    FP=0
    TN=0
    FN=0
    for i in range(0,len(test_y)):
        if test_y[i]==1 and p[i]==1:
            TP+= 1
        elif test_y[i]==1 and p[i]==0:
            FN+= 1
        elif test_y[i]==0 and p[i]==1:
            FP+= 1
        elif test_y[i]==0 and p[i]==0:
            TN+= 1
    ACC = (TP+TN)/(TP+FP+TN+FN)
    SENS = TP/(TP+FN)
    SPEC = TN/(TN+FP)
    det = math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
    if (det == 0):            
        MCC = 0                
    else:
        MCC = ((TP*TN)-(FP*FN))/det
    AUC = roc_auc_score(test_y, pr)
 
    return ACC, SENS, SPEC, MCC, AUC

Load the dataset and split it into training and test sets (please change the paths to match your setup)

In [4]:
# Directory that holds the pre-computed feature tensors. Edit this one line to
# point at your local copy of the data.
FEATURE_DIR = "C:\\Windows\\System32\\PLMTHP\\data\\Feature"

# NOTE: torch.load unpickles its input, so only load files you trust.
pos_ade = torch.load(FEATURE_DIR + "\\pos_ade.pt")
neg_ade = torch.load(FEATURE_DIR + "\\neg_ade.pt")

pos = pos_ade.numpy()
neg = neg_ade.numpy()

# Positive samples are stacked first, negatives second.
all_data = np.concatenate((pos, neg), axis=0)
X = all_data
# Build the labels from the actual number of rows in each class instead of
# hard-coded counts, so they always line up with the rows of all_data.
y = np.concatenate((np.ones(len(pos), dtype=int), np.zeros(len(neg), dtype=int)))

# Hold out 15% of the samples as an independent test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
# From here on X / y refer to the training split and Xt / yt to the test split.
X = X_train
y = y_train
Xt = X_test
yt = y_test

# Free the intermediates that are no longer needed.
del pos_ade, neg_ade, pos, neg, all_data

Cross-validation and independent-test evaluation of the five ML classifiers

In [6]:
# k-nearest-neighbours baseline (k = 14).
KNN = KNeighborsClassifier(n_neighbors=14)

# 10-fold cross-validation on the training split.
acc, sens, spec, mcc, auc = cv(KNN, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))

# Independent test split; runs after the cross-validation call on the same
# estimator object.
acc, sens, spec, mcc, auc = test(KNN, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8245945945945946
SENS:0.7888477215290215
SPEC:0.8594419487263991
MCC:0.6490361758120748
AUC:0.9082641538194066

ACC:0.826530612244898
SENS:0.7941176470588235
SPEC:0.8617021276595744
MCC:0.655819774718398
AUC:0.916875260742595



In [7]:
# Multi-layer perceptron with a single hidden layer of 8 units (seeded).
MLP = MLPClassifier(
    hidden_layer_sizes=(8,),
    random_state=0,
)

# 10-fold cross-validation on the training split.
acc, sens, spec, mcc, auc = cv(MLP, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))

# Independent test split; runs after the cross-validation call on the same
# estimator object.
acc, sens, spec, mcc, auc = test(MLP, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8427190827190827
SENS:0.836464368203526
SPEC:0.8468680526636054
MCC:0.6856541985889383
AUC:0.9210426418572805

ACC:0.8469387755102041
SENS:0.8921568627450981
SPEC:0.7978723404255319
MCC:0.6946817915866508
AUC:0.9207342511472674



In [8]:
# Gaussian naive Bayes with default settings.
NB = GaussianNB()

# 10-fold cross-validation on the training split.
acc, sens, spec, mcc, auc = cv(NB, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))

# Independent test split; runs after the cross-validation call on the same
# estimator object.
acc, sens, spec, mcc, auc = test(NB, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8055937755937757
SENS:0.7935390744999018
SPEC:0.8150182769699622
MCC:0.6078674713743701
AUC:0.8368052994819708

ACC:0.8163265306122449
SENS:0.7941176470588235
SPEC:0.8404255319148937
MCC:0.6341464654751245
AUC:0.8559136420525657



In [9]:
# Linear-kernel SVM; probability=True enables predict_proba, which the
# evaluation helpers need for the AUC.
# NOTE(review): gamma is presumably ignored by the linear kernel - confirm
# against the scikit-learn SVC documentation.
SVMLN = SVC(
    kernel='linear',
    C=1,
    gamma=1,
    probability=True,
    random_state=0,
)

# 10-fold cross-validation on the training split.
acc, sens, spec, mcc, auc = cv(SVMLN, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))

# Independent test split; runs after the cross-validation call on the same
# estimator object.
acc, sens, spec, mcc, auc = test(SVMLN, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8346027846027846
SENS:0.8252496156358322
SPEC:0.842045155386239
MCC:0.6688529181690135
AUC:0.918059324163236

ACC:0.8112244897959183
SENS:0.8333333333333334
SPEC:0.7872340425531915
MCC:0.6216380756293769
AUC:0.9216729244889446



In [10]:
# RBF-kernel SVM (C = 10, gamma = 0.1); probability=True enables
# predict_proba, which the evaluation helpers need for the AUC.
SVMRBF = SVC(
    kernel='rbf',
    C=10,
    gamma=0.1,
    probability=True,
    random_state=0,
)

# 10-fold cross-validation on the training split.
acc, sens, spec, mcc, auc = cv(SVMRBF, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))

# Independent test split; runs after the cross-validation call on the same
# estimator object.
acc, sens, spec, mcc, auc = test(SVMRBF, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8427108927108927
SENS:0.8325814927646558
SPEC:0.8490179324581163
MCC:0.6837289836244743
AUC:0.917464412042483

ACC:0.8520408163265306
SENS:0.8627450980392157
SPEC:0.8404255319148937
MCC:0.7035008862368752
AUC:0.9187526074259491



In [5]:
# Three candidate values: 0.4 * {2, 3, 4}, i.e. roughly 0.8, 1.2 and 1.6.
param1 = list(np.arange(2, 5, dtype=float) * 0.4)
print(len(param1))

3


Weighted voting ensemble classifiers

In [12]:
# Soft-voting ensemble over the five base classifiers defined above.
# The weights are positional: weights[i] applies to estimators[i]
# (KNN 1.0, MLP 1.2, NB 0.6, SVMLN 1.1, SVMRBF 1.1).
estimators = [
    ("KNN", KNN),
    ("MLP", MLP),
    ("NB", NB),
    ("SVMLN", SVMLN),
    ("SVMRBF", SVMRBF),
]
clf_weighted3 = VotingClassifier(
    estimators=estimators,
    voting="soft",
    weights=[1.0, 1.2, 0.6, 1.1, 1.1],
)


Cross-validation evaluation

In [13]:
# 10-fold cross-validation of the weighted ensemble on the training split.
acc, sens, spec, mcc, auc = cv(clf_weighted3, X, y, 10)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.859025389025389
SENS:0.8439853677983324
SPEC:0.8707009886593202
MCC:0.7170917576448932
AUC:0.9262864055025499



Independent test evaluation

In [15]:
# Score the weighted ensemble on the independent test split.
acc, sens, spec, mcc, auc = test(clf_weighted3, X, y, Xt, yt)
print("".join(
    tag + str(val) + "\n"
    for tag, val in zip(("ACC:", "SENS:", "SPEC:", "MCC:", "AUC:"), (acc, sens, spec, mcc, auc))
))


ACC:0.8367346938775511
SENS:0.8529411764705882
SPEC:0.8191489361702128
MCC:0.6727921812670322
AUC:0.9307467667918231

