In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = np.load('data/oversampled_data.npy', allow_pickle=True)
gene = np.load('data/geneAfterDiscard_0.npy', allow_pickle=True)

feature = data[:, :-1]
label = data[:, -1]
print(gene.shape)
print(feature.shape)
print(label.shape)

(11959,)
(296, 11959)
(296,)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

import time

X_train, X_test, y_train, y_test = train_test_split(feature, label, test_size=0.3, shuffle=True, random_state=2022)

# RandomForestClassifier

In [4]:
start = time.time()
from sklearn.ensemble import RandomForestClassifier

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.9483	0.9035	1.0	0.9947	2.477818250656128


# GradientBoostingClassifier

In [5]:
start = time.time()
from sklearn.ensemble import GradientBoostingClassifier

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.9461	0.9065	0.9907	0.9855	173.70992755889893


# svm

In [6]:
start = time.time()
from sklearn import svm

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = svm.SVC(probability=True)
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.8315	0.7593	0.9535	0.9358	61.20520091056824


# LogisticRegression

In [None]:
start = time.time()
from sklearn.linear_model import LogisticRegression

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

# KNeighborsClassifier

In [8]:
start = time.time()
from sklearn.neighbors import KNeighborsClassifier

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = KNeighborsClassifier()
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.6629	0.589	1.0	0.8602	0.15361952781677246


# MLPClassifier

In [9]:
start = time.time()
from sklearn.neural_network import MLPClassifier

allAcc = []
allPrec = []
allRec = []
allAuc = []
for _ in range(5):
    clf = MLPClassifier(hidden_layer_sizes=(180, 180, 60, 50), max_iter=300)
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))

end = time.time()

t = end -start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.6472	0.8412	0.4186	0.6705	38.90723776817322
