In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# Read the two arrays this notebook works from.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code -- only use it on files you trust.
data = np.load('data/oversampled_data.npy', allow_pickle=True)
gene = np.load('data/geneAfterDiscard_0.npy', allow_pickle=True)

# The last column of `data` is the label; every other column is a feature.
feature, label = data[:, :-1], data[:, -1]

# Sanity check: `gene` should have one entry per feature column.
for arr in (gene, feature, label):
    print(arr.shape)

(11959,)
(296, 11959)
(296,)


In [7]:
# Select the feature columns that belong to the chosen gene panel.

fina_gene = ['SPAG1', 'FIGN', 'NUBPL', 'CHMP5', 'TCF7L2', 'COQ10B', 'BSDC1', 'ZFPM1', 'GRPEL1']

# Look up the column position of every gene symbol in `gene`. When a symbol
# occurs more than once, the first occurrence is used.
gene_idx = []
missing_genes = []
for g in fina_gene:
    hits = np.where(gene == g)[0]
    if hits.size == 0:
        # Collect every missing symbol so they can all be reported together
        # instead of failing with an anonymous IndexError on the first one.
        missing_genes.append(g)
        continue
    gene_idx.append(hits[0])

if missing_genes:
    raise ValueError(f'genes not found in the gene list: {missing_genes}')

gene_idx = np.array(gene_idx)

# Keep all samples, but only the columns of the selected genes, in the same
# order as `fina_gene`.
fina_feature = feature[:, gene_idx]

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score

import time

# Hold out 30% of the samples as a test set; the fixed random_state keeps the
# partition identical between runs.
# NOTE(review): the input file is named "oversampled_data". If oversampling
# happened before this split, duplicated or synthetic samples may end up in
# both partitions and inflate the test metrics -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    fina_feature,
    label,
    test_size=0.3,
    shuffle=True,
    random_state=2022,
)

# RandomForestClassifier

In [16]:
from sklearn.ensemble import RandomForestClassifier

# Train a random forest five times on the same train/test split and print the
# mean accuracy, precision, recall and ROC AUC, followed by the total elapsed
# time of the five fit/predict rounds (tab-separated).
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for seed in range(5):
    # One fixed seed per repetition: the five forests still differ from each
    # other, but re-running the cell reproduces the same metrics.
    clf = RandomForestClassifier(random_state=seed)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.9663	0.9718	0.9581	0.9888	0.5241339206695557


# GradientBoostingClassifier

In [10]:
from sklearn.ensemble import GradientBoostingClassifier

# Train a gradient-boosting classifier five times on the same train/test split
# and print the mean accuracy, precision, recall and ROC AUC, followed by the
# total elapsed time of the five fit/predict rounds (tab-separated).
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for seed in range(5):
    # One fixed seed per repetition keeps the reported metrics reproducible
    # across re-runs of the cell.
    clf = GradientBoostingClassifier(random_state=seed)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.9236	0.9095	0.9349	0.975	0.3002331256866455


# SVC (support vector machine)

In [11]:
from sklearn import svm

# Train a support vector classifier five times on the same train/test split
# and print the mean accuracy, precision, recall and ROC AUC, followed by the
# total elapsed time of the five fit/predict rounds (tab-separated).
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for seed in range(5):
    # probability=True calibrates probabilities with an internal, randomised
    # cross-validation; seeding it makes predict_proba (and therefore the AUC)
    # reproducible across re-runs of the cell.
    clf = svm.SVC(probability=True, random_state=seed)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.8315	0.7917	0.8837	0.9424	0.05584979057312012


# LogisticRegression

In [12]:
from sklearn.linear_model import LogisticRegression

# Train a logistic regression five times on the same train/test split and
# print the mean accuracy, precision, recall and ROC AUC, followed by the
# total elapsed time of the five fit/predict rounds (tab-separated).
# NOTE(review): with default settings this estimator is expected to be
# deterministic, so the five repetitions presumably give identical scores;
# the loop is kept so the elapsed time covers the same number of rounds as
# the other model cells.
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for _ in range(5):
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.8652	0.8298	0.907	0.9469	0.060837507247924805


# KNeighborsClassifier

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# Train a k-nearest-neighbours classifier five times on the same train/test
# split and print the mean accuracy, precision, recall and ROC AUC, followed
# by the total elapsed time of the five fit/predict rounds (tab-separated).
# NOTE(review): this estimator is constructed without any random seed
# parameter, so the five repetitions presumably give identical scores; the
# loop is kept so the elapsed time covers the same number of rounds as the
# other model cells.
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for _ in range(5):
    clf = KNeighborsClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.8315	0.8043	0.8605	0.9138	0.028429746627807617


# MLPClassifier

In [14]:
from sklearn.neural_network import MLPClassifier

# Train a four-hidden-layer MLP five times on the same train/test split and
# print the mean accuracy, precision, recall and ROC AUC, followed by the
# total elapsed time of the five fit/predict rounds (tab-separated).
allAcc = []
allPrec = []
allRec = []
allAuc = []

# Start the clock after the import so module loading is not part of the timing.
start = time.perf_counter()
for seed in range(5):
    # One fixed seed per repetition: the five networks still differ from each
    # other, but re-running the cell reproduces the same metrics.
    clf = MLPClassifier(hidden_layer_sizes=(180, 180, 60, 50), max_iter=300, random_state=seed)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    allAcc.append(accuracy_score(y_test, y_pred))
    allPrec.append(precision_score(y_test, y_pred))
    allRec.append(recall_score(y_test, y_pred))
    # Column 1 of predict_proba is the score used for the ROC AUC.
    predict_prob_y = clf.predict_proba(X_test)[:, 1]
    allAuc.append(roc_auc_score(y_test, predict_prob_y))
end = time.perf_counter()

t = end - start
print(f'{round(np.mean(allAcc), 4)}\t{round(np.mean(allPrec), 4)}\t{round(np.mean(allRec), 4)}\t{round(np.mean(allAuc), 4)}\t{t}')

0.7798	0.8497	0.7023	0.9146	0.8722796440124512
