In [None]:
import sys
import random
import numpy as np
import keras
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from keras import optimizers
import sklearn.metrics as metrics
from numpy import interp
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import plot_precision_recall_curve

In [None]:
##################
# Set the file names and paths below to match your environment.
path_to_data_file = 'PATH/TO/DATA/FILE'
test_index_file = 'PATH/TO/TEST/INDEX'
train_index_file = 'PATH/TO/TRAIN/INDEX'
saved_model_folder = 'PATH/TO/MODEL/SAVED'
performance_file = 'PATH/TO/PERFORMANCE/FILE'
##################

# Replace these values with your own hyperparameter set.
n_nodes = 60      # units per hidden Dense layer
n_layers = 5      # number of hidden Dense layers
lr = 0.00003      # learning rate passed to the Adam optimizer
n_batch = 20      # mini-batch size used by model.fit
n_epochs = 200    # training epochs per fold
n_cv = 3          # number of stratified cross-validation folds

# A new seed is drawn on every run. It seeds the cross-validation split and is
# written into the index, model and performance outputs so a run can be traced.
seed = random.randint(0,10000)

# Read the tab-separated data file. The `with` block guarantees the file handle
# is closed once all rows have been read.
with open(path_to_data_file) as data_file:
    whole_data = [line.strip().split('\t') for line in data_file]
# Drop the first row (presumably a header line -- confirm against the data file).
del whole_data[0]

In [None]:
# featDic maps the record key (column 0) to its parsed values: the features
# followed by the label as the last element.
featDic = {}
data_list_x = []
data_list_y = []

for line in whole_data:
    # Columns 1-10 and 13-14 are parsed as floats, columns 11-12 as ints.
    # Column 15 is parsed as ONE integer label. Mapping int() over the string
    # would split a multi-character label into separate digits and break the
    # row layout.
    featDic[line[0]] = (
        list(map(float, line[1:11]))
        + list(map(int, line[11:13]))
        + list(map(float, line[13:15]))
        + [int(line[15])]
    )
    data_list_x.append(featDic[line[0]][:-1])
    data_list_y.append(featDic[line[0]][-1])

# Standardise every feature column to zero mean and unit variance.
# NOTE(review): the statistics are computed over all rows, including rows that
# are later used as test folds.
data_x_bf_st = np.array(data_list_x)
feature_mean = np.mean(data_x_bf_st, axis=0)
feature_std = np.std(data_x_bf_st, axis=0)
# A constant column has zero standard deviation; dividing by it would produce
# NaN (0/0). Use 1 as the divisor so such a column becomes all zeros instead.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
data_x = (data_x_bf_st - feature_mean) / feature_std
data_y = np.array(data_list_y)

In [None]:
# Shuffled, stratified K-fold splitter; the run seed fixes the fold assignment.
cv = StratifiedKFold(random_state=seed, shuffle=True, n_splits=n_cv)

# Per-fold result accumulators: interpolated TPR curves, accuracies, ROC AUCs,
# F1 scores and precision-recall AUCs.
tprs, accs, ROC_aucs, f1s, RP_aucs = [], [], [], [], []

# Zero-based fold counter.
i = 0

In [None]:
# Common false-positive-rate grid. Each fold's ROC curve is interpolated onto
# it so the curves can be averaged point-wise after the loop.
mean_fpr = np.linspace(0, 1, 100)

# Start from empty accumulators so that re-running this cell does not mix in
# results left over from an earlier execution.
tprs, accs, ROC_aucs, f1s, RP_aucs = [], [], [], [], []

for fold, (train, test) in enumerate(cv.split(data_x, data_y), start=1):
    # Tag identifying this run and fold, e.g. "1234_1".
    fold_tag = '%i_%i' % (seed, fold)

    # Record which row indices went into the test and train parts of this fold.
    # The file handles use their own names: rebinding `train_index_file` to the
    # handle would destroy the path and make the next fold's open() fail.
    with open(test_index_file, 'a') as test_index_out:
        test_index_out.write(fold_tag + '\t')
        test_index_out.write(','.join(str(x) for x in test) + '\n')
    with open(train_index_file, 'a') as train_index_out:
        train_index_out.write(fold_tag + '\t')
        train_index_out.write(','.join(str(x) for x in train) + '\n')

    # Fully connected network: n_layers ReLU layers of n_nodes units each,
    # followed by a single sigmoid output unit.
    model = keras.Sequential()
    for _ in range(n_layers):
        model.add(keras.layers.Dense(n_nodes, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

    # NOTE(review): newer Keras releases name this argument `learning_rate`
    # instead of `lr` -- confirm against the installed version.
    optimizer = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    model.fit(data_x[train], data_y[train], batch_size=n_batch, epochs=n_epochs)

    model.save(saved_model_folder + '/' + fold_tag + '_noca.h5')
    print('model saved')

    test_loss, test_acc = model.evaluate(data_x[test], data_y[test])
    # Flatten the (n_samples, 1) network output to the 1-D score vector that
    # the metric functions below work on.
    predictions = model.predict(data_x[test]).ravel()

    accs.append(test_acc)

    # ROC curve and AUC for this fold.
    fpr, tpr, _ = metrics.roc_curve(data_y[test], predictions)
    roc_auc = metrics.auc(fpr, tpr)

    # Resample this fold's TPR onto the shared grid and pin the curve to start
    # at 0.
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0
    ROC_aucs.append(roc_auc)

    # F1 at the 0.5 decision threshold (rounded scores) and the area under the
    # precision-recall curve.
    precision, recall, _ = precision_recall_curve(data_y[test], predictions)
    f1s.append(f1_score(data_y[test], predictions.round()))
    RP_aucs.append(metrics.auc(recall, precision))

# Mean ROC curve across folds, pinned to end at 1, and its AUC.
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = metrics.auc(mean_fpr, mean_tpr)

In [None]:
# Average each metric over the folds that were actually evaluated, rather than
# over a hard-coded fold count.
mean_acc = sum(accs) / len(accs)
mean_f1 = sum(f1s) / len(f1s)
mean_rp_auc = sum(RP_aucs) / len(RP_aucs)

# One tab-separated line per run:
# seed, mean accuracy, per-fold accuracies, mean ROC AUC, per-fold ROC AUCs,
# mean F1, per-fold F1s, mean PR AUC, per-fold PR AUCs.
perf_fields = [seed, mean_acc, *accs,
               mean_auc, *ROC_aucs,
               mean_f1, *f1s,
               mean_rp_auc, *RP_aucs]

with open(performance_file, 'a') as perf_file:
    perf_file.write('\t'.join(str(x) for x in perf_fields) + '\n')